Project

General

Profile

1
package eu.dnetlib.data.collector.plugins.utils;
2

    
3
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
4
import org.apache.commons.logging.Log;
5
import org.apache.commons.logging.LogFactory;
6

    
7
public class JsonUtils {
8

    
9
    private static final Log log = LogFactory.getLog(JsonUtils.class);
10

    
11
    public static final String wrapName = "recordWrap";
12
    /**
13
     * convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to ''
14
     * check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names
15
     * and work-around for the JSON to XML converting of org.json.XML-package.
16
     *
17
     * known bugs:     doesn't prevent     "key name":" ["sexy name",": penari","erotic dance"],
18
     *
19
     * @param jsonInput
20
     * @return convertedJsonKeynameOutput
21
     */
22
    public String syntaxConvertJsonKeyNames(String jsonInput) {
23

    
24
        log.trace("before convertJsonKeyNames: " + jsonInput);
25
        // pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
26
        // replace ' 's in JSON Namens with '_'
27
        while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) {
28
            jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
29
        }
30

    
31
        // replace forward-slash (sign '/' ) in JSON Names with '_'
32
        while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) {
33
            jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":");
34
        }
35

    
36
        // replace '(' in JSON Names with ''
37
        while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) {
38
            jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":");
39
        }
40

    
41
        // replace ')' in JSON Names with ''
42
        while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) {
43
            jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":");
44
        }
45

    
46
        // add prefix of startNumbers in JSON Keynames with 'n_'
47
        while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) {
48
            jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":");
49
        }
50
        // add prefix of only numbers in JSON Keynames with 'm_'
51
        while (jsonInput.matches(".*\"([0-9]+)\":.*")) {
52
            jsonInput = jsonInput.replaceAll("\"([0-9]+)\":", "\"m_$1\":");
53
        }
54

    
55
        // replace ':' between number like '2018-08-28T11:05:00Z' in JSON keynames with ''
56
        while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) {
57
            jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":");
58
        }
59

    
60
        // replace ',' in JSON Keynames with '.' to prevent , in xml tagnames.
61
        //            while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) {
62
        //                jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":");
63
        //            }
64

    
65
        // replace '=' in JSON Keynames with '-'
66
        while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) {
67
            jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":");
68
        }
69

    
70
        // replace '@' in JSON Keynames with 'oat_'
71
        while (jsonInput.matches(".*\"@([^\"]*)\":.*")) {
72
            jsonInput = jsonInput.replaceAll("\"@([^\"]*)\":", "\"oat_$1\":");
73
        }
74
        log.trace("after syntaxConvertJsonKeyNames: " + jsonInput);
75
        return jsonInput;
76
    }
77

    
78
    /**
79
     *
80
     * https://www.w3.org/TR/REC-xml/#charencoding shows character enoding in entities
81
     *          *
82
     * @param bufferStr - XML string
83
     * @return
84
     */
85
    public String cleanUnwantedJsonCharsInXmlTagnames(String bufferStr) {
86

    
87
        while (bufferStr.matches(".*<([^<>].*),(.)>.*")) {
88
            bufferStr = bufferStr.replaceAll("<([^<>.*),(.*)>", "<$1$2>");
89
        }
90

    
91
        // replace [#x10-#x1f] with ''
92
        //            while (bufferStr.matches(".*&#x1[0-9a-f].*")) {
93
        //                bufferStr = bufferStr.replaceAll("&#x1([0-9a-fA-F])", "");
94
        //            }
95

    
96
        return bufferStr;
97
    }
98

    
99
    public String convertToXML(final String jsonRecord){
100
        String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
101

    
102
        log.trace("before convertToXML: " + jsonRecord);
103
        org.json.JSONObject jsonObject = new org.json.JSONObject(syntaxConvertJsonKeyNames(jsonRecord));
104
        resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element
105
        log.trace("before inputStream: " + resultXml);
106
        resultXml = XmlCleaner.cleanAllEntities(resultXml);
107
        log.trace("after cleaning and end of convertToXML: " + resultXml);
108
        return resultXml;
109
    }
110
}
    (1-1/1)