Project

General

Profile

« Previous | Next » 

Revision 58994

refactored methods working with json so they can be reused by other plugins

View differences:

modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/utils/JsonUtils.java
1
package eu.dnetlib.data.collector.plugins.utils;
2

  
3
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
4
import org.apache.commons.logging.Log;
5
import org.apache.commons.logging.LogFactory;
6

  
7
public class JsonUtils {
8

  
9
    private static final Log log = LogFactory.getLog(JsonUtils.class);
10

  
11
    public static final String wrapName = "recordWrap";
12
    /**
13
     * convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to ''
14
     * check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names
15
     * and work-around for the JSON to XML converting of org.json.XML-package.
16
     *
17
     * known bugs:     doesn't prevent     "key name":" ["sexy name",": penari","erotic dance"],
18
     *
19
     * @param jsonInput
20
     * @return convertedJsonKeynameOutput
21
     */
22
    public String syntaxConvertJsonKeyNames(String jsonInput) {
23

  
24
        log.trace("before convertJsonKeyNames: " + jsonInput);
25
        // pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
26
        // replace ' 's in JSON Namens with '_'
27
        while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) {
28
            jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
29
        }
30

  
31
        // replace forward-slash (sign '/' ) in JSON Names with '_'
32
        while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) {
33
            jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":");
34
        }
35

  
36
        // replace '(' in JSON Names with ''
37
        while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) {
38
            jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":");
39
        }
40

  
41
        // replace ')' in JSON Names with ''
42
        while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) {
43
            jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":");
44
        }
45

  
46
        // add prefix of startNumbers in JSON Keynames with 'n_'
47
        while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) {
48
            jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":");
49
        }
50
        // add prefix of only numbers in JSON Keynames with 'm_'
51
        while (jsonInput.matches(".*\"([0-9]+)\":.*")) {
52
            jsonInput = jsonInput.replaceAll("\"([0-9]+)\":", "\"m_$1\":");
53
        }
54

  
55
        // replace ':' between number like '2018-08-28T11:05:00Z' in JSON keynames with ''
56
        while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) {
57
            jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":");
58
        }
59

  
60
        // replace ',' in JSON Keynames with '.' to prevent , in xml tagnames.
61
        //            while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) {
62
        //                jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":");
63
        //            }
64

  
65
        // replace '=' in JSON Keynames with '-'
66
        while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) {
67
            jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":");
68
        }
69

  
70
        log.trace("after syntaxConvertJsonKeyNames: " + jsonInput);
71
        return jsonInput;
72
    }
73

  
74
    /**
75
     *
76
     * https://www.w3.org/TR/REC-xml/#charencoding shows character enoding in entities
77
     *          *
78
     * @param bufferStr - XML string
79
     * @return
80
     */
81
    public String cleanUnwantedJsonCharsInXmlTagnames(String bufferStr) {
82

  
83
        while (bufferStr.matches(".*<([^<>].*),(.)>.*")) {
84
            bufferStr = bufferStr.replaceAll("<([^<>.*),(.*)>", "<$1$2>");
85
        }
86

  
87
        // replace [#x10-#x1f] with ''
88
        //            while (bufferStr.matches(".*&#x1[0-9a-f].*")) {
89
        //                bufferStr = bufferStr.replaceAll("&#x1([0-9a-fA-F])", "");
90
        //            }
91

  
92
        return bufferStr;
93
    }
94

  
95
    public String convertToXML(final String jsonRecord){
96
        String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
97
        org.json.JSONObject jsonObject = new org.json.JSONObject(syntaxConvertJsonKeyNames(jsonRecord));
98
        resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element
99
        log.trace("before inputStream: " + resultXml);
100
        resultXml = XmlCleaner.cleanAllEntities(resultXml);
101
        log.trace("after cleaning: " + resultXml);
102
        return resultXml;
103
    }
104
}

Also available in: Unified diff