Revision 58994
Added by Alessia Bardi about 4 years ago
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/utils/JsonUtils.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.utils; |
|
2 |
|
|
3 |
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
|
6 |
|
|
7 |
public class JsonUtils { |
|
8 |
|
|
9 |
private static final Log log = LogFactory.getLog(JsonUtils.class); |
|
10 |
|
|
11 |
public static final String wrapName = "recordWrap"; |
|
12 |
/** |
|
13 |
* convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to '' |
|
14 |
* check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names |
|
15 |
* and work-around for the JSON to XML converting of org.json.XML-package. |
|
16 |
* |
|
17 |
* known bugs: doesn't prevent "key name":" ["sexy name",": penari","erotic dance"], |
|
18 |
* |
|
19 |
* @param jsonInput |
|
20 |
* @return convertedJsonKeynameOutput |
|
21 |
*/ |
|
22 |
public String syntaxConvertJsonKeyNames(String jsonInput) { |
|
23 |
|
|
24 |
log.trace("before convertJsonKeyNames: " + jsonInput); |
|
25 |
// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml) |
|
26 |
// replace ' 's in JSON Namens with '_' |
|
27 |
while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) { |
|
28 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":"); |
|
29 |
} |
|
30 |
|
|
31 |
// replace forward-slash (sign '/' ) in JSON Names with '_' |
|
32 |
while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) { |
|
33 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":"); |
|
34 |
} |
|
35 |
|
|
36 |
// replace '(' in JSON Names with '' |
|
37 |
while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) { |
|
38 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":"); |
|
39 |
} |
|
40 |
|
|
41 |
// replace ')' in JSON Names with '' |
|
42 |
while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) { |
|
43 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":"); |
|
44 |
} |
|
45 |
|
|
46 |
// add prefix of startNumbers in JSON Keynames with 'n_' |
|
47 |
while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) { |
|
48 |
jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":"); |
|
49 |
} |
|
50 |
// add prefix of only numbers in JSON Keynames with 'm_' |
|
51 |
while (jsonInput.matches(".*\"([0-9]+)\":.*")) { |
|
52 |
jsonInput = jsonInput.replaceAll("\"([0-9]+)\":", "\"m_$1\":"); |
|
53 |
} |
|
54 |
|
|
55 |
// replace ':' between number like '2018-08-28T11:05:00Z' in JSON keynames with '' |
|
56 |
while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) { |
|
57 |
jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":"); |
|
58 |
} |
|
59 |
|
|
60 |
// replace ',' in JSON Keynames with '.' to prevent , in xml tagnames. |
|
61 |
// while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) { |
|
62 |
// jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":"); |
|
63 |
// } |
|
64 |
|
|
65 |
// replace '=' in JSON Keynames with '-' |
|
66 |
while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) { |
|
67 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":"); |
|
68 |
} |
|
69 |
|
|
70 |
log.trace("after syntaxConvertJsonKeyNames: " + jsonInput); |
|
71 |
return jsonInput; |
|
72 |
} |
|
73 |
|
|
74 |
/** |
|
75 |
* |
|
76 |
* https://www.w3.org/TR/REC-xml/#charencoding shows character enoding in entities |
|
77 |
* * |
|
78 |
* @param bufferStr - XML string |
|
79 |
* @return |
|
80 |
*/ |
|
81 |
public String cleanUnwantedJsonCharsInXmlTagnames(String bufferStr) { |
|
82 |
|
|
83 |
while (bufferStr.matches(".*<([^<>].*),(.)>.*")) { |
|
84 |
bufferStr = bufferStr.replaceAll("<([^<>.*),(.*)>", "<$1$2>"); |
|
85 |
} |
|
86 |
|
|
87 |
// replace [#x10-#x1f] with '' |
|
88 |
// while (bufferStr.matches(".*[0-9a-f].*")) { |
|
89 |
// bufferStr = bufferStr.replaceAll("([0-9a-fA-F])", ""); |
|
90 |
// } |
|
91 |
|
|
92 |
return bufferStr; |
|
93 |
} |
|
94 |
|
|
95 |
public String convertToXML(final String jsonRecord){ |
|
96 |
String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; |
|
97 |
org.json.JSONObject jsonObject = new org.json.JSONObject(syntaxConvertJsonKeyNames(jsonRecord)); |
|
98 |
resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element |
|
99 |
log.trace("before inputStream: " + resultXml); |
|
100 |
resultXml = XmlCleaner.cleanAllEntities(resultXml); |
|
101 |
log.trace("after cleaning: " + resultXml); |
|
102 |
return resultXml; |
|
103 |
} |
|
104 |
} |
Also available in: Unified diff
Refactored the methods that work with JSON so they can be reused by other plugins.