1
|
package eu.dnetlib.data.collector.plugins.utils;
|
2
|
|
3
|
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
|
6
|
|
7
|
public class JsonUtils {
|
8
|
|
9
|
private static final Log log = LogFactory.getLog(JsonUtils.class);
|
10
|
|
11
|
public static final String wrapName = "recordWrap";
|
12
|
/**
|
13
|
* convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to ''
|
14
|
* check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names
|
15
|
* and work-around for the JSON to XML converting of org.json.XML-package.
|
16
|
*
|
17
|
* known bugs: doesn't prevent "key name":" ["sexy name",": penari","erotic dance"],
|
18
|
*
|
19
|
* @param jsonInput
|
20
|
* @return convertedJsonKeynameOutput
|
21
|
*/
|
22
|
public String syntaxConvertJsonKeyNames(String jsonInput) {
|
23
|
|
24
|
log.trace("before convertJsonKeyNames: " + jsonInput);
|
25
|
// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
|
26
|
// replace ' 's in JSON Namens with '_'
|
27
|
while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) {
|
28
|
jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
|
29
|
}
|
30
|
|
31
|
// replace forward-slash (sign '/' ) in JSON Names with '_'
|
32
|
while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) {
|
33
|
jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":");
|
34
|
}
|
35
|
|
36
|
// replace '(' in JSON Names with ''
|
37
|
while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) {
|
38
|
jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":");
|
39
|
}
|
40
|
|
41
|
// replace ')' in JSON Names with ''
|
42
|
while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) {
|
43
|
jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":");
|
44
|
}
|
45
|
|
46
|
// add prefix of startNumbers in JSON Keynames with 'n_'
|
47
|
while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) {
|
48
|
jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":");
|
49
|
}
|
50
|
// add prefix of only numbers in JSON Keynames with 'm_'
|
51
|
while (jsonInput.matches(".*\"([0-9]+)\":.*")) {
|
52
|
jsonInput = jsonInput.replaceAll("\"([0-9]+)\":", "\"m_$1\":");
|
53
|
}
|
54
|
|
55
|
// replace ':' between number like '2018-08-28T11:05:00Z' in JSON keynames with ''
|
56
|
while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) {
|
57
|
jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":");
|
58
|
}
|
59
|
|
60
|
// replace ',' in JSON Keynames with '.' to prevent , in xml tagnames.
|
61
|
// while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) {
|
62
|
// jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":");
|
63
|
// }
|
64
|
|
65
|
// replace '=' in JSON Keynames with '-'
|
66
|
while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) {
|
67
|
jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":");
|
68
|
}
|
69
|
|
70
|
// replace '@' in JSON Keynames with 'oat_'
|
71
|
while (jsonInput.matches(".*\"@([^\"]*)\":.*")) {
|
72
|
jsonInput = jsonInput.replaceAll("\"@([^\"]*)\":", "\"oat_$1\":");
|
73
|
}
|
74
|
log.trace("after syntaxConvertJsonKeyNames: " + jsonInput);
|
75
|
return jsonInput;
|
76
|
}
|
77
|
|
78
|
/**
|
79
|
*
|
80
|
* https://www.w3.org/TR/REC-xml/#charencoding shows character enoding in entities
|
81
|
* *
|
82
|
* @param bufferStr - XML string
|
83
|
* @return
|
84
|
*/
|
85
|
public String cleanUnwantedJsonCharsInXmlTagnames(String bufferStr) {
|
86
|
|
87
|
while (bufferStr.matches(".*<([^<>].*),(.)>.*")) {
|
88
|
bufferStr = bufferStr.replaceAll("<([^<>.*),(.*)>", "<$1$2>");
|
89
|
}
|
90
|
|
91
|
// replace [#x10-#x1f] with ''
|
92
|
// while (bufferStr.matches(".*[0-9a-f].*")) {
|
93
|
// bufferStr = bufferStr.replaceAll("([0-9a-fA-F])", "");
|
94
|
// }
|
95
|
|
96
|
return bufferStr;
|
97
|
}
|
98
|
|
99
|
public String convertToXML(final String jsonRecord){
|
100
|
String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
101
|
|
102
|
log.trace("before convertToXML: " + jsonRecord);
|
103
|
org.json.JSONObject jsonObject = new org.json.JSONObject(syntaxConvertJsonKeyNames(jsonRecord));
|
104
|
resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element
|
105
|
log.trace("before inputStream: " + resultXml);
|
106
|
resultXml = XmlCleaner.cleanAllEntities(resultXml);
|
107
|
log.trace("after cleaning and end of convertToXML: " + resultXml);
|
108
|
return resultXml;
|
109
|
}
|
110
|
}
|