Revision 53854
Added by Alessia Bardi almost 6 years ago
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/rest/RestIterator.java | ||
---|---|---|
1 | 1 |
/** |
2 |
* |
|
3 |
* |
|
4 | 2 |
* log.debug(...) equal to log.trace(...) in the application-logs |
5 |
* |
|
6 |
* known bug: with resumptionType 'discover', collecting fails if (resultTotal % resultSizeValue) == 0 -> choose a different resultSizeValue
|
|
3 |
* <p>
|
|
4 |
* known bug: with resumptionType 'discover', collecting fails if (resultTotal % resultSizeValue) == 0 -> choose a different resultSizeValue |
|
7 | 5 |
*/ |
8 | 6 |
package eu.dnetlib.data.collector.plugins.rest; |
9 | 7 |
|
... | ... | |
13 | 11 |
import java.util.Iterator; |
14 | 12 |
import java.util.Queue; |
15 | 13 |
import java.util.concurrent.PriorityBlockingQueue; |
16 |
|
|
17 | 14 |
import javax.xml.transform.OutputKeys; |
18 | 15 |
import javax.xml.transform.Transformer; |
19 | 16 |
import javax.xml.transform.TransformerConfigurationException; |
20 | 17 |
import javax.xml.transform.TransformerFactory; |
21 | 18 |
import javax.xml.transform.dom.DOMSource; |
22 | 19 |
import javax.xml.transform.stream.StreamResult; |
23 |
import javax.xml.xpath.XPath; |
|
24 |
import javax.xml.xpath.XPathConstants; |
|
25 |
import javax.xml.xpath.XPathExpression; |
|
26 |
import javax.xml.xpath.XPathExpressionException; |
|
27 |
import javax.xml.xpath.XPathFactory; |
|
20 |
import javax.xml.xpath.*; |
|
28 | 21 |
|
22 |
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner; |
|
23 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
29 | 24 |
import org.apache.commons.io.IOUtils; |
30 | 25 |
import org.apache.commons.lang3.StringUtils; |
31 | 26 |
import org.apache.commons.logging.Log; |
... | ... | |
34 | 29 |
import org.w3c.dom.NodeList; |
35 | 30 |
import org.xml.sax.InputSource; |
36 | 31 |
|
37 |
import eu.dnetlib.data.collector.plugins.oai.OaiIterator; |
|
38 |
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner; |
|
39 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
40 |
|
|
41 | 32 |
/** |
42 | 33 |
* @author Jochen Schirrwagen, Aenne Loehden, Andreas Czerniak |
43 | 34 |
* @date 2018-09-03 |
... | ... | |
45 | 36 |
*/ |
46 | 37 |
public class RestIterator implements Iterator<String> { |
47 | 38 |
|
48 |
// TODO: clean up the comments of replaced source code
|
|
39 |
// TODO: clean up the comments of replaced source code
|
|
49 | 40 |
private static final Log log = LogFactory.getLog(RestIterator.class); // NOPMD by marko on 11/24/08 5:02 PM |
50 | 41 |
|
51 | 42 |
private static final String wrapName = "recordWrap"; |
... | ... | |
55 | 46 |
private String resultFormatValue; |
56 | 47 |
private String queryParams; |
57 | 48 |
private int resultSizeValue; |
58 |
private int resumptionInt = 0; // integer resumption token (first record to harvest)
|
|
49 |
private int resumptionInt = 0; // integer resumption token (first record to harvest)
|
|
59 | 50 |
private int resultTotal = -1; |
60 | 51 |
private String resumptionStr = Integer.toString(resumptionInt); // string resumption token (first record to harvest or token scanned from results) |
61 | 52 |
private InputStream resultStream; |
... | ... | |
68 | 59 |
private String queryFormat; |
69 | 60 |
private String querySize; |
70 | 61 |
private Queue<String> recordQueue = new PriorityBlockingQueue<String>(); |
71 |
private int discoverResultSize = 0;
|
|
72 |
private int pagination = 1;
|
|
62 |
private int discoverResultSize = 0;
|
|
63 |
private int pagination = 1;
|
|
73 | 64 |
|
74 | 65 |
public RestIterator( |
75 | 66 |
final String baseUrl, |
... | ... | |
80 | 71 |
final String resultFormatParam, |
81 | 72 |
final String resultFormatValue, |
82 | 73 |
final String resultSizeParam, |
83 |
final String resultSizeValueStr,
|
|
74 |
final String resultSizeValueStr,
|
|
84 | 75 |
final String queryParams, |
85 | 76 |
final String entityXpath |
86 |
) {
|
|
77 |
) { |
|
87 | 78 |
this.baseUrl = baseUrl; |
88 | 79 |
this.resumptionType = resumptionType; |
89 | 80 |
this.resumptionParam = resumptionParam; |
90 | 81 |
this.resultFormatValue = resultFormatValue; |
91 | 82 |
this.queryParams = queryParams; |
92 |
this.resultSizeValue = Integer.valueOf(resultSizeValueStr); |
|
93 |
|
|
94 |
queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue : ""; |
|
95 |
querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : ""; |
|
83 |
this.resultSizeValue = Integer.valueOf(resultSizeValueStr); |
|
96 | 84 |
|
85 |
queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue : ""; |
|
86 |
querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : ""; |
|
87 |
|
|
97 | 88 |
try { |
98 | 89 |
initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath); |
99 |
} catch(Exception e) { |
|
90 |
} catch (Exception e) {
|
|
100 | 91 |
throw new IllegalStateException("xml transformation init failed: " + e.getMessage()); |
101 | 92 |
} |
102 |
initQueue();
|
|
93 |
initQueue();
|
|
103 | 94 |
} |
104 |
|
|
105 |
private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) throws TransformerConfigurationException, XPathExpressionException{ |
|
95 |
|
|
96 |
private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) |
|
97 |
throws TransformerConfigurationException, XPathExpressionException { |
|
106 | 98 |
transformer = TransformerFactory.newInstance().newTransformer(); |
107 |
transformer.setOutputProperty(OutputKeys.INDENT,"yes");
|
|
108 |
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount","3");
|
|
109 |
xpath = XPathFactory.newInstance().newXPath();
|
|
99 |
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
|
|
100 |
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
|
|
101 |
xpath = XPathFactory.newInstance().newXPath(); |
|
110 | 102 |
xprResultTotalPath = xpath.compile(resultTotalXpath); |
111 |
xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath);
|
|
112 |
xprEntity = xpath.compile(entityXpath);
|
|
103 |
xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath); |
|
104 |
xprEntity = xpath.compile(entityXpath); |
|
113 | 105 |
} |
114 |
|
|
106 |
|
|
115 | 107 |
private void initQueue() { |
116 | 108 |
query = baseUrl + "?" + queryParams + querySize + queryFormat; |
117 | 109 |
} |
118 |
|
|
110 |
|
|
119 | 111 |
private void disconnect() { |
120 | 112 |
// TODO close inputstream |
121 | 113 |
} |
122 |
|
|
114 |
|
|
123 | 115 |
/* (non-Javadoc) |
124 | 116 |
* @see java.util.Iterator#hasNext() |
125 | 117 |
*/ |
... | ... | |
139 | 131 |
@Override |
140 | 132 |
public String next() { |
141 | 133 |
synchronized (recordQueue) { |
142 |
while (recordQueue.isEmpty() && !query.isEmpty() ) {
|
|
134 |
while (recordQueue.isEmpty() && !query.isEmpty()) { |
|
143 | 135 |
try { |
144 |
log.info("get Query: " + query);
|
|
136 |
log.info("get Query: " + query);
|
|
145 | 137 |
query = downloadPage(query); |
146 |
log.debug("next queryURL from downloadPage(): " + query);
|
|
147 |
} catch(CollectorServiceException e) { |
|
148 |
log.debug("CollectorPlugin.next()-Exception: " + e);
|
|
138 |
log.debug("next queryURL from downloadPage(): " + query);
|
|
139 |
} catch (CollectorServiceException e) {
|
|
140 |
log.debug("CollectorPlugin.next()-Exception: " + e);
|
|
149 | 141 |
throw new RuntimeException(e); |
150 | 142 |
} |
151 | 143 |
} |
152 | 144 |
return recordQueue.poll(); |
153 | 145 |
} |
154 | 146 |
} |
155 |
|
|
156 |
|
|
147 |
|
|
157 | 148 |
/* |
158 | 149 |
* download page and return nextQuery |
159 | 150 |
*/ |
160 |
private String downloadPage(String query) throws CollectorServiceException{ |
|
151 |
private String downloadPage(String query) throws CollectorServiceException {
|
|
161 | 152 |
String resultJson; |
162 | 153 |
String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; |
163 | 154 |
String nextQuery = ""; |
164 |
String emptyXml = resultXml + "<"+wrapName+"></"+wrapName+">";
|
|
165 |
Node resultNode = null;
|
|
166 |
NodeList nodeList = null;
|
|
167 |
String qUrlArgument = "";
|
|
168 |
int urlOldResumptionSize = 0;
|
|
169 |
|
|
155 |
String emptyXml = resultXml + "<" + wrapName + "></" + wrapName + ">";
|
|
156 |
Node resultNode = null;
|
|
157 |
NodeList nodeList = null;
|
|
158 |
String qUrlArgument = "";
|
|
159 |
int urlOldResumptionSize = 0;
|
|
160 |
|
|
170 | 161 |
try { |
171 |
URL qUrl = new URL(query);
|
|
172 |
|
|
173 |
resultStream = qUrl.openStream();
|
|
174 |
if("json".equals(resultFormatValue.toLowerCase())){
|
|
175 |
|
|
176 |
resultJson = IOUtils.toString(resultStream,"UTF-8"); |
|
162 |
URL qUrl = new URL(query);
|
|
163 |
|
|
164 |
resultStream = qUrl.openStream();
|
|
165 |
if ("json".equals(resultFormatValue.toLowerCase())) {
|
|
166 |
|
|
167 |
resultJson = IOUtils.toString(resultStream, "UTF-8");
|
|
177 | 168 |
resultJson = syntaxConvertJsonKeyNamens(resultJson); |
178 | 169 |
org.json.JSONObject jsonObject = new org.json.JSONObject(resultJson); |
179 |
resultXml += org.json.XML.toString(jsonObject,wrapName); // wrap xml in single root element
|
|
170 |
resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element
|
|
180 | 171 |
log.trace("before inputStream: " + resultXml); |
181 |
resultXml = XmlCleaner.cleanAllEntities(resultXml);
|
|
182 |
log.trace("after cleaning: " + resultXml);
|
|
183 |
resultStream = IOUtils.toInputStream(resultXml,"UTF-8"); |
|
172 |
resultXml = XmlCleaner.cleanAllEntities(resultXml);
|
|
173 |
log.trace("after cleaning: " + resultXml);
|
|
174 |
resultStream = IOUtils.toInputStream(resultXml, "UTF-8");
|
|
184 | 175 |
} |
185 |
|
|
186 |
if (!(emptyXml.toLowerCase()).equals(resultXml.toLowerCase())) {
|
|
187 |
resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
|
|
188 |
nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
|
|
189 |
log.debug("nodeList.length: " + nodeList.getLength());
|
|
190 |
for (int i = 0; i < nodeList.getLength(); i++) {
|
|
191 |
StringWriter sw = new StringWriter();
|
|
192 |
transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
|
|
193 |
recordQueue.add(sw.toString());
|
|
194 |
}
|
|
195 |
} else { log.info("resultXml is equal with emptyXml"); }
|
|
196 |
|
|
176 |
|
|
177 |
if (!(emptyXml.toLowerCase()).equals(resultXml.toLowerCase())) {
|
|
178 |
resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
|
|
179 |
nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
|
|
180 |
log.debug("nodeList.length: " + nodeList.getLength());
|
|
181 |
for (int i = 0; i < nodeList.getLength(); i++) {
|
|
182 |
StringWriter sw = new StringWriter();
|
|
183 |
transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
|
|
184 |
recordQueue.add(sw.toString());
|
|
185 |
}
|
|
186 |
} else { log.info("resultXml is equal with emptyXml"); }
|
|
187 |
|
|
197 | 188 |
resumptionInt += resultSizeValue; |
198 |
|
|
199 |
switch(resumptionType.toLowerCase()) { |
|
200 |
case "scan": // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items |
|
201 |
resumptionStr = xprResumptionPath.evaluate(resultNode); |
|
202 |
break; |
|
203 |
|
|
204 |
case "count": // begin at one step for all records, iterate over items |
|
205 |
resumptionStr = Integer.toString(resumptionInt); |
|
206 |
break; |
|
207 |
|
|
208 |
case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808) |
|
209 |
if (resultSizeValue < 2 ) {throw new CollectorServiceException("Mode: discover, Param 'resultSizeValue' is less than 2");} |
|
210 |
qUrlArgument = qUrl.getQuery(); |
|
211 |
String[] arrayQUrlArgument = qUrlArgument.split("&"); |
|
212 |
for(String arrayUrlArgStr : arrayQUrlArgument ) { |
|
213 |
if(arrayUrlArgStr.startsWith(resumptionParam)) { |
|
214 |
String[] resumptionKeyValue = arrayUrlArgStr.split("="); |
|
215 |
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); |
|
216 |
log.debug("discover OldResumptionSize from Url: " + urlOldResumptionSize); |
|
217 |
} |
|
218 |
} |
|
219 | 189 |
|
220 |
if( ( (emptyXml.toLowerCase()).equals(resultXml.toLowerCase()) ) |
|
221 |
|| ( (nodeList != null) && (nodeList.getLength() < resultSizeValue) ) |
|
222 |
) { |
|
223 |
// resumptionStr = ""; |
|
224 |
if(nodeList != null) { discoverResultSize += nodeList.getLength(); } |
|
225 |
resultTotal = discoverResultSize; |
|
226 |
} else { |
|
227 |
resumptionStr = Integer.toString(resumptionInt); |
|
228 |
resultTotal = resumptionInt+1; |
|
229 |
if(nodeList != null) { discoverResultSize += nodeList.getLength(); } |
|
230 |
} |
|
231 |
log.info("discoverResultSize: " + discoverResultSize); |
|
232 |
break; |
|
233 |
|
|
234 |
case "pagination": |
|
235 |
case "page": // pagination, iterate over pages |
|
236 |
pagination += 1; |
|
237 |
if (nodeList != null) { |
|
238 |
discoverResultSize += nodeList.getLength(); |
|
239 |
} else { |
|
240 |
resultTotal = discoverResultSize; |
|
241 |
pagination = discoverResultSize; |
|
242 |
} |
|
243 |
resumptionInt = pagination; |
|
244 |
resumptionStr = Integer.toString(resumptionInt); |
|
245 |
break; |
|
246 |
|
|
247 |
default: // otherwise: abort |
|
248 |
// resultTotal = resumptionInt; |
|
249 |
break; |
|
250 |
} |
|
190 |
switch (resumptionType.toLowerCase()) { |
|
191 |
case "scan": // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items |
|
192 |
resumptionStr = xprResumptionPath.evaluate(resultNode); |
|
193 |
break; |
|
251 | 194 |
|
195 |
case "count": // begin at one step for all records, iterate over items |
|
196 |
resumptionStr = Integer.toString(resumptionInt); |
|
197 |
break; |
|
198 |
|
|
199 |
case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808) |
|
200 |
if (resultSizeValue < 2) {throw new CollectorServiceException("Mode: discover, Param 'resultSizeValue' is less than 2");} |
|
201 |
qUrlArgument = qUrl.getQuery(); |
|
202 |
String[] arrayQUrlArgument = qUrlArgument.split("&"); |
|
203 |
for (String arrayUrlArgStr : arrayQUrlArgument) { |
|
204 |
if (arrayUrlArgStr.startsWith(resumptionParam)) { |
|
205 |
String[] resumptionKeyValue = arrayUrlArgStr.split("="); |
|
206 |
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); |
|
207 |
log.debug("discover OldResumptionSize from Url: " + urlOldResumptionSize); |
|
208 |
} |
|
209 |
} |
|
210 |
|
|
211 |
if (((emptyXml.toLowerCase()).equals(resultXml.toLowerCase())) |
|
212 |
|| ((nodeList != null) && (nodeList.getLength() < resultSizeValue)) |
|
213 |
) { |
|
214 |
// resumptionStr = ""; |
|
215 |
if (nodeList != null) { discoverResultSize += nodeList.getLength(); } |
|
216 |
resultTotal = discoverResultSize; |
|
217 |
} else { |
|
218 |
resumptionStr = Integer.toString(resumptionInt); |
|
219 |
resultTotal = resumptionInt + 1; |
|
220 |
if (nodeList != null) { discoverResultSize += nodeList.getLength(); } |
|
221 |
} |
|
222 |
log.info("discoverResultSize: " + discoverResultSize); |
|
223 |
break; |
|
224 |
|
|
225 |
case "pagination": |
|
226 |
case "page": // pagination, iterate over pages |
|
227 |
pagination += 1; |
|
228 |
if (nodeList != null) { |
|
229 |
discoverResultSize += nodeList.getLength(); |
|
230 |
} else { |
|
231 |
resultTotal = discoverResultSize; |
|
232 |
pagination = discoverResultSize; |
|
233 |
} |
|
234 |
resumptionInt = pagination; |
|
235 |
resumptionStr = Integer.toString(resumptionInt); |
|
236 |
break; |
|
237 |
|
|
238 |
default: // otherwise: abort |
|
239 |
// resultTotal = resumptionInt; |
|
240 |
break; |
|
241 |
} |
|
242 |
|
|
252 | 243 |
if (resultTotal == -1) { |
253 | 244 |
resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode)); |
254 |
if(resumptionType.toLowerCase().equals("page")) { resultTotal += 1; } // to correct the upper bound
|
|
245 |
if (resumptionType.toLowerCase().equals("page")) { resultTotal += 1; } // to correct the upper bound
|
|
255 | 246 |
log.info("resultTotal was -1 is now: " + resultTotal); |
256 | 247 |
} |
257 | 248 |
log.info("resultTotal: " + resultTotal); |
... | ... | |
260 | 251 |
nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat; |
261 | 252 |
} else |
262 | 253 |
nextQuery = ""; |
263 |
|
|
264 |
log.debug("nextQueryUrl: " + nextQuery);
|
|
254 |
|
|
255 |
log.debug("nextQueryUrl: " + nextQuery);
|
|
265 | 256 |
return nextQuery; |
266 | 257 |
|
267 |
} catch(Exception e) { |
|
258 |
} catch (Exception e) {
|
|
268 | 259 |
log.error(e); |
269 | 260 |
throw new IllegalStateException("collection failed: " + e.getMessage()); |
270 | 261 |
} |
271 | 262 |
} |
272 |
|
|
273 |
/** |
|
274 |
* convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to '' |
|
275 |
* check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names |
|
276 |
* and work-around for the JSON to XML converting of org.json.XML-package. |
|
277 |
* |
|
278 |
* known bugs: doesn't prevent "key name":" ["sexy name",": penari","erotic dance"], |
|
279 |
* |
|
280 |
* @param jsonInput |
|
281 |
* @return convertedJsonKeynameOutput |
|
282 |
*/ |
|
283 |
private String syntaxConvertJsonKeyNamens(String jsonInput) { |
|
284 | 263 |
|
285 |
log.trace("before convertJsonKeyNames: " + jsonInput); |
|
286 |
// pre-clean JSON: remove spaces from key names (otherwise misinterpreted as elements with attributes in XML) |
|
287 |
// replace whitespace in JSON key names with '_' |
|
288 |
while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) { |
|
289 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":"); |
|
290 |
} |
|
264 |
/** |
|
265 |
* convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to '' |
|
266 |
* check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names |
|
267 |
* and work-around for the JSON to XML converting of org.json.XML-package. |
|
268 |
* |
|
269 |
* known bugs: doesn't prevent "key name":" ["sexy name",": penari","erotic dance"], |
|
270 |
* |
|
271 |
* @param jsonInput |
|
272 |
* @return convertedJsonKeynameOutput |
|
273 |
*/ |
|
274 |
private String syntaxConvertJsonKeyNamens(String jsonInput) { |
|
291 | 275 |
|
292 |
// replace forward-slash (sign '/' ) in JSON Names with '_' |
|
293 |
while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) { |
|
294 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":"); |
|
295 |
} |
|
276 |
log.trace("before convertJsonKeyNames: " + jsonInput); |
|
277 |
// pre-clean JSON: remove spaces from key names (otherwise misinterpreted as elements with attributes in XML) |
|
278 |
// replace whitespace in JSON key names with '_' |
|
279 |
while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) { |
|
280 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":"); |
|
281 |
} |
|
296 | 282 |
|
297 |
// replace '(' in JSON Names with '' |
|
298 |
while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) { |
|
299 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":"); |
|
300 |
} |
|
301 |
|
|
302 |
// replace ')' in JSON Names with '' |
|
303 |
while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) { |
|
304 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":"); |
|
305 |
} |
|
283 |
// replace forward-slash (sign '/' ) in JSON Names with '_' |
|
284 |
while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) { |
|
285 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":"); |
|
286 |
} |
|
306 | 287 |
|
307 |
// replace startNumbers in JSON Keynames with 'n_' |
|
308 |
while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) { |
|
309 |
jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":"); |
|
310 |
} |
|
311 |
|
|
312 |
// replace ':' between number like '2018-08-28T11:05:00Z' in JSON keynames with '' |
|
313 |
while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) { |
|
314 |
jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":"); |
|
315 |
} |
|
288 |
// replace '(' in JSON Names with '' |
|
289 |
while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) { |
|
290 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":"); |
|
291 |
} |
|
316 | 292 |
|
317 |
// replace ',' in JSON Keynames with '.' to prevent , in xml tagnames. |
|
318 |
// while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) { |
|
319 |
// jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":"); |
|
320 |
// } |
|
321 |
|
|
322 |
// replace '=' in JSON Keynames with '-' |
|
323 |
while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) { |
|
324 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":"); |
|
325 |
} |
|
326 |
|
|
327 |
log.trace("after syntaxConvertJsonKeyNames: " + jsonInput); |
|
328 |
return jsonInput; |
|
329 |
} |
|
330 |
|
|
331 |
/** |
|
332 |
* |
|
333 |
* https://www.w3.org/TR/REC-xml/#charencoding shows character encoding in entities |
|
334 |
* * |
|
335 |
* @param bufferStr - XML string |
|
336 |
* @return |
|
337 |
*/ |
|
338 |
private static String cleanUnwantedJsonCharsInXmlTagnames( String bufferStr ) { |
|
339 |
|
|
340 |
while ( bufferStr.matches(".*<([^<>].*),(.)>.*") ) { |
|
341 |
bufferStr = bufferStr.replaceAll("<([^<>.*),(.*)>", "<$1$2>"); |
|
342 |
} |
|
293 |
// replace ')' in JSON Names with '' |
|
294 |
while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) { |
|
295 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":"); |
|
296 |
} |
|
343 | 297 |
|
344 |
// replace [#x10-#x1f] with '' |
|
345 |
// while (bufferStr.matches(".*[0-9a-f].*")) { |
|
346 |
// bufferStr = bufferStr.replaceAll("([0-9a-fA-F])", ""); |
|
347 |
// } |
|
348 |
|
|
349 |
return bufferStr; |
|
350 |
} |
|
298 |
// replace startNumbers in JSON Keynames with 'n_' |
|
299 |
while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) { |
|
300 |
jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":"); |
|
301 |
} |
|
351 | 302 |
|
303 |
// replace ':' between number like '2018-08-28T11:05:00Z' in JSON keynames with '' |
|
304 |
while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) { |
|
305 |
jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":"); |
|
306 |
} |
|
307 |
|
|
308 |
// replace ',' in JSON Keynames with '.' to prevent , in xml tagnames. |
|
309 |
// while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) { |
|
310 |
// jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":"); |
|
311 |
// } |
|
312 |
|
|
313 |
// replace '=' in JSON Keynames with '-' |
|
314 |
while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) { |
|
315 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":"); |
|
316 |
} |
|
317 |
|
|
318 |
log.trace("after syntaxConvertJsonKeyNames: " + jsonInput); |
|
319 |
return jsonInput; |
|
320 |
} |
|
321 |
|
|
322 |
/** |
|
323 |
* |
|
324 |
* https://www.w3.org/TR/REC-xml/#charencoding shows character encoding in entities |
|
325 |
* * |
|
326 |
* @param bufferStr - XML string |
|
327 |
* @return |
|
328 |
*/ |
|
329 |
private static String cleanUnwantedJsonCharsInXmlTagnames(String bufferStr) { |
|
330 |
|
|
331 |
while (bufferStr.matches(".*<([^<>].*),(.)>.*")) { |
|
332 |
bufferStr = bufferStr.replaceAll("<([^<>.*),(.*)>", "<$1$2>"); |
|
333 |
} |
|
334 |
|
|
335 |
// replace [#x10-#x1f] with '' |
|
336 |
// while (bufferStr.matches(".*[0-9a-f].*")) { |
|
337 |
// bufferStr = bufferStr.replaceAll("([0-9a-fA-F])", ""); |
|
338 |
// } |
|
339 |
|
|
340 |
return bufferStr; |
|
341 |
} |
|
342 |
|
|
352 | 343 |
} |
Also available in: Unified diff
code formatting