Revision 58993
Added by Alessia Bardi almost 4 years ago
RestIterator.java | ||
---|---|---|
24 | 24 |
import javax.xml.xpath.*; |
25 | 25 |
|
26 | 26 |
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner; |
27 |
import eu.dnetlib.data.collector.plugins.utils.JsonUtils; |
|
27 | 28 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
28 | 29 |
import org.apache.commons.io.IOUtils; |
29 | 30 |
import org.apache.commons.lang3.StringUtils; |
... | ... | |
43 | 44 |
// TODO: clean up the comments of replaced source code |
44 | 45 |
private static final Log log = LogFactory.getLog(RestIterator.class); // NOPMD by marko on 11/24/08 5:02 PM |
45 | 46 |
|
46 |
private static final String wrapName = "recordWrap"; |
|
47 |
private JsonUtils jsonUtils; |
|
48 |
|
|
47 | 49 |
private String baseUrl; |
48 | 50 |
private String resumptionType; |
49 | 51 |
private String resumptionParam; |
... | ... | |
152 | 154 |
final String authMethod, |
153 | 155 |
final String authToken |
154 | 156 |
) { |
157 |
this.jsonUtils = new JsonUtils(); |
|
155 | 158 |
this.baseUrl = baseUrl; |
156 | 159 |
this.resumptionType = resumptionType; |
157 | 160 |
this.resumptionParam = resumptionParam; |
... | ... | |
232 | 235 |
String resultJson; |
233 | 236 |
String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; |
234 | 237 |
String nextQuery = ""; |
235 |
String emptyXml = resultXml + "<" + wrapName + "></" + wrapName + ">";
|
|
238 |
String emptyXml = resultXml + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">";
|
|
236 | 239 |
Node resultNode = null; |
237 | 240 |
NodeList nodeList = null; |
238 | 241 |
String qUrlArgument = ""; |
... | ... | |
251 | 254 |
URL qUrl = new URL(query); |
252 | 255 |
|
253 | 256 |
if (this.authMethod == "bearer") { |
254 |
log.trace("authMethode before inputStream: " + resultXml);
|
|
257 |
log.trace("authMethod before inputStream: " + resultXml); |
|
255 | 258 |
|
256 | 259 |
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); |
257 | 260 |
conn.setRequestProperty("Authorization","Bearer "+authToken); |
... | ... | |
265 | 268 |
resultStream = theHttpInputStream; |
266 | 269 |
if ("json".equals(resultFormatValue.toLowerCase())) { |
267 | 270 |
resultJson = IOUtils.toString(resultStream, "UTF-8"); |
268 |
resultJson = syntaxConvertJsonKeyNamens(resultJson); |
|
269 |
org.json.JSONObject jsonObject = new org.json.JSONObject(resultJson); |
|
270 |
resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element |
|
271 |
log.trace("before inputStream: " + resultXml); |
|
272 |
resultXml = XmlCleaner.cleanAllEntities(resultXml); |
|
273 |
log.trace("after cleaning: " + resultXml); |
|
271 |
resultXml = jsonUtils.convertToXML(resultJson); |
|
274 | 272 |
resultStream = IOUtils.toInputStream(resultXml, "UTF-8"); |
275 | 273 |
} |
276 | 274 |
|
... | ... | |
392 | 390 |
|
393 | 391 |
} |
394 | 392 |
|
395 |
/** |
|
396 |
* convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to '' |
|
397 |
* check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names |
|
398 |
* and work-around for the JSON to XML converting of org.json.XML-package. |
|
399 |
* |
|
400 |
* known bugs: doesn't prevent "key name":" ["sexy name",": penari","erotic dance"], |
|
401 |
* |
|
402 |
* @param jsonInput |
|
403 |
* @return convertedJsonKeynameOutput |
|
404 |
*/ |
|
405 |
private String syntaxConvertJsonKeyNamens(String jsonInput) { |
|
406 | 393 |
|
407 |
log.trace("before convertJsonKeyNames: " + jsonInput); |
|
408 |
// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml) |
|
409 |
// replace ' 's in JSON Namens with '_' |
|
410 |
while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) { |
|
411 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":"); |
|
412 |
} |
|
413 |
|
|
414 |
// replace forward-slash (sign '/' ) in JSON Names with '_' |
|
415 |
while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) { |
|
416 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":"); |
|
417 |
} |
|
418 |
|
|
419 |
// replace '(' in JSON Names with '' |
|
420 |
while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) { |
|
421 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":"); |
|
422 |
} |
|
423 |
|
|
424 |
// replace ')' in JSON Names with '' |
|
425 |
while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) { |
|
426 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":"); |
|
427 |
} |
|
428 |
|
|
429 |
// add prefix of startNumbers in JSON Keynames with 'n_' |
|
430 |
while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) { |
|
431 |
jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":"); |
|
432 |
} |
|
433 |
// add prefix of only numbers in JSON Keynames with 'm_' |
|
434 |
while (jsonInput.matches(".*\"([0-9]+)\":.*")) { |
|
435 |
jsonInput = jsonInput.replaceAll("\"([0-9]+)\":", "\"m_$1\":"); |
|
436 |
} |
|
437 |
|
|
438 |
// replace ':' between number like '2018-08-28T11:05:00Z' in JSON keynames with '' |
|
439 |
while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) { |
|
440 |
jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":"); |
|
441 |
} |
|
442 |
|
|
443 |
// replace ',' in JSON Keynames with '.' to prevent , in xml tagnames. |
|
444 |
// while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) { |
|
445 |
// jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":"); |
|
446 |
// } |
|
447 |
|
|
448 |
// replace '=' in JSON Keynames with '-' |
|
449 |
while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) { |
|
450 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":"); |
|
451 |
} |
|
452 |
|
|
453 |
log.trace("after syntaxConvertJsonKeyNames: " + jsonInput); |
|
454 |
return jsonInput; |
|
455 |
} |
|
456 |
|
|
457 |
/** |
|
458 |
* |
|
459 |
* https://www.w3.org/TR/REC-xml/#charencoding shows character enoding in entities |
|
460 |
* * |
|
461 |
* @param bufferStr - XML string |
|
462 |
* @return |
|
463 |
*/ |
|
464 |
private static String cleanUnwantedJsonCharsInXmlTagnames(String bufferStr) { |
|
465 |
|
|
466 |
while (bufferStr.matches(".*<([^<>].*),(.)>.*")) { |
|
467 |
bufferStr = bufferStr.replaceAll("<([^<>.*),(.*)>", "<$1$2>"); |
|
468 |
} |
|
469 |
|
|
470 |
// replace [#x10-#x1f] with '' |
|
471 |
// while (bufferStr.matches(".*[0-9a-f].*")) { |
|
472 |
// bufferStr = bufferStr.replaceAll("([0-9a-fA-F])", ""); |
|
473 |
// } |
|
474 |
|
|
475 |
return bufferStr; |
|
476 |
} |
|
477 | 394 |
|
478 | 395 |
private boolean isInteger(String s) { |
479 | 396 |
boolean isValidInteger = false; |
Also available in: Unified diff
Refactored the methods that work with JSON so that they can be reused by other plugins