24 |
24 |
import javax.xml.xpath.*;
|
25 |
25 |
|
26 |
26 |
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
|
|
27 |
import eu.dnetlib.data.collector.plugins.utils.JsonUtils;
|
27 |
28 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
28 |
29 |
import org.apache.commons.io.IOUtils;
|
29 |
30 |
import org.apache.commons.lang3.StringUtils;
|
... | ... | |
43 |
44 |
// TODO: clean up the comments of replaced source code
|
44 |
45 |
private static final Log log = LogFactory.getLog(RestIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
|
45 |
46 |
|
46 |
|
private static final String wrapName = "recordWrap";
|
|
47 |
private JsonUtils jsonUtils;
|
|
48 |
|
47 |
49 |
private String baseUrl;
|
48 |
50 |
private String resumptionType;
|
49 |
51 |
private String resumptionParam;
|
... | ... | |
152 |
154 |
final String authMethod,
|
153 |
155 |
final String authToken
|
154 |
156 |
) {
|
|
157 |
this.jsonUtils = new JsonUtils();
|
155 |
158 |
this.baseUrl = baseUrl;
|
156 |
159 |
this.resumptionType = resumptionType;
|
157 |
160 |
this.resumptionParam = resumptionParam;
|
... | ... | |
232 |
235 |
String resultJson;
|
233 |
236 |
String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
234 |
237 |
String nextQuery = "";
|
235 |
|
String emptyXml = resultXml + "<" + wrapName + "></" + wrapName + ">";
|
|
238 |
String emptyXml = resultXml + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">";
|
236 |
239 |
Node resultNode = null;
|
237 |
240 |
NodeList nodeList = null;
|
238 |
241 |
String qUrlArgument = "";
|
... | ... | |
251 |
254 |
URL qUrl = new URL(query);
|
252 |
255 |
|
253 |
256 |
if (this.authMethod == "bearer") {
|
254 |
|
log.trace("authMethode before inputStream: " + resultXml);
|
|
257 |
log.trace("authMethod before inputStream: " + resultXml);
|
255 |
258 |
|
256 |
259 |
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
|
257 |
260 |
conn.setRequestProperty("Authorization","Bearer "+authToken);
|
... | ... | |
265 |
268 |
resultStream = theHttpInputStream;
|
266 |
269 |
if ("json".equals(resultFormatValue.toLowerCase())) {
|
267 |
270 |
resultJson = IOUtils.toString(resultStream, "UTF-8");
|
268 |
|
resultJson = syntaxConvertJsonKeyNamens(resultJson);
|
269 |
|
org.json.JSONObject jsonObject = new org.json.JSONObject(resultJson);
|
270 |
|
resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element
|
271 |
|
log.trace("before inputStream: " + resultXml);
|
272 |
|
resultXml = XmlCleaner.cleanAllEntities(resultXml);
|
273 |
|
log.trace("after cleaning: " + resultXml);
|
|
271 |
resultXml = jsonUtils.convertToXML(resultJson);
|
274 |
272 |
resultStream = IOUtils.toInputStream(resultXml, "UTF-8");
|
275 |
273 |
}
|
276 |
274 |
|
... | ... | |
392 |
390 |
|
393 |
391 |
}
|
394 |
392 |
|
395 |
|
/**
|
396 |
|
* convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to ''
|
397 |
|
* check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names
|
398 |
|
* and work-around for the JSON to XML converting of org.json.XML-package.
|
399 |
|
*
|
400 |
|
* known bugs: doesn't prevent "key name":" ["sexy name",": penari","erotic dance"],
|
401 |
|
*
|
402 |
|
* @param jsonInput
|
403 |
|
* @return convertedJsonKeynameOutput
|
404 |
|
*/
|
405 |
|
private String syntaxConvertJsonKeyNamens(String jsonInput) {
|
406 |
393 |
|
407 |
|
log.trace("before convertJsonKeyNames: " + jsonInput);
|
408 |
|
// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
|
409 |
|
// replace ' 's in JSON Namens with '_'
|
410 |
|
while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) {
|
411 |
|
jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
|
412 |
|
}
|
413 |
|
|
414 |
|
// replace forward-slash (sign '/' ) in JSON Names with '_'
|
415 |
|
while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) {
|
416 |
|
jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":");
|
417 |
|
}
|
418 |
|
|
419 |
|
// replace '(' in JSON Names with ''
|
420 |
|
while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) {
|
421 |
|
jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":");
|
422 |
|
}
|
423 |
|
|
424 |
|
// replace ')' in JSON Names with ''
|
425 |
|
while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) {
|
426 |
|
jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":");
|
427 |
|
}
|
428 |
|
|
429 |
|
// add prefix of startNumbers in JSON Keynames with 'n_'
|
430 |
|
while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) {
|
431 |
|
jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":");
|
432 |
|
}
|
433 |
|
// add prefix of only numbers in JSON Keynames with 'm_'
|
434 |
|
while (jsonInput.matches(".*\"([0-9]+)\":.*")) {
|
435 |
|
jsonInput = jsonInput.replaceAll("\"([0-9]+)\":", "\"m_$1\":");
|
436 |
|
}
|
437 |
|
|
438 |
|
// replace ':' between number like '2018-08-28T11:05:00Z' in JSON keynames with ''
|
439 |
|
while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) {
|
440 |
|
jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":");
|
441 |
|
}
|
442 |
|
|
443 |
|
// replace ',' in JSON Keynames with '.' to prevent , in xml tagnames.
|
444 |
|
// while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) {
|
445 |
|
// jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":");
|
446 |
|
// }
|
447 |
|
|
448 |
|
// replace '=' in JSON Keynames with '-'
|
449 |
|
while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) {
|
450 |
|
jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":");
|
451 |
|
}
|
452 |
|
|
453 |
|
log.trace("after syntaxConvertJsonKeyNames: " + jsonInput);
|
454 |
|
return jsonInput;
|
455 |
|
}
|
456 |
|
|
457 |
|
/**
|
458 |
|
*
|
459 |
|
* https://www.w3.org/TR/REC-xml/#charencoding shows character enoding in entities
|
460 |
|
* *
|
461 |
|
* @param bufferStr - XML string
|
462 |
|
* @return
|
463 |
|
*/
|
464 |
|
private static String cleanUnwantedJsonCharsInXmlTagnames(String bufferStr) {
|
465 |
|
|
466 |
|
while (bufferStr.matches(".*<([^<>].*),(.)>.*")) {
|
467 |
|
bufferStr = bufferStr.replaceAll("<([^<>.*),(.*)>", "<$1$2>");
|
468 |
|
}
|
469 |
|
|
470 |
|
// replace [#x10-#x1f] with ''
|
471 |
|
// while (bufferStr.matches(".*[0-9a-f].*")) {
|
472 |
|
// bufferStr = bufferStr.replaceAll("([0-9a-fA-F])", "");
|
473 |
|
// }
|
474 |
|
|
475 |
|
return bufferStr;
|
476 |
|
}
|
477 |
394 |
|
478 |
395 |
private boolean isInteger(String s) {
|
479 |
396 |
boolean isValidInteger = false;
|
Refactored the JSON-handling methods so that they can be reused by other plugins.