Project

General

Profile

« Previous | Next » 

Revision 58993

refactored methods working with json so they can be reused by other plugins

View differences:

RestIterator.java
24 24
import javax.xml.xpath.*;
25 25

  
26 26
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
27
import eu.dnetlib.data.collector.plugins.utils.JsonUtils;
27 28
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
28 29
import org.apache.commons.io.IOUtils;
29 30
import org.apache.commons.lang3.StringUtils;
......
43 44
	// TODO: clean up the comments of replaced source code
44 45
	private static final Log log = LogFactory.getLog(RestIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
45 46

  
46
	private static final String wrapName = "recordWrap";
47
	private JsonUtils jsonUtils;
48

  
47 49
	private String baseUrl;
48 50
	private String resumptionType;
49 51
	private String resumptionParam;
......
152 154
			final String authMethod,
153 155
			final String authToken
154 156
	) {
157
		this.jsonUtils = new JsonUtils();
155 158
		this.baseUrl = baseUrl;
156 159
		this.resumptionType = resumptionType;
157 160
		this.resumptionParam = resumptionParam;
......
232 235
		String resultJson;
233 236
		String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
234 237
		String nextQuery = "";
235
		String emptyXml = resultXml + "<" + wrapName + "></" + wrapName + ">";
238
		String emptyXml = resultXml + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">";
236 239
		Node resultNode = null;
237 240
		NodeList nodeList = null;
238 241
		String qUrlArgument = "";
......
251 254
			URL qUrl = new URL(query);
252 255
			
253 256
			if (this.authMethod == "bearer") {
254
				log.trace("authMethode before inputStream: " + resultXml);
257
				log.trace("authMethod before inputStream: " + resultXml);
255 258

  
256 259
				HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
257 260
	        	conn.setRequestProperty("Authorization","Bearer "+authToken);
......
265 268
			resultStream = theHttpInputStream;
266 269
			if ("json".equals(resultFormatValue.toLowerCase())) {
267 270
				resultJson = IOUtils.toString(resultStream, "UTF-8");
268
				resultJson = syntaxConvertJsonKeyNamens(resultJson);
269
				org.json.JSONObject jsonObject = new org.json.JSONObject(resultJson);
270
				resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element
271
				log.trace("before inputStream: " + resultXml);
272
				resultXml = XmlCleaner.cleanAllEntities(resultXml);
273
				log.trace("after cleaning: " + resultXml);
271
				resultXml = jsonUtils.convertToXML(resultJson);
274 272
				resultStream = IOUtils.toInputStream(resultXml, "UTF-8");
275 273
			}
276 274

  
......
392 390

  
393 391
	}
394 392

  
395
	/**
396
	 * In JSON key names, convert whitespace and '/' to '_', and remove '(' and ')'.
397
	 * check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names
398
	 * and a workaround for the JSON-to-XML conversion done by the org.json.XML package.
399
	 *
400
	 * known bugs:     doesn't prevent     "key name":" ["sexy name",": penari","erotic dance"],
401
	 *
402
	 * @param jsonInput
403
	 * @return convertedJsonKeynameOutput
404
	 */
405
	private String syntaxConvertJsonKeyNamens(String jsonInput) {
406 393

  
407
		log.trace("before convertJsonKeyNames: " + jsonInput);
408
		// pre-clean the JSON: remove spaces from element names (they would be misinterpreted as elements with attributes in XML)
409
		// replace whitespace in JSON names with '_'
410
		while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) {
411
			jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
412
		}
413

  
414
		// replace forward-slash (sign '/' ) in JSON Names with '_'
415
		while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) {
416
			jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":");
417
		}
418

  
419
		// replace '(' in JSON Names with ''
420
		while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) {
421
			jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":");
422
		}
423

  
424
		// replace ')' in JSON Names with ''
425
		while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) {
426
			jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":");
427
		}
428

  
429
		// add the prefix 'n_' to JSON key names that start with a number
430
		while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) {
431
			jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":");
432
		}
433
        // add the prefix 'm_' to JSON key names that consist only of digits
434
        while (jsonInput.matches(".*\"([0-9]+)\":.*")) {
435
                jsonInput = jsonInput.replaceAll("\"([0-9]+)\":", "\"m_$1\":");
436
        }
437

  
438
		// remove ':' between digits (as in '2018-08-28T11:05:00Z') in JSON key names
439
		while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) {
440
			jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":");
441
		}
442

  
443
		// replace ',' in JSON Keynames with '.' to prevent , in xml tagnames.
444
		//            while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) {
445
		//                jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":");
446
		//            }
447

  
448
		// replace '=' in JSON Keynames with '-'
449
		while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) {
450
			jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":");
451
		}
452

  
453
		log.trace("after syntaxConvertJsonKeyNames: " + jsonInput);
454
		return jsonInput;
455
	}
456

  
457
	/**
458
	 *
459
	 * https://www.w3.org/TR/REC-xml/#charencoding shows character encoding in entities
460
	 *          *
461
	 * @param bufferStr - XML string
462
	 * @return
463
	 */
464
	private static String cleanUnwantedJsonCharsInXmlTagnames(String bufferStr) {
465

  
466
		while (bufferStr.matches(".*<([^<>].*),(.)>.*")) {
467
			bufferStr = bufferStr.replaceAll("<([^<>.*),(.*)>", "<$1$2>");
468
		}
469

  
470
		// replace [#x10-#x1f] with ''
471
		//            while (bufferStr.matches(".*&#x1[0-9a-f].*")) {
472
		//                bufferStr = bufferStr.replaceAll("&#x1([0-9a-fA-F])", "");
473
		//            }
474

  
475
		return bufferStr;
476
	}
477 394
	
478 395
	private boolean isInteger(String s) {
479 396
		boolean isValidInteger = false;

Also available in: Unified diff