Project

General

Profile

« Previous | Next » 

Revision 53116

Added by Andreas Czerniak over 5 years ago

org.json.XML - update Maven package version to 20180813
Better Unicode support

View differences:

RestIterator.java
35 35
import org.xml.sax.InputSource;
36 36

  
37 37
import eu.dnetlib.data.collector.plugins.oai.OaiIterator;
38
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
38 39
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
39 40

  
40 41
/**
......
144 145
				try {
145 146
                                        log.info("get Query: " + query);
146 147
					query = downloadPage(query);
147
                                        log.debug("next query from downloadPage method: " + query);
148
                                        log.debug("next queryURL from downloadPage(): " + query);
148 149
				} catch(CollectorServiceException e) {
149 150
                                        log.debug("CollectorPlugin.next()-Exception: " + e);
150 151
					throw new RuntimeException(e);
......
160 161
	 */
161 162
	private String downloadPage(String query) throws CollectorServiceException{
162 163
		String resultJson;
163
		String resultXml = "";
164
		String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
164 165
		String nextQuery = "";
165
                String emptyXml = "<"+wrapName+"></"+wrapName+">";
166
                String emptyXml = resultXml + "<"+wrapName+"></"+wrapName+">";
166 167
                Node resultNode = null;
167 168
                NodeList nodeList = null;
168 169
                String qUrlArgument = "";
......
172 173
                        URL qUrl = new URL(query);
173 174
                        
174 175
                        resultStream = qUrl.openStream();
175
			if("json".equals(resultFormatValue.toLowerCase())){				
176
			if("json".equals(resultFormatValue.toLowerCase())){	
177
                            
176 178
				resultJson = IOUtils.toString(resultStream,"UTF-8");
177

  
178
				//TODO move regex definitions as constant fields
179
				// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
180 179
				resultJson = syntaxConvertJsonKeyNamens(resultJson);
181
//                                while(resultJson.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")){
182
//					resultJson = resultJson.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
183
//				}
184 180
				org.json.JSONObject jsonObject = new org.json.JSONObject(resultJson);
185
                                resultXml = org.json.XML.toString(jsonObject,wrapName); // wrap xml in single root element
186
				log.trace(resultXml);
181
                                resultXml += org.json.XML.toString(jsonObject,wrapName); // wrap xml in single root element
182
				log.trace("before inputStream: " + resultXml);
183
//                                resultXml = XmlCleaner.cleanAllEntities(resultXml);
184
//                                log.trace("after cleaning: " + resultXml);
187 185
				resultStream = IOUtils.toInputStream(resultXml,"UTF-8");
188 186
			}
189 187
			
......
213 211
                                            resumptionStr = Integer.toString(resumptionInt);
214 212
                                            break;
215 213
                                            
216
                            case "discover":   // size of result items unknown, iterate over items                                     
214
                            case "discover":   // size of result items unknown, iterate over items  (for openDOAR - 201808)                                   
217 215
                                            if (resultSizeValue < 2 ) {throw new CollectorServiceException("Mode: discover, Param 'resultSizeValue' is less than 2");}
218 216
                                            qUrlArgument = qUrl.getQuery();
219 217
                                            String[] arrayQUrlArgument = qUrlArgument.split("&");
......
280 278
        private String syntaxConvertJsonKeyNamens(String jsonInput) {
281 279

  
282 280
            log.trace("before convertJsonKeyNames: " + jsonInput);
281
            // pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
283 282
            // replace ' 's in JSON Namens with '_'
284 283
            while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) {
285 284
                jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");

Also available in: Unified diff