Revision 52970
Added by Andreas Czerniak over 5 years ago
RestIterator.java | ||
---|---|---|
34 | 34 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
35 | 35 |
|
36 | 36 |
/** |
37 |
* @author Jochen Schirrwagen, Aenne Loehden |
|
37 |
* @author Jochen Schirrwagen, Aenne Loehden, Andreas Czerniak |
|
38 |
* @date 2018-08-06 |
|
38 | 39 |
* |
39 | 40 |
*/ |
40 | 41 |
public class RestIterator implements Iterator<String> { |
... | ... | |
47 | 48 |
private String resumptionParam; |
48 | 49 |
private String resultFormatValue; |
49 | 50 |
private String queryParams; |
50 |
private int resultSizeValue = 100;
|
|
51 |
private int resultSizeValue; |
|
51 | 52 |
private int resumptionInt = 0; // integer resumption token (first record to harvest) |
52 | 53 |
private int resultTotal = -1; |
53 | 54 |
private String resumptionStr = Integer.toString(resumptionInt); // string resumption token (first record to harvest or token scanned from results) |
... | ... | |
71 | 72 |
final String resultFormatParam, |
72 | 73 |
final String resultFormatValue, |
73 | 74 |
final String resultSizeParam, |
75 |
final String resultSizeValue, |
|
74 | 76 |
final String queryParams, |
75 | 77 |
final String entityXpath |
76 | 78 |
) { |
... | ... | |
93 | 95 |
|
94 | 96 |
private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) throws TransformerConfigurationException, XPathExpressionException{ |
95 | 97 |
transformer = TransformerFactory.newInstance().newTransformer(); |
96 |
transformer.setOutputProperty(OutputKeys.INDENT,"yes"); |
|
97 |
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount","3"); |
|
98 |
xpath = XPathFactory.newInstance().newXPath(); |
|
98 |
transformer.setOutputProperty(OutputKeys.INDENT,"yes");
|
|
99 |
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount","3");
|
|
100 |
xpath = XPathFactory.newInstance().newXPath();
|
|
99 | 101 |
xprResultTotalPath = xpath.compile(resultTotalXpath); |
100 |
xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath); |
|
101 |
xprEntity = xpath.compile(entityXpath); |
|
102 |
xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath);
|
|
103 |
xprEntity = xpath.compile(entityXpath);
|
|
102 | 104 |
} |
103 | 105 |
|
104 | 106 |
private void initQueue() { |
... | ... | |
149 | 151 |
String nextQuery = ""; |
150 | 152 |
try { |
151 | 153 |
resultStream = new URL(query).openStream(); |
152 |
if("json".equals(resultFormatValue)){ |
|
154 |
if("json".equals(resultFormatValue.toLowerCase())){
|
|
153 | 155 |
resultJson = IOUtils.toString(resultStream,"UTF-8"); |
154 | 156 |
|
155 | 157 |
//TODO move regex definitions as constant fields |
156 | 158 |
// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml) |
157 |
while(resultJson.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")){ |
|
158 |
resultJson = resultJson.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":"); |
|
159 |
} |
|
159 |
resultJson = syntaxConvertJsonKeyNamens(resultJson); |
|
160 |
// while(resultJson.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")){ |
|
161 |
// resultJson = resultJson.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":"); |
|
162 |
// } |
|
160 | 163 |
org.json.JSONObject jsonObject = new org.json.JSONObject(resultJson); |
161 | 164 |
resultXml = org.json.XML.toString(jsonObject,wrapName); // wrap xml in single root element |
162 | 165 |
// log.info(resultXml); |
... | ... | |
173 | 176 |
} |
174 | 177 |
|
175 | 178 |
resumptionInt += resultSizeValue; |
176 |
if("scan".equals(resumptionType)) { resumptionStr = xprResumptionPath.evaluate(resultNode);} |
|
177 |
if("count".equals(resumptionType)){ resumptionStr = Integer.toString(resumptionInt); } |
|
178 |
|
|
179 |
|
|
180 |
switch(resumptionType.toLowerCase()) { |
|
181 |
case "scan": |
|
182 |
resumptionStr = xprResumptionPath.evaluate(resultNode); |
|
183 |
break; |
|
184 |
case "count": |
|
185 |
resumptionStr = Integer.toString(resumptionInt); |
|
186 |
break; |
|
187 |
case "discover": |
|
188 |
String emptyXml = "<"+wrapName+"></"+wrapName+">"; |
|
189 |
if( (emptyXml.toLowerCase()).equals(resultXml.toLowerCase()) ) { |
|
190 |
resumptionStr = ""; |
|
191 |
resultTotal = resumptionInt; |
|
192 |
} else { |
|
193 |
resumptionStr = Integer.toString(resumptionInt); |
|
194 |
resultTotal = resumptionInt+1; |
|
195 |
} |
|
196 |
break; |
|
197 |
default: |
|
198 |
} |
|
199 |
/* if("scan".equals(resumptionType.toLowerCase())) { resumptionStr = xprResumptionPath.evaluate(resultNode);} |
|
200 |
if("count".equals(resumptionType.toLowerCase())){ resumptionStr = Integer.toString(resumptionInt); } |
|
201 |
*/ |
|
179 | 202 |
if (resultTotal == -1) { |
180 | 203 |
resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode)); |
181 | 204 |
log.info("resultTotal: " + resultTotal); |
... | ... | |
193 | 216 |
throw new IllegalStateException("collection failed: " + e.getMessage()); |
194 | 217 |
} |
195 | 218 |
} |
219 |
|
|
220 |
/** |
|
221 |
* convert in Json-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to '' |
|
222 |
* |
|
223 |
* @param jsonInput |
|
224 |
* @return |
|
225 |
*/ |
|
226 |
private String syntaxConvertJsonKeyNamens(String jsonInput) { |
|
196 | 227 |
|
228 |
// replace ' 's in JSON Namens with '_' |
|
229 |
while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) { |
|
230 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":"); |
|
231 |
} |
|
232 |
|
|
233 |
// replace forward-slash (sign '/' ) in JSON Names with '_' |
|
234 |
while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) { |
|
235 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":"); |
|
236 |
} |
|
237 |
|
|
238 |
// replace '(' in JSON Names with '' |
|
239 |
while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) { |
|
240 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":"); |
|
241 |
} |
|
242 |
// replace ')' in JSON Names with '' |
|
243 |
while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) { |
|
244 |
jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":"); |
|
245 |
} |
|
246 |
|
|
247 |
return jsonInput; |
|
248 |
} |
|
249 |
|
|
197 | 250 |
} |
Also available in: Unified diff
Changes to the Rest_Json CollectorPlugin, with enhancements for the new OpenDOAR API hosted by JISC at https://v2.sherpa.ac.uk/opendoar/