64 |
64 |
private Queue<String> recordQueue = new PriorityBlockingQueue<String>();
|
65 |
65 |
private int discoverResultSize = 0;
|
66 |
66 |
private int pagination = 1;
|
|
67 |
private String resultOffsetParam;
|
67 |
68 |
|
|
69 |
/**
 * Creates a RestIterator without authentication and without a result offset parameter.
 *
 * Kept for callers written against versions before 1.3.33. It only delegates to the
 * 14-argument constructor, passing empty strings for authMethod, authToken and
 * resultOffsetParam. Because resultOffsetParam is empty, the "deep-cursor" resumption
 * type (which rejects an offset parameter shorter than two characters) cannot be used
 * through this constructor.
 *
 * @param baseUrl            base URL of the REST endpoint; the query string is appended after "?"
 * @param resumptionType     resumption strategy; values handled in this class include
 *                           "pagination", "page" and "deep-cursor"
 * @param resumptionParam    name of the URL parameter that carries the resumption value
 * @param resumptionXpath    presumably the XPath yielding the resumption value in a response
 *                           (inferred from the name, confirm against the XML initialisation)
 * @param resultTotalXpath   presumably the XPath yielding the total number of results
 *                           (inferred from the name, confirm against the XML initialisation)
 * @param resultFormatParam  name of the URL parameter selecting the result format; when blank,
 *                           no format argument is added to the query
 * @param resultFormatValue  value sent for resultFormatParam
 * @param resultSizeParam    name of the URL parameter selecting the page size; when blank,
 *                           no size argument is added to the query
 * @param resultSizeValueStr page size as a decimal string; it is parsed with Integer.valueOf,
 *                           so a non-numeric value raises NumberFormatException
 * @param queryParams        query-string fragment placed directly after "?" in each request
 * @param entityXpath        presumably the XPath selecting the individual records
 *                           (inferred from the name, confirm against the XML initialisation)
 */
public RestIterator(
    final String baseUrl,
    final String resumptionType,
    final String resumptionParam,
    final String resumptionXpath,
    final String resultTotalXpath,
    final String resultFormatParam,
    final String resultFormatValue,
    final String resultSizeParam,
    final String resultSizeValueStr,
    final String queryParams,
    final String entityXpath
) {
    // Exactly one explicit constructor invocation is allowed and it must be the first statement.
    this(
        baseUrl,
        resumptionType,
        resumptionParam,
        resumptionXpath,
        resultTotalXpath,
        resultFormatParam,
        resultFormatValue,
        resultSizeParam,
        resultSizeValueStr,
        queryParams,
        entityXpath,
        "", // authMethod: none
        "", // authToken: none
        ""  // resultOffsetParam: none
    );
}
|
99 |
101 |
|
|
102 |
/**
 * Creates a RestIterator with authentication but without a result offset parameter.
 *
 * Signature compatible with version 1.3.33. It delegates to the 14-argument constructor,
 * forwarding authMethod and authToken unchanged and passing an empty resultOffsetParam.
 * Because resultOffsetParam is empty, the "deep-cursor" resumption type (which rejects an
 * offset parameter shorter than two characters) cannot be used through this constructor.
 *
 * @param baseUrl            base URL of the REST endpoint; the query string is appended after "?"
 * @param resumptionType     resumption strategy; values handled in this class include
 *                           "pagination", "page" and "deep-cursor"
 * @param resumptionParam    name of the URL parameter that carries the resumption value
 * @param resumptionXpath    presumably the XPath yielding the resumption value in a response
 *                           (inferred from the name, confirm against the XML initialisation)
 * @param resultTotalXpath   presumably the XPath yielding the total number of results
 *                           (inferred from the name, confirm against the XML initialisation)
 * @param resultFormatParam  name of the URL parameter selecting the result format; when blank,
 *                           no format argument is added to the query
 * @param resultFormatValue  value sent for resultFormatParam
 * @param resultSizeParam    name of the URL parameter selecting the page size; when blank,
 *                           no size argument is added to the query
 * @param resultSizeValueStr page size as a decimal string; it is parsed with Integer.valueOf,
 *                           so a non-numeric value raises NumberFormatException
 * @param queryParams        query-string fragment placed directly after "?" in each request
 * @param entityXpath        presumably the XPath selecting the individual records
 *                           (inferred from the name, confirm against the XML initialisation)
 * @param authMethod         authentication method, stored as given by the full constructor
 * @param authToken          authentication token, stored as given by the full constructor
 */
public RestIterator(
    final String baseUrl,
    final String resumptionType,
    final String resumptionParam,
    final String resumptionXpath,
    final String resultTotalXpath,
    final String resultFormatParam,
    final String resultFormatValue,
    final String resultSizeParam,
    final String resultSizeValueStr,
    final String queryParams,
    final String entityXpath,
    final String authMethod,
    final String authToken
) {
    // Forward the caller's credentials; passing "" here would silently disable authentication.
    this(
        baseUrl,
        resumptionType,
        resumptionParam,
        resumptionXpath,
        resultTotalXpath,
        resultFormatParam,
        resultFormatValue,
        resultSizeParam,
        resultSizeValueStr,
        queryParams,
        entityXpath,
        authMethod,
        authToken,
        "" // resultOffsetParam: none
    );
}
|
|
135 |
|
|
136 |
public RestIterator(
|
|
137 |
final String baseUrl,
|
|
138 |
final String resumptionType,
|
|
139 |
final String resumptionParam,
|
|
140 |
final String resumptionXpath,
|
|
141 |
final String resultTotalXpath,
|
|
142 |
final String resultFormatParam,
|
|
143 |
final String resultFormatValue,
|
|
144 |
final String resultSizeParam,
|
|
145 |
final String resultSizeValueStr,
|
|
146 |
final String queryParams,
|
|
147 |
final String entityXpath,
|
|
148 |
final String authMethod,
|
|
149 |
final String authToken,
|
|
150 |
final String resultOffsetParam
|
|
151 |
) {
|
115 |
152 |
this.baseUrl = baseUrl;
|
116 |
153 |
this.resumptionType = resumptionType;
|
117 |
154 |
this.resumptionParam = resumptionParam;
|
... | ... | |
120 |
157 |
this.resultSizeValue = Integer.valueOf(resultSizeValueStr);
|
121 |
158 |
this.authMethod = authMethod;
|
122 |
159 |
this.authToken = authToken;
|
|
160 |
this.resultOffsetParam = resultOffsetParam;
|
123 |
161 |
|
124 |
162 |
queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue : "";
|
125 |
163 |
querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";
|
... | ... | |
255 |
293 |
for (String arrayUrlArgStr : arrayQUrlArgument) {
|
256 |
294 |
if (arrayUrlArgStr.startsWith(resumptionParam)) {
|
257 |
295 |
String[] resumptionKeyValue = arrayUrlArgStr.split("=");
|
258 |
|
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
|
259 |
|
log.debug("discover OldResumptionSize from Url: " + urlOldResumptionSize);
|
|
296 |
if(isInteger(resumptionKeyValue[1])) {
|
|
297 |
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
|
|
298 |
log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize);
|
|
299 |
} else {
|
|
300 |
log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]);
|
|
301 |
}
|
260 |
302 |
}
|
261 |
303 |
}
|
262 |
304 |
|
... | ... | |
275 |
317 |
break;
|
276 |
318 |
|
277 |
319 |
case "pagination":
|
278 |
|
case "page": // pagination, iterate over pages
|
|
320 |
case "page": // pagination, iterate over page numbers
|
279 |
321 |
pagination += 1;
|
280 |
322 |
if (nodeList != null) {
|
281 |
323 |
discoverResultSize += nodeList.getLength();
|
... | ... | |
287 |
329 |
resumptionStr = Integer.toString(resumptionInt);
|
288 |
330 |
break;
|
289 |
331 |
|
|
332 |
case "deep-cursor": // size of result items unknown, iterate over items (for supporting deep cursor in solr)
|
|
333 |
if (resultSizeValue < 2) {throw new CollectorServiceException("Mode: deep-cursor, Param 'resultSizeValue' is less than 2");}
|
|
334 |
if (resultOffsetParam.length() < 2) {throw new CollectorServiceException("Mode: deep-cursor, Param 'resultOffset' is less than 2");}
|
|
335 |
|
|
336 |
resumptionStr = xprResumptionPath.evaluate(resultNode);
|
|
337 |
queryParams.replace("cursor=*", "");
|
|
338 |
queryParams.replace("&&", "&");
|
|
339 |
|
|
340 |
resumptionStr += "&" + resultOffsetParam + "=" + Integer.toString(resumptionInt);
|
|
341 |
|
|
342 |
log.debug("downloadPage().deep-cursor: resumptionStr=" + resumptionStr + " ; queryParams=" + queryParams);
|
|
343 |
|
|
344 |
break;
|
|
345 |
|
290 |
346 |
default: // otherwise: abort
|
291 |
347 |
// resultTotal = resumptionInt;
|
292 |
348 |
break;
|
293 |
349 |
}
|
294 |
350 |
|
295 |
|
if (resultTotal == -1) {
|
296 |
|
resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
|
297 |
|
if (resumptionType.toLowerCase().equals("page")) { resultTotal += 1; } // to correct the upper bound
|
298 |
|
log.info("resultTotal was -1 is now: " + resultTotal);
|
299 |
|
}
|
300 |
|
log.info("resultTotal: " + resultTotal);
|
301 |
|
log.info("resInt: " + resumptionInt);
|
302 |
|
if (resumptionInt <= resultTotal) {
|
303 |
|
nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
|
304 |
|
} else
|
305 |
|
nextQuery = "";
|
306 |
|
|
307 |
|
log.debug("nextQueryUrl: " + nextQuery);
|
308 |
|
return nextQuery;
|
309 |
|
|
310 |
351 |
} catch (Exception e) {
|
311 |
352 |
log.error(e);
|
312 |
353 |
throw new IllegalStateException("collection failed: " + e.getMessage());
|
|
354 |
}
|
|
355 |
|
|
356 |
if (resultTotal == -1) {
|
|
357 |
resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
|
|
358 |
if (resumptionType.toLowerCase().equals("page")) { resultTotal += 1; } // to correct the upper bound
|
|
359 |
log.info("resultTotal was -1 is now: " + resultTotal);
|
313 |
360 |
}
|
|
361 |
log.info("resultTotal: " + resultTotal);
|
|
362 |
log.info("resInt: " + resumptionInt);
|
|
363 |
if (resumptionInt <= resultTotal) {
|
|
364 |
nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
|
|
365 |
} else
|
|
366 |
nextQuery = "";
|
|
367 |
|
|
368 |
log.debug("nextQueryUrl: " + nextQuery);
|
|
369 |
return nextQuery;
|
|
370 |
|
|
371 |
|
314 |
372 |
}
|
315 |
373 |
|
316 |
374 |
/**
|
... | ... | |
395 |
453 |
|
396 |
454 |
return bufferStr;
|
397 |
455 |
}
|
|
456 |
|
|
457 |
private boolean isInteger(String s) {
|
|
458 |
boolean isValidInteger = false;
|
|
459 |
try {
|
|
460 |
Integer.parseInt(s);
|
398 |
461 |
|
|
462 |
// s is a valid integer
|
|
463 |
|
|
464 |
isValidInteger = true;
|
|
465 |
} catch (NumberFormatException ex) {
|
|
466 |
// s is not an integer
|
|
467 |
}
|
|
468 |
|
|
469 |
return isValidInteger;
|
|
470 |
}
|
|
471 |
|
399 |
472 |
}
|
RestCollector plugin - new resumptionType: deep-cursor