Project

General

Profile

« Previous | Next » 

Revision 58427

Added by Andreas Czerniak almost 4 years ago

RestCollector plugin - new resumptionType: deep-cursor

View differences:

modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/rest/RestIterator.java
64 64
	private Queue<String> recordQueue = new PriorityBlockingQueue<String>();
65 65
	private int discoverResultSize = 0;
66 66
	private int pagination = 1;
67
	private String resultOffsetParam;
67 68

  
69
	/**
70
	 * RestIterator class
71
	 * 
72
	 * compatible with versions before 1.3.33
73
	 * 
74
	 * @param baseUrl
75
	 * @param resumptionType
76
	 * @param resumptionParam
77
	 * @param resumptionXpath
78
	 * @param resultTotalXpath
79
	 * @param resultFormatParam
80
	 * @param resultFormatValue
81
	 * @param resultSizeParam
82
	 * @param resultSizeValueStr
83
	 * @param queryParams
84
	 * @param entityXpath
85
	 */
68 86
	public RestIterator(
69 87
			final String baseUrl,
70 88
			final String resumptionType,
......
78 96
			final String queryParams,
79 97
			final String entityXpath
80 98
	) {
81
//		this.baseUrl = baseUrl;
82
//		this.resumptionType = resumptionType;
83
//		this.resumptionParam = resumptionParam;
84
//		this.resultFormatValue = resultFormatValue;
85
//		this.queryParams = queryParams;
86
//		this.resultSizeValue = Integer.valueOf(resultSizeValueStr);
87
//
88
//		queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue : "";
89
//		querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";
90
//
91
//		try {
92
//			initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath);
93
//		} catch (Exception e) {
94
//			throw new IllegalStateException("xml transformation init failed: " + e.getMessage());
95
//		}
96
//		initQueue();
97
		this(baseUrl,resumptionType,resumptionParam,resumptionXpath,resultTotalXpath,resultFormatParam,resultFormatValue,resultSizeParam,resultSizeValueStr,queryParams,entityXpath,"", "");
99
		this(baseUrl,resumptionType,resumptionParam,resumptionXpath,resultTotalXpath,resultFormatParam,resultFormatValue,resultSizeParam,resultSizeValueStr,queryParams,entityXpath,"", "","");
98 100
	}
99 101

  
102
	/** RestIterator class
103
	 *  compatible with version 1.3.33
104
	 * @param baseUrl
105
	 * @param resumptionType
106
	 * @param resumptionParam
107
	 * @param resumptionXpath
108
	 * @param resultTotalXpath
109
	 * @param resultFormatParam
110
	 * @param resultFormatValue
111
	 * @param resultSizeParam
112
	 * @param resultSizeValueStr
113
	 * @param queryParams
114
	 * @param entityXpath
115
	 * @param authMethod
116
	 * @param authToken
117
	 */
100 118
	public RestIterator(
101 119
			final String baseUrl,
102 120
			final String resumptionType,
......
112 130
			final String authMethod,
113 131
			final String authToken
114 132
	) {
133
		this(baseUrl,resumptionType,resumptionParam,resumptionXpath,resultTotalXpath,resultFormatParam,resultFormatValue,resultSizeParam,resultSizeValueStr,queryParams,entityXpath,"", "","");
134
	}
135
	
136
	public RestIterator(
137
			final String baseUrl,
138
			final String resumptionType,
139
			final String resumptionParam,
140
			final String resumptionXpath,
141
			final String resultTotalXpath,
142
			final String resultFormatParam,
143
			final String resultFormatValue,
144
			final String resultSizeParam,
145
			final String resultSizeValueStr,
146
			final String queryParams,
147
			final String entityXpath,
148
			final String authMethod,
149
			final String authToken,
150
			final String resultOffsetParam
151
	) {
115 152
		this.baseUrl = baseUrl;
116 153
		this.resumptionType = resumptionType;
117 154
		this.resumptionParam = resumptionParam;
......
120 157
		this.resultSizeValue = Integer.valueOf(resultSizeValueStr);
121 158
		this.authMethod = authMethod;
122 159
		this.authToken = authToken;
160
		this.resultOffsetParam = resultOffsetParam;
123 161

  
124 162
		queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue : "";
125 163
		querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";
......
255 293
				for (String arrayUrlArgStr : arrayQUrlArgument) {
256 294
					if (arrayUrlArgStr.startsWith(resumptionParam)) {
257 295
						String[] resumptionKeyValue = arrayUrlArgStr.split("=");
258
						urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
259
						log.debug("discover OldResumptionSize from Url: " + urlOldResumptionSize);
296
						if(isInteger(resumptionKeyValue[1])) {
297
							urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
298
							log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize);
299
						} else {
300
							log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]);
301
						}
260 302
					}
261 303
				}
262 304

  
......
275 317
				break;
276 318

  
277 319
			case "pagination":
278
			case "page":         // pagination, iterate over pages
320
			case "page":         // pagination, iterate over page numbers
279 321
				pagination += 1;
280 322
				if (nodeList != null) {
281 323
					discoverResultSize += nodeList.getLength();
......
287 329
				resumptionStr = Integer.toString(resumptionInt);
288 330
				break;
289 331

  
332
			case "deep-cursor":   // size of result items unknown, iterate over items  (for supporting deep cursor in solr)
333
				if (resultSizeValue < 2) {throw new CollectorServiceException("Mode: deep-cursor, Param 'resultSizeValue' is less than 2");}
334
				if (resultOffsetParam.length() < 2) {throw new CollectorServiceException("Mode: deep-cursor, Param 'resultOffset' is less than 2");}
335

  
336
				resumptionStr = xprResumptionPath.evaluate(resultNode);
337
				queryParams.replace("cursor=*", "");
338
				queryParams.replace("&&",  "&");
339
				
340
				resumptionStr += "&" + resultOffsetParam + "=" + Integer.toString(resumptionInt);
341
				
342
				log.debug("downloadPage().deep-cursor: resumptionStr=" + resumptionStr + " ; queryParams=" + queryParams);
343

  
344
				break;
345
			
290 346
			default:        // otherwise: abort
291 347
				// resultTotal = resumptionInt;
292 348
				break;
293 349
			}
294 350

  
295
			if (resultTotal == -1) {
296
				resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
297
				if (resumptionType.toLowerCase().equals("page")) { resultTotal += 1; }           // to correct the upper bound
298
				log.info("resultTotal was -1 is now: " + resultTotal);
299
			}
300
			log.info("resultTotal: " + resultTotal);
301
			log.info("resInt: " + resumptionInt);
302
			if (resumptionInt <= resultTotal) {
303
				nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
304
			} else
305
				nextQuery = "";
306

  
307
			log.debug("nextQueryUrl: " + nextQuery);
308
			return nextQuery;
309

  
310 351
		} catch (Exception e) {
311 352
			log.error(e);
312 353
			throw new IllegalStateException("collection failed: " + e.getMessage());
354
		}			
355
			
356
		if (resultTotal == -1) {
357
			resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
358
			if (resumptionType.toLowerCase().equals("page")) { resultTotal += 1; }           // to correct the upper bound
359
			log.info("resultTotal was -1 is now: " + resultTotal);
313 360
		}
361
		log.info("resultTotal: " + resultTotal);
362
		log.info("resInt: " + resumptionInt);
363
		if (resumptionInt <= resultTotal) {
364
			nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
365
		} else
366
			nextQuery = "";
367

  
368
		log.debug("nextQueryUrl: " + nextQuery);
369
		return nextQuery;
370

  
371

  
314 372
	}
315 373

  
316 374
	/**
......
395 453

  
396 454
		return bufferStr;
397 455
	}
456
	
457
	private boolean isInteger(String s) {
458
		boolean isValidInteger = false;
459
		try {
460
			Integer.parseInt(s);
398 461

  
462
			// s is a valid integer
463

  
464
			isValidInteger = true;
465
		} catch (NumberFormatException ex) {
466
			// s is not an integer
467
		}
468

  
469
		return isValidInteger;
470
	}
471

  
399 472
}
modules/dnet-collector-plugins/trunk/src/main/resources/eu/dnetlib/data/collector/plugins/applicationContext-dnet-modular-collector-plugins.xml
191 191
                        <bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter"
192 192
							p:name="queryParams" />
193 193
                        <bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter"
194
							p:name="authMethod" />	
194
							p:name="authMethod" p:optional="true" />	
195 195
						<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter"
196
							p:name="authToken" />	
196
							p:name="authToken" p:optional="true" />	
197
                        <bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter"
198
							p:name="resultOffsetParam" p:optional="true" />								
197 199
						<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter"
198 200
							p:name="entityXpath" />
199 201
					</list>
modules/dnet-collector-plugins/trunk/pom.xml
7 7
	</parent>
8 8
	<groupId>eu.dnetlib</groupId>
9 9
	<artifactId>dnet-collector-plugins</artifactId>
10
	<version>1.3.34-SNAPSHOT</version>
10
	<version>1.3.35-SNAPSHOT</version>
11 11
	<scm>
12 12
		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-collector-plugins/trunk</developerConnection>
13 13
	</scm>

Also available in: Unified diff