Project

General

Profile

« Previous | Next » 

Revision 61031

The REST plugin now accepts an optional request-headers parameter, given as a JSON map, e.g. {Accept:application/json}. This is useful for supporting PostgREST endpoints, which need a parameter in the request header in order to return proper JSON. Request headers must also be used for pagination (not implemented in this commit). More details on PostgREST are available at https://postgrest.org/en/stable/api.html.
This commit also includes minor refactoring.

View differences:

RestIterator.java
13 13
import java.nio.charset.StandardCharsets;
14 14
import java.net.HttpURLConnection;
15 15
import java.util.Iterator;
16
import java.util.Map;
16 17
import java.util.Queue;
17 18
import java.util.concurrent.PriorityBlockingQueue;
18 19
import javax.xml.transform.OutputKeys;
......
23 24
import javax.xml.transform.stream.StreamResult;
24 25
import javax.xml.xpath.*;
25 26

  
27
import com.google.common.collect.Maps;
26 28
import eu.dnetlib.data.collector.plugins.utils.JsonUtils;
27 29
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
28 30
import org.apache.commons.io.IOUtils;
29 31
import org.apache.commons.lang3.StringUtils;
30 32
import org.apache.commons.logging.Log;
31 33
import org.apache.commons.logging.LogFactory;
34
import org.apache.http.client.methods.CloseableHttpResponse;
35
import org.apache.http.client.methods.HttpGet;
36
import org.apache.http.impl.client.HttpClients;
32 37
import org.w3c.dom.Node;
33 38
import org.w3c.dom.NodeList;
34 39
import org.xml.sax.InputSource;
......
36 41
/**
37 42
 * @author Jochen Schirrwagen, Aenne Loehden, Andreas Czerniak, Alessia Bardi, Miriam Baglioni
38 43
 * @date 2020-04-09
39
 *
40 44
 */
41 45
public class RestIterator implements Iterator<String> {
42
	private final String AUTHBASIC = "basic";
46
    private final String AUTHBASIC = "basic";
43 47

  
44
	// TODO: clean up the comments of replaced source code
45
	private static final Log log = LogFactory.getLog(RestIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
48
    // TODO: clean up the comments of replaced source code
49
    private static final Log log = LogFactory.getLog(RestIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
50
    private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
51
    /**
     * XML document consisting only of the XML header followed by an empty wrapper element
     * named {@code JsonUtils.wrapName}; used by {@code isEmptyXml} to detect empty results.
     * The opening {@code "<"} of the wrapper element must be present, otherwise the constant
     * is not well-formed XML and never matches an empty wrapped document.
     */
    private static final String EMPTY_XML = XML_HEADER + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">";
52
    private JsonUtils jsonUtils;
46 53

  
47
	private JsonUtils jsonUtils;
54
    private String baseUrl;
55
    private String resumptionType;
56
    private String resumptionParam;
57
    private String resultFormatValue;
58
    private String queryParams = "";
59
    private int resultSizeValue;
60
    private int resumptionInt = 0;            // integer resumption token (first record to harvest)
61
    private int resultTotal = -1;
62
    private String resumptionStr = Integer.toString(resumptionInt);  // string resumption token (first record to harvest or token scanned from results)
63
    private InputStream resultStream;
64
    private Transformer transformer;
65
    private XPath xpath;
66
    private String query;
67
    private XPathExpression xprResultTotalPath;
68
    private XPathExpression xprResumptionPath;
69
    private XPathExpression xprEntity;
70
    private String queryFormat;
71
    private String querySize;
72
    private String authMethod;
73
    private String authToken;
74
    private Queue<String> recordQueue = new PriorityBlockingQueue<String>();
75
    private int discoverResultSize = 0;
76
    private int pagination = 1;
77
    /*
78
    While resultFormatValue is added to the request parameter, this is used to say that the results are retrieved in json.
79
    useful for cases when the target API expects a resultFormatValue != json, but the results are returned in json.
80
    An example is the EU Open Data Portal API: resultFormatValue=standard, results are in json format.
81
     */
82
    private String resultOutputFormat;
83
    /*
84
    Can be used to set additional request headers, like for content negotiation
85
     */
86
    private Map<String, String> requestHeaders;
48 87

  
49
	private String baseUrl;
50
	private String resumptionType;
51
	private String resumptionParam;
52
	private String resultFormatValue;
53
	private String queryParams = "";
54
	private int resultSizeValue;
55
	private int resumptionInt = 0;            // integer resumption token (first record to harvest)
56
	private int resultTotal = -1;
57
	private String resumptionStr = Integer.toString(resumptionInt);  // string resumption token (first record to harvest or token scanned from results)
58
	private InputStream resultStream;
59
	private Transformer transformer;
60
	private XPath xpath;
61
	private String query;
62
	private XPathExpression xprResultTotalPath;
63
	private XPathExpression xprResumptionPath;
64
	private XPathExpression xprEntity;
65
	private String queryFormat;
66
	private String querySize;
67
	private String authMethod;
68
	private String authToken;
69
	private Queue<String> recordQueue = new PriorityBlockingQueue<String>();
70
	private int discoverResultSize = 0;
71
	private int pagination = 1;
72
	/*
73
	While resultFormatValue is added to the request parameter, this is used to say that the results are retrieved in json.
74
	useful for cases when the target API expects a resultFormatValue != json, but the results are returned in json.
75
	An example is the EU Open Data Portal API: resultFormatValue=standard, results are in json format.
76
	 */
77
	private  String resultOutputFormat;
78
	
79 88

  
80
	public RestIterator(
81
			final String baseUrl,
82
			final String resumptionType,
83
			final String resumptionParam,
84
			final String resumptionXpath,
85
			final String resultTotalXpath,
86
			final String resultFormatParam,
87
			final String resultFormatValue,
88
			final String resultSizeParam,
89
			final String resultSizeValueStr,
90
			final String queryParams,
91
			final String entityXpath,
92
			final String authMethod,
93
			final String authToken,
94
			final String resultOutputFormat
95
	) {
96
		this.jsonUtils = new JsonUtils();
97
		this.baseUrl = baseUrl;
98
		this.resumptionType = resumptionType;
99
		this.resumptionParam = resumptionParam;
100
		this.resultFormatValue = resultFormatValue;
101
		this.queryParams = queryParams;
102
		this.resultSizeValue = Integer.valueOf(resultSizeValueStr);
103
		this.authMethod = authMethod;
104
		this.authToken = authToken;
105
		this.resultOutputFormat = resultOutputFormat;
89
    public RestIterator(
90
            final String baseUrl,
91
            final String resumptionType,
92
            final String resumptionParam,
93
            final String resumptionXpath,
94
            final String resultTotalXpath,
95
            final String resultFormatParam,
96
            final String resultFormatValue,
97
            final String resultSizeParam,
98
            final String resultSizeValueStr,
99
            final String queryParams,
100
            final String entityXpath,
101
            final String authMethod,
102
            final String authToken,
103
            final String resultOutputFormat,
104
            final Map<String, String> requestHeaders
105
    ) {
106
        this.jsonUtils = new JsonUtils();
107
        this.baseUrl = baseUrl;
108
        this.resumptionType = resumptionType;
109
        this.resumptionParam = resumptionParam;
110
        this.resultFormatValue = resultFormatValue;
111
        this.queryParams = queryParams;
112
        this.resultSizeValue = Integer.valueOf(resultSizeValueStr);
113
        this.authMethod = authMethod;
114
        this.authToken = authToken;
115
        this.resultOutputFormat = resultOutputFormat;
116
        this.requestHeaders = requestHeaders != null ? requestHeaders : Maps.newHashMap();
106 117

  
107
		queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue : "";
108
		querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";
118
        queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue : "";
119
        querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";
109 120

  
110
		try {
111
			initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath);
112
		} catch (Exception e) {
113
			throw new IllegalStateException("xml transformation init failed: " + e.getMessage());
114
		}
115
		initQueue();
116
	}
117
	
118
	
119
	private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
120
			throws TransformerConfigurationException, XPathExpressionException {
121
		transformer = TransformerFactory.newInstance().newTransformer();
122
		transformer.setOutputProperty(OutputKeys.INDENT, "yes");
123
		transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
124
		xpath = XPathFactory.newInstance().newXPath();
125
		xprResultTotalPath = xpath.compile(resultTotalXpath);
126
		xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath);
127
		xprEntity = xpath.compile(entityXpath);
128
	}
121
        try {
122
            initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath);
123
        } catch (Exception e) {
124
            throw new IllegalStateException("xml transformation init failed: " + e.getMessage());
125
        }
126
        initQueue();
127
    }
129 128

  
130
	private void initQueue() {
131
		if( queryParams.equals("") && querySize.equals("") && queryFormat.equals("")) {
132
			query = baseUrl;
133
		} else {
134
			query = baseUrl + "?" + queryParams + querySize + queryFormat;
135
		}
136 129

  
137
		log.info("REST calls starting with "+query);
138
	}
130
    private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
131
            throws TransformerConfigurationException, XPathExpressionException {
132
        transformer = TransformerFactory.newInstance().newTransformer();
133
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
134
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
135
        xpath = XPathFactory.newInstance().newXPath();
136
        xprResultTotalPath = xpath.compile(resultTotalXpath);
137
        xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath);
138
        xprEntity = xpath.compile(entityXpath);
139
    }
139 140

  
140
	private void disconnect() {
141
		// TODO close inputstream
142
	}
141
    private void initQueue() {
142
        if (queryParams.equals("") && querySize.equals("") && queryFormat.equals("")) {
143
            query = baseUrl;
144
        } else {
145
            query = baseUrl + "?" + queryParams + querySize + queryFormat;
146
        }
143 147

  
144
	/* (non-Javadoc)
145
	 * @see java.util.Iterator#hasNext()
146
	 */
147
	@Override
148
	public boolean hasNext() {
149
		if (recordQueue.isEmpty() && query.isEmpty()) {
150
			disconnect();
151
			return false;
152
		} else {
153
			return true;
154
		}
155
	}
148
        log.info("REST calls starting with " + query);
149
    }
156 150

  
157
	/* (non-Javadoc)
158
	 * @see java.util.Iterator#next()
159
	 */
160
	@Override
161
	public String next() {
162
		synchronized (recordQueue) {
163
			while (recordQueue.isEmpty() && !query.isEmpty()) {
164
				try {
165
					log.debug("get Query: " + query);
166
					query = downloadPage(query);
167
					log.debug("next queryURL from downloadPage(): " + query);
168
				} catch (CollectorServiceException e) {
169
					log.debug("CollectorPlugin.next()-Exception: " + e);
170
					throw new RuntimeException(e);
171
				}
172
			}
173
			return recordQueue.poll();
174
		}
175
	}
151
    private void disconnect() {
152
        // TODO close inputstream
153
    }
176 154

  
177
	/*
178
	 * download page and return nextQuery
179
	 */
180
	private String downloadPage(String query) throws CollectorServiceException {
181
		String resultJson;
182
		String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
183
		String nextQuery = "";
184
		String emptyXml = resultXml + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">";
185
		Node resultNode = null;
186
		NodeList nodeList = null;
187
		String qUrlArgument = "";
188
		int urlOldResumptionSize = 0;
189
		InputStream theHttpInputStream;
190
		
191
		// check if cursor=* is initial set otherwise add it to the queryParam URL
192
		if( resumptionType.equalsIgnoreCase("deep-cursor") ) {
193
			log.debug("check resumptionType deep-cursor and check cursor=*?" + query);
194
			if(!query.contains("&cursor=")) {
195
				query += "&cursor=*";
196
			}
197
		}
155
    /* (non-Javadoc)
156
     * @see java.util.Iterator#hasNext()
157
     */
158
    @Override
159
    public boolean hasNext() {
160
        if (recordQueue.isEmpty() && query.isEmpty()) {
161
            disconnect();
162
            return false;
163
        } else {
164
            return true;
165
        }
166
    }
198 167

  
199
		try {
200
			URL qUrl = new URL(query);
201
			log.debug("authMethod :" + authMethod);
202
			if (this.authMethod == "bearer") {
203
				log.trace("authMethod before inputStream: " + resultXml);
204
				HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
205
	        	conn.setRequestProperty("Authorization","Bearer "+authToken);
206
	        	conn.setRequestProperty("Content-Type","application/json");
207
	        	conn.setRequestMethod("GET");
208
	        	theHttpInputStream = conn.getInputStream();
209
			}else if (AUTHBASIC.equalsIgnoreCase(this.authMethod)) {
210
				log.trace("authMethod before inputStream: " + resultXml);
211
				HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
212
				conn.setRequestProperty("Authorization","Basic "+authToken);
213
				conn.setRequestProperty("accept","application/xml");
214
				conn.setRequestMethod("GET");
215
				theHttpInputStream = conn.getInputStream();
216
			} else {
217
				theHttpInputStream = qUrl.openStream();
218
			}
219
			
220
			resultStream = theHttpInputStream;
221
			if ("json".equals(resultOutputFormat)) {
222
				resultJson = IOUtils.toString(resultStream, "UTF-8");
223
				resultXml = jsonUtils.convertToXML(resultJson);
224
				resultStream = IOUtils.toInputStream(resultXml, "UTF-8");
225
			}
168
    /* (non-Javadoc)
169
     * @see java.util.Iterator#next()
170
     */
171
    @Override
172
    public String next() {
173
        synchronized (recordQueue) {
174
            while (recordQueue.isEmpty() && !query.isEmpty()) {
175
                try {
176
                    log.debug("get Query: " + query);
177
                    query = downloadPage(query);
178
                    log.debug("next queryURL from downloadPage(): " + query);
179
                } catch (CollectorServiceException e) {
180
                    log.debug("CollectorPlugin.next()-Exception: " + e);
181
                    throw new RuntimeException(e);
182
                }
183
            }
184
            return recordQueue.poll();
185
        }
186
    }
226 187

  
227
			if (!(emptyXml.toLowerCase()).equals(resultXml.toLowerCase())) {
228
				resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
229
				nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
230
				log.debug("nodeList.length: " + nodeList.getLength());
231
				for (int i = 0; i < nodeList.getLength(); i++) {
232
					StringWriter sw = new StringWriter();
233
					transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
234
					String toEnqueue = sw.toString();
235
					if(toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)){
236
						log.warn("The following record resulted in empty item for the feeding queue: "+resultXml);
237
					}
238
					else{ recordQueue.add(sw.toString());}
239
				}
240
			} else { log.warn("resultXml is equal with emptyXml"); }
188
    /*
189
     * download page and return nextQuery
190
     */
191
    private String downloadPage(String query) throws CollectorServiceException {
192
        String resultJson;
193
        String resultXml = XML_HEADER;
194
        String nextQuery = "";
195
        Node resultNode = null;
196
        NodeList nodeList = null;
197
        String qUrlArgument = "";
198
        int urlOldResumptionSize = 0;
199
        InputStream theHttpInputStream;
241 200

  
242
			resumptionInt += resultSizeValue;
201
        // check if cursor=* is initial set otherwise add it to the queryParam URL
202
        if (resumptionType.equalsIgnoreCase("deep-cursor")) {
203
            log.debug("check resumptionType deep-cursor and check cursor=*?" + query);
204
            if (!query.contains("&cursor=")) {
205
                query += "&cursor=*";
206
            }
207
        }
243 208

  
244
			switch (resumptionType.toLowerCase()) {
245
			case "scan":    // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items
246
				resumptionStr = xprResumptionPath.evaluate(resultNode);
247
				break;
209
        try {
210
            URL qUrl = new URL(query);
211
            log.debug("authMethod :" + authMethod);
212
            if (this.authMethod == "bearer") {
213
                log.trace("authMethod before inputStream: " + resultXml);
214
                requestHeaders.put("Authorization", "Bearer " + authToken);
215
                requestHeaders.put("Content-Type", "application/json");
216
            } else if (AUTHBASIC.equalsIgnoreCase(this.authMethod)) {
217
                log.trace("authMethod before inputStream: " + resultXml);
218
                requestHeaders.put("Authorization", "Basic " + authToken);
219
                requestHeaders.put("accept", "application/xml");
220
            }
248 221

  
249
			case "count":   // begin at one step for all records, iterate over items
250
				resumptionStr = Integer.toString(resumptionInt);
251
				break;
222
            HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
223
            conn.setRequestMethod("GET");
224
            this.setRequestHeader(conn);
225
            resultStream = conn.getInputStream();
252 226

  
253
			case "discover":   // size of result items unknown, iterate over items  (for openDOAR - 201808)
254
				if (resultSizeValue < 2) {throw new CollectorServiceException("Mode: discover, Param 'resultSizeValue' is less than 2");}
255
				qUrlArgument = qUrl.getQuery();
256
				String[] arrayQUrlArgument = qUrlArgument.split("&");
257
				for (String arrayUrlArgStr : arrayQUrlArgument) {
258
					if (arrayUrlArgStr.startsWith(resumptionParam)) {
259
						String[] resumptionKeyValue = arrayUrlArgStr.split("=");
260
						if(isInteger(resumptionKeyValue[1])) {
261
							urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
262
							log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize);
263
						} else {
264
							log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]);
265
						}
266
					}
267
				}
227
            if ("json".equals(resultOutputFormat)) {
228
                resultJson = IOUtils.toString(resultStream, "UTF-8");
229
                resultXml = jsonUtils.convertToXML(resultJson);
230
                resultStream = IOUtils.toInputStream(resultXml, "UTF-8");
231
            }
268 232

  
269
				if (((emptyXml.toLowerCase()).equals(resultXml.toLowerCase()))
270
						|| ((nodeList != null) && (nodeList.getLength() < resultSizeValue))
271
				) {
272
					// resumptionStr = "";
273
					if (nodeList != null) { discoverResultSize += nodeList.getLength(); }
274
					resultTotal = discoverResultSize;
275
				} else {
276
					resumptionStr = Integer.toString(resumptionInt);
277
					resultTotal = resumptionInt + 1;
278
					if (nodeList != null) { discoverResultSize += nodeList.getLength(); }
279
				}
280
				log.debug("discoverResultSize:  " + discoverResultSize);
281
				break;
233
            if (!isEmptyXml(resultXml)) {
234
                resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
235
                nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
236
                log.debug("nodeList.length: " + nodeList.getLength());
237
                for (int i = 0; i < nodeList.getLength(); i++) {
238
                    StringWriter sw = new StringWriter();
239
                    transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
240
                    String toEnqueue = sw.toString();
241
                    if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || isEmptyXml(toEnqueue)) {
242
                        log.warn("The following record resulted in empty item for the feeding queue: " + resultXml);
243
                    } else {
244
                        recordQueue.add(sw.toString());
245
                    }
246
                }
247
            } else {
248
                log.warn("resultXml is equal with emptyXml");
249
            }
282 250

  
283
			case "pagination":
284
			case "page":         // pagination, iterate over page numbers
285
				pagination += 1;
286
				if (nodeList != null) {
287
					discoverResultSize += nodeList.getLength();
288
				} else {
289
					resultTotal = discoverResultSize;
290
					pagination = discoverResultSize;
291
				}
292
				resumptionInt = pagination;
293
				resumptionStr = Integer.toString(resumptionInt);
294
				break;
251
            resumptionInt += resultSizeValue;
295 252

  
296
			case "deep-cursor":   // size of result items unknown, iterate over items  (for supporting deep cursor in solr)
297
				// isn't relevant -- if (resultSizeValue < 2) {throw new CollectorServiceException("Mode: deep-cursor, Param 'resultSizeValue' is less than 2");}
253
            switch (resumptionType.toLowerCase()) {
254
                case "scan":    // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items
255
                    resumptionStr = xprResumptionPath.evaluate(resultNode);
256
                    break;
298 257

  
299
				resumptionStr = encodeValue(xprResumptionPath.evaluate(resultNode));
300
				queryParams = queryParams.replace("&cursor=*", "");
301
				
302
				// terminating if length of nodeList is 0
303
				if( (nodeList != null) && (nodeList.getLength() < discoverResultSize) ) {
304
					resumptionInt += ( nodeList.getLength() + 1 - resultSizeValue);
305
				} else {
306
					resumptionInt += (nodeList.getLength() - resultSizeValue);	// subtract the resultSizeValue because the iteration is over real length and the resultSizeValue is added before the switch()
307
				}
308
				
309
				discoverResultSize = nodeList.getLength();
310
				
311
				log.debug("downloadPage().deep-cursor: resumptionStr=" + resumptionStr + " ; queryParams=" + queryParams + " resumptionLengthIncreased: " + resumptionInt);
258
                case "count":   // begin at one step for all records, iterate over items
259
                    resumptionStr = Integer.toString(resumptionInt);
260
                    break;
312 261

  
313
				break;
314
			
315
			default:        // otherwise: abort
316
				// resultTotal = resumptionInt;
317
				break;
318
			}
262
                case "discover":   // size of result items unknown, iterate over items  (for openDOAR - 201808)
263
                    if (resultSizeValue < 2) {
264
                        throw new CollectorServiceException("Mode: discover, Param 'resultSizeValue' is less than 2");
265
                    }
266
                    qUrlArgument = qUrl.getQuery();
267
                    String[] arrayQUrlArgument = qUrlArgument.split("&");
268
                    for (String arrayUrlArgStr : arrayQUrlArgument) {
269
                        if (arrayUrlArgStr.startsWith(resumptionParam)) {
270
                            String[] resumptionKeyValue = arrayUrlArgStr.split("=");
271
                            if (isInteger(resumptionKeyValue[1])) {
272
                                urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
273
                                log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize);
274
                            } else {
275
                                log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]);
276
                            }
277
                        }
278
                    }
319 279

  
320
		} catch (Exception e) {
321
			log.error(e);
322
			throw new IllegalStateException("collection failed: " + e.getMessage());
323
		}			
324
			
325
		try {
326
			if (resultTotal == -1) {
327
				resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
328
				if (resumptionType.toLowerCase().equals("page") && !AUTHBASIC.equalsIgnoreCase(authMethod)) { resultTotal += 1; }           // to correct the upper bound
329
				log.info("resultTotal was -1 is now: " + resultTotal);
330
		}
331
		} catch(Exception e) {
332
			log.error(e);
333
			throw new IllegalStateException("downloadPage() resultTotal couldn't parse: " + e.getMessage());
334
		}
335
		log.debug("resultTotal: " + resultTotal);
336
		log.debug("resInt: " + resumptionInt);
337
		if (resumptionInt <= resultTotal) {
338
			nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
339
		} else {
340
			nextQuery = "";
341
			// if (resumptionType.toLowerCase().equals("deep-cursor")) { resumptionInt -= 1; }    	// correct the resumptionInt and prevent a NullPointer Exception at mdStore 
342
		}
343
		log.debug("downloadPage() nextQueryUrl: " + nextQuery);
344
		return nextQuery;
280
                    if (isEmptyXml(resultXml) || ((nodeList != null) && (nodeList.getLength() < resultSizeValue))
281
                    ) {
282
                        // resumptionStr = "";
283
                        if (nodeList != null) {
284
                            discoverResultSize += nodeList.getLength();
285
                        }
286
                        resultTotal = discoverResultSize;
287
                    } else {
288
                        resumptionStr = Integer.toString(resumptionInt);
289
                        resultTotal = resumptionInt + 1;
290
                        if (nodeList != null) {
291
                            discoverResultSize += nodeList.getLength();
292
                        }
293
                    }
294
                    log.debug("discoverResultSize:  " + discoverResultSize);
295
                    break;
345 296

  
297
                case "pagination":
298
                case "page":         // pagination, iterate over page numbers
299
                    pagination += 1;
300
                    if (nodeList != null) {
301
                        discoverResultSize += nodeList.getLength();
302
                    } else {
303
                        resultTotal = discoverResultSize;
304
                        pagination = discoverResultSize;
305
                    }
306
                    resumptionInt = pagination;
307
                    resumptionStr = Integer.toString(resumptionInt);
308
                    break;
346 309

  
347
	}
310
                case "deep-cursor":   // size of result items unknown, iterate over items  (for supporting deep cursor in solr)
311
                    // isn't relevant -- if (resultSizeValue < 2) {throw new CollectorServiceException("Mode: deep-cursor, Param 'resultSizeValue' is less than 2");}
348 312

  
313
                    resumptionStr = encodeValue(xprResumptionPath.evaluate(resultNode));
314
                    queryParams = queryParams.replace("&cursor=*", "");
349 315

  
350
	
351
	private boolean isInteger(String s) {
352
		boolean isValidInteger = false;
353
		try {
354
			Integer.parseInt(s);
316
                    // terminating if length of nodeList is 0
317
                    if ((nodeList != null) && (nodeList.getLength() < discoverResultSize)) {
318
                        resumptionInt += (nodeList.getLength() + 1 - resultSizeValue);
319
                    } else {
320
                        resumptionInt += (nodeList.getLength() - resultSizeValue);    // subtract the resultSizeValue because the iteration is over real length and the resultSizeValue is added before the switch()
321
                    }
355 322

  
356
			// s is a valid integer
323
                    discoverResultSize = nodeList.getLength();
357 324

  
358
			isValidInteger = true;
359
		} catch (NumberFormatException ex) {
360
			// s is not an integer
361
		}
325
                    log.debug("downloadPage().deep-cursor: resumptionStr=" + resumptionStr + " ; queryParams=" + queryParams + " resumptionLengthIncreased: " + resumptionInt);
362 326

  
363
		return isValidInteger;
364
	}
365
	
366
	// Method to encode a string value using `UTF-8` encoding scheme
327
                    break;
328

  
329
                default:        // otherwise: abort
330
                    // resultTotal = resumptionInt;
331
                    break;
332
            }
333

  
334
        } catch (Exception e) {
335
            log.error(e);
336
            throw new IllegalStateException("collection failed: " + e.getMessage());
337
        }
338

  
339
        try {
340
            if (resultTotal == -1) {
341
                resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
342
                if (resumptionType.toLowerCase().equals("page") && !AUTHBASIC.equalsIgnoreCase(authMethod)) {
343
                    resultTotal += 1;
344
                }           // to correct the upper bound
345
                log.info("resultTotal was -1 is now: " + resultTotal);
346
            }
347
        } catch (Exception e) {
348
            log.error(e);
349
            throw new IllegalStateException("downloadPage() resultTotal couldn't parse: " + e.getMessage());
350
        }
351
        log.debug("resultTotal: " + resultTotal);
352
        log.debug("resInt: " + resumptionInt);
353
        if (resumptionInt <= resultTotal) {
354
            nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
355
        } else {
356
            nextQuery = "";
357
            // if (resumptionType.toLowerCase().equals("deep-cursor")) { resumptionInt -= 1; }    	// correct the resumptionInt and prevent a NullPointer Exception at mdStore
358
        }
359
        log.debug("downloadPage() nextQueryUrl: " + nextQuery);
360
        return nextQuery;
361

  
362

  
363
    }
364

  
365
    private boolean isEmptyXml(String s){
366
        return EMPTY_XML.equalsIgnoreCase(s);
367
    }
368

  
369

  
370
    private boolean isInteger(String s) {
371
        boolean isValidInteger = false;
372
        try {
373
            Integer.parseInt(s);
374

  
375
            // s is a valid integer
376

  
377
            isValidInteger = true;
378
        } catch (NumberFormatException ex) {
379
            // s is not an integer
380
        }
381

  
382
        return isValidInteger;
383
    }
384

  
385
    // Method to encode a string value using `UTF-8` encoding scheme
367 386
    private String encodeValue(String value) {
368 387
        try {
369 388
            return URLEncoder.encode(value, StandardCharsets.UTF_8.toString());
......
372 391
        }
373 392
    }
374 393

  
375
	public String getResultFormatValue() {
376
		return resultFormatValue;
377
	}
394
    private void setRequestHeader(HttpURLConnection conn) {
395
        if (requestHeaders != null) {
396
            for (String key : requestHeaders.keySet()) {
397
                conn.setRequestProperty(key, requestHeaders.get(key));
398
            }
399
            log.debug("Set Request Header with: " + requestHeaders);
400
        }
378 401

  
379
	public String getResultOutputFormat() {
380
		return resultOutputFormat;
381
	}
402
    }
382 403

  
404
    public String getResultFormatValue() {
405
        return resultFormatValue;
406
    }
407

  
408
    public String getResultOutputFormat() {
409
        return resultOutputFormat;
410
    }
411

  
383 412
}

Also available in: Unified diff