Revision 61031
Added by Alessia Bardi almost 3 years ago
RestIterator.java | ||
---|---|---|
13 | 13 |
import java.nio.charset.StandardCharsets; |
14 | 14 |
import java.net.HttpURLConnection; |
15 | 15 |
import java.util.Iterator; |
16 |
import java.util.Map; |
|
16 | 17 |
import java.util.Queue; |
17 | 18 |
import java.util.concurrent.PriorityBlockingQueue; |
18 | 19 |
import javax.xml.transform.OutputKeys; |
... | ... | |
23 | 24 |
import javax.xml.transform.stream.StreamResult; |
24 | 25 |
import javax.xml.xpath.*; |
25 | 26 |
|
27 |
import com.google.common.collect.Maps; |
|
26 | 28 |
import eu.dnetlib.data.collector.plugins.utils.JsonUtils; |
27 | 29 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
28 | 30 |
import org.apache.commons.io.IOUtils; |
29 | 31 |
import org.apache.commons.lang3.StringUtils; |
30 | 32 |
import org.apache.commons.logging.Log; |
31 | 33 |
import org.apache.commons.logging.LogFactory; |
34 |
import org.apache.http.client.methods.CloseableHttpResponse; |
|
35 |
import org.apache.http.client.methods.HttpGet; |
|
36 |
import org.apache.http.impl.client.HttpClients; |
|
32 | 37 |
import org.w3c.dom.Node; |
33 | 38 |
import org.w3c.dom.NodeList; |
34 | 39 |
import org.xml.sax.InputSource; |
... | ... | |
36 | 41 |
/** |
37 | 42 |
* @author Jochen Schirrwagen, Aenne Loehden, Andreas Czerniak, Alessia Bardi, Miriam Baglioni |
38 | 43 |
* @date 2020-04-09 |
39 |
* |
|
40 | 44 |
*/ |
41 | 45 |
public class RestIterator implements Iterator<String> { |
42 |
private final String AUTHBASIC = "basic";
|
|
46 |
private final String AUTHBASIC = "basic";
|
|
43 | 47 |
|
44 |
// TODO: clean up the comments of replaced source code |
|
45 |
private static final Log log = LogFactory.getLog(RestIterator.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
48 |
// TODO: clean up the comments of replaced source code |
|
49 |
private static final Log log = LogFactory.getLog(RestIterator.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
50 |
private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; |
|
51 |
// Empty result document: the XML header followed by an empty <wrapName></wrapName> element (the opening "<" is required for a well-formed tag).
private static final String EMPTY_XML = XML_HEADER + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">"; |
|
52 |
private JsonUtils jsonUtils; |
|
46 | 53 |
|
47 |
private JsonUtils jsonUtils; |
|
54 |
private String baseUrl; |
|
55 |
private String resumptionType; |
|
56 |
private String resumptionParam; |
|
57 |
private String resultFormatValue; |
|
58 |
private String queryParams = ""; |
|
59 |
private int resultSizeValue; |
|
60 |
private int resumptionInt = 0; // integer resumption token (first record to harvest) |
|
61 |
private int resultTotal = -1; |
|
62 |
private String resumptionStr = Integer.toString(resumptionInt); // string resumption token (first record to harvest or token scanned from results) |
|
63 |
private InputStream resultStream; |
|
64 |
private Transformer transformer; |
|
65 |
private XPath xpath; |
|
66 |
private String query; |
|
67 |
private XPathExpression xprResultTotalPath; |
|
68 |
private XPathExpression xprResumptionPath; |
|
69 |
private XPathExpression xprEntity; |
|
70 |
private String queryFormat; |
|
71 |
private String querySize; |
|
72 |
private String authMethod; |
|
73 |
private String authToken; |
|
74 |
private Queue<String> recordQueue = new PriorityBlockingQueue<String>(); |
|
75 |
private int discoverResultSize = 0; |
|
76 |
private int pagination = 1; |
|
77 |
/* |
|
78 |
While resultFormatValue is added to the request parameter, this is used to say that the results are retrieved in json. |
|
79 |
useful for cases when the target API expects a resultFormatValue != json, but the results are returned in json. |
|
80 |
An example is the EU Open Data Portal API: resultFormatValue=standard, results are in json format. |
|
81 |
*/ |
|
82 |
private String resultOutputFormat; |
|
83 |
/* |
|
84 |
Can be used to set additional request headers, like for content negotiation |
|
85 |
*/ |
|
86 |
private Map<String, String> requestHeaders; |
|
48 | 87 |
|
49 |
private String baseUrl; |
|
50 |
private String resumptionType; |
|
51 |
private String resumptionParam; |
|
52 |
private String resultFormatValue; |
|
53 |
private String queryParams = ""; |
|
54 |
private int resultSizeValue; |
|
55 |
private int resumptionInt = 0; // integer resumption token (first record to harvest) |
|
56 |
private int resultTotal = -1; |
|
57 |
private String resumptionStr = Integer.toString(resumptionInt); // string resumption token (first record to harvest or token scanned from results) |
|
58 |
private InputStream resultStream; |
|
59 |
private Transformer transformer; |
|
60 |
private XPath xpath; |
|
61 |
private String query; |
|
62 |
private XPathExpression xprResultTotalPath; |
|
63 |
private XPathExpression xprResumptionPath; |
|
64 |
private XPathExpression xprEntity; |
|
65 |
private String queryFormat; |
|
66 |
private String querySize; |
|
67 |
private String authMethod; |
|
68 |
private String authToken; |
|
69 |
private Queue<String> recordQueue = new PriorityBlockingQueue<String>(); |
|
70 |
private int discoverResultSize = 0; |
|
71 |
private int pagination = 1; |
|
72 |
/* |
|
73 |
While resultFormatValue is added to the request parameter, this is used to say that the results are retrieved in json. |
|
74 |
useful for cases when the target API expects a resultFormatValue != json, but the results are returned in json. |
|
75 |
An example is the EU Open Data Portal API: resultFormatValue=standard, results are in json format. |
|
76 |
*/ |
|
77 |
private String resultOutputFormat; |
|
78 |
|
|
79 | 88 |
|
80 |
public RestIterator( |
|
81 |
final String baseUrl, |
|
82 |
final String resumptionType, |
|
83 |
final String resumptionParam, |
|
84 |
final String resumptionXpath, |
|
85 |
final String resultTotalXpath, |
|
86 |
final String resultFormatParam, |
|
87 |
final String resultFormatValue, |
|
88 |
final String resultSizeParam, |
|
89 |
final String resultSizeValueStr, |
|
90 |
final String queryParams, |
|
91 |
final String entityXpath, |
|
92 |
final String authMethod, |
|
93 |
final String authToken, |
|
94 |
final String resultOutputFormat |
|
95 |
) { |
|
96 |
this.jsonUtils = new JsonUtils(); |
|
97 |
this.baseUrl = baseUrl; |
|
98 |
this.resumptionType = resumptionType; |
|
99 |
this.resumptionParam = resumptionParam; |
|
100 |
this.resultFormatValue = resultFormatValue; |
|
101 |
this.queryParams = queryParams; |
|
102 |
this.resultSizeValue = Integer.valueOf(resultSizeValueStr); |
|
103 |
this.authMethod = authMethod; |
|
104 |
this.authToken = authToken; |
|
105 |
this.resultOutputFormat = resultOutputFormat; |
|
89 |
public RestIterator( |
|
90 |
final String baseUrl, |
|
91 |
final String resumptionType, |
|
92 |
final String resumptionParam, |
|
93 |
final String resumptionXpath, |
|
94 |
final String resultTotalXpath, |
|
95 |
final String resultFormatParam, |
|
96 |
final String resultFormatValue, |
|
97 |
final String resultSizeParam, |
|
98 |
final String resultSizeValueStr, |
|
99 |
final String queryParams, |
|
100 |
final String entityXpath, |
|
101 |
final String authMethod, |
|
102 |
final String authToken, |
|
103 |
final String resultOutputFormat, |
|
104 |
final Map<String, String> requestHeaders |
|
105 |
) { |
|
106 |
this.jsonUtils = new JsonUtils(); |
|
107 |
this.baseUrl = baseUrl; |
|
108 |
this.resumptionType = resumptionType; |
|
109 |
this.resumptionParam = resumptionParam; |
|
110 |
this.resultFormatValue = resultFormatValue; |
|
111 |
this.queryParams = queryParams; |
|
112 |
this.resultSizeValue = Integer.valueOf(resultSizeValueStr); |
|
113 |
this.authMethod = authMethod; |
|
114 |
this.authToken = authToken; |
|
115 |
this.resultOutputFormat = resultOutputFormat; |
|
116 |
this.requestHeaders = requestHeaders != null ? requestHeaders : Maps.newHashMap(); |
|
106 | 117 |
|
107 |
queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue : "";
|
|
108 |
querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";
|
|
118 |
queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue : "";
|
|
119 |
querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";
|
|
109 | 120 |
|
110 |
try { |
|
111 |
initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath); |
|
112 |
} catch (Exception e) { |
|
113 |
throw new IllegalStateException("xml transformation init failed: " + e.getMessage()); |
|
114 |
} |
|
115 |
initQueue(); |
|
116 |
} |
|
117 |
|
|
118 |
|
|
119 |
private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) |
|
120 |
throws TransformerConfigurationException, XPathExpressionException { |
|
121 |
transformer = TransformerFactory.newInstance().newTransformer(); |
|
122 |
transformer.setOutputProperty(OutputKeys.INDENT, "yes"); |
|
123 |
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); |
|
124 |
xpath = XPathFactory.newInstance().newXPath(); |
|
125 |
xprResultTotalPath = xpath.compile(resultTotalXpath); |
|
126 |
xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath); |
|
127 |
xprEntity = xpath.compile(entityXpath); |
|
128 |
} |
|
121 |
try { |
|
122 |
initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath); |
|
123 |
} catch (Exception e) { |
|
124 |
throw new IllegalStateException("xml transformation init failed: " + e.getMessage()); |
|
125 |
} |
|
126 |
initQueue(); |
|
127 |
} |
|
129 | 128 |
|
130 |
private void initQueue() { |
|
131 |
if( queryParams.equals("") && querySize.equals("") && queryFormat.equals("")) { |
|
132 |
query = baseUrl; |
|
133 |
} else { |
|
134 |
query = baseUrl + "?" + queryParams + querySize + queryFormat; |
|
135 |
} |
|
136 | 129 |
|
137 |
log.info("REST calls starting with "+query); |
|
138 |
} |
|
130 |
private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) |
|
131 |
throws TransformerConfigurationException, XPathExpressionException { |
|
132 |
transformer = TransformerFactory.newInstance().newTransformer(); |
|
133 |
transformer.setOutputProperty(OutputKeys.INDENT, "yes"); |
|
134 |
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); |
|
135 |
xpath = XPathFactory.newInstance().newXPath(); |
|
136 |
xprResultTotalPath = xpath.compile(resultTotalXpath); |
|
137 |
xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath); |
|
138 |
xprEntity = xpath.compile(entityXpath); |
|
139 |
} |
|
139 | 140 |
|
140 |
private void disconnect() { |
|
141 |
// TODO close inputstream |
|
142 |
} |
|
141 |
private void initQueue() { |
|
142 |
if (queryParams.equals("") && querySize.equals("") && queryFormat.equals("")) { |
|
143 |
query = baseUrl; |
|
144 |
} else { |
|
145 |
query = baseUrl + "?" + queryParams + querySize + queryFormat; |
|
146 |
} |
|
143 | 147 |
|
144 |
/* (non-Javadoc) |
|
145 |
* @see java.util.Iterator#hasNext() |
|
146 |
*/ |
|
147 |
@Override |
|
148 |
public boolean hasNext() { |
|
149 |
if (recordQueue.isEmpty() && query.isEmpty()) { |
|
150 |
disconnect(); |
|
151 |
return false; |
|
152 |
} else { |
|
153 |
return true; |
|
154 |
} |
|
155 |
} |
|
148 |
log.info("REST calls starting with " + query); |
|
149 |
} |
|
156 | 150 |
|
157 |
/* (non-Javadoc) |
|
158 |
* @see java.util.Iterator#next() |
|
159 |
*/ |
|
160 |
@Override |
|
161 |
public String next() { |
|
162 |
synchronized (recordQueue) { |
|
163 |
while (recordQueue.isEmpty() && !query.isEmpty()) { |
|
164 |
try { |
|
165 |
log.debug("get Query: " + query); |
|
166 |
query = downloadPage(query); |
|
167 |
log.debug("next queryURL from downloadPage(): " + query); |
|
168 |
} catch (CollectorServiceException e) { |
|
169 |
log.debug("CollectorPlugin.next()-Exception: " + e); |
|
170 |
throw new RuntimeException(e); |
|
171 |
} |
|
172 |
} |
|
173 |
return recordQueue.poll(); |
|
174 |
} |
|
175 |
} |
|
151 |
private void disconnect() { |
|
152 |
// TODO close inputstream |
|
153 |
} |
|
176 | 154 |
|
177 |
/* |
|
178 |
* download page and return nextQuery |
|
179 |
*/ |
|
180 |
private String downloadPage(String query) throws CollectorServiceException { |
|
181 |
String resultJson; |
|
182 |
String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; |
|
183 |
String nextQuery = ""; |
|
184 |
String emptyXml = resultXml + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">"; |
|
185 |
Node resultNode = null; |
|
186 |
NodeList nodeList = null; |
|
187 |
String qUrlArgument = ""; |
|
188 |
int urlOldResumptionSize = 0; |
|
189 |
InputStream theHttpInputStream; |
|
190 |
|
|
191 |
// check if cursor=* is initial set otherwise add it to the queryParam URL |
|
192 |
if( resumptionType.equalsIgnoreCase("deep-cursor") ) { |
|
193 |
log.debug("check resumptionType deep-cursor and check cursor=*?" + query); |
|
194 |
if(!query.contains("&cursor=")) { |
|
195 |
query += "&cursor=*"; |
|
196 |
} |
|
197 |
} |
|
155 |
/* (non-Javadoc) |
|
156 |
* @see java.util.Iterator#hasNext() |
|
157 |
*/ |
|
158 |
@Override |
|
159 |
public boolean hasNext() { |
|
160 |
if (recordQueue.isEmpty() && query.isEmpty()) { |
|
161 |
disconnect(); |
|
162 |
return false; |
|
163 |
} else { |
|
164 |
return true; |
|
165 |
} |
|
166 |
} |
|
198 | 167 |
|
199 |
try { |
|
200 |
URL qUrl = new URL(query); |
|
201 |
log.debug("authMethod :" + authMethod); |
|
202 |
if (this.authMethod == "bearer") { |
|
203 |
log.trace("authMethod before inputStream: " + resultXml); |
|
204 |
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); |
|
205 |
conn.setRequestProperty("Authorization","Bearer "+authToken); |
|
206 |
conn.setRequestProperty("Content-Type","application/json"); |
|
207 |
conn.setRequestMethod("GET"); |
|
208 |
theHttpInputStream = conn.getInputStream(); |
|
209 |
}else if (AUTHBASIC.equalsIgnoreCase(this.authMethod)) { |
|
210 |
log.trace("authMethod before inputStream: " + resultXml); |
|
211 |
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); |
|
212 |
conn.setRequestProperty("Authorization","Basic "+authToken); |
|
213 |
conn.setRequestProperty("accept","application/xml"); |
|
214 |
conn.setRequestMethod("GET"); |
|
215 |
theHttpInputStream = conn.getInputStream(); |
|
216 |
} else { |
|
217 |
theHttpInputStream = qUrl.openStream(); |
|
218 |
} |
|
219 |
|
|
220 |
resultStream = theHttpInputStream; |
|
221 |
if ("json".equals(resultOutputFormat)) { |
|
222 |
resultJson = IOUtils.toString(resultStream, "UTF-8"); |
|
223 |
resultXml = jsonUtils.convertToXML(resultJson); |
|
224 |
resultStream = IOUtils.toInputStream(resultXml, "UTF-8"); |
|
225 |
} |
|
168 |
/* (non-Javadoc) |
|
169 |
* @see java.util.Iterator#next() |
|
170 |
*/ |
|
171 |
@Override |
|
172 |
public String next() { |
|
173 |
synchronized (recordQueue) { |
|
174 |
while (recordQueue.isEmpty() && !query.isEmpty()) { |
|
175 |
try { |
|
176 |
log.debug("get Query: " + query); |
|
177 |
query = downloadPage(query); |
|
178 |
log.debug("next queryURL from downloadPage(): " + query); |
|
179 |
} catch (CollectorServiceException e) { |
|
180 |
log.debug("CollectorPlugin.next()-Exception: " + e); |
|
181 |
throw new RuntimeException(e); |
|
182 |
} |
|
183 |
} |
|
184 |
return recordQueue.poll(); |
|
185 |
} |
|
186 |
} |
|
226 | 187 |
|
227 |
if (!(emptyXml.toLowerCase()).equals(resultXml.toLowerCase())) { |
|
228 |
resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE); |
|
229 |
nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET); |
|
230 |
log.debug("nodeList.length: " + nodeList.getLength()); |
|
231 |
for (int i = 0; i < nodeList.getLength(); i++) { |
|
232 |
StringWriter sw = new StringWriter(); |
|
233 |
transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw)); |
|
234 |
String toEnqueue = sw.toString(); |
|
235 |
if(toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)){ |
|
236 |
log.warn("The following record resulted in empty item for the feeding queue: "+resultXml); |
|
237 |
} |
|
238 |
else{ recordQueue.add(sw.toString());} |
|
239 |
} |
|
240 |
} else { log.warn("resultXml is equal with emptyXml"); } |
|
188 |
/* |
|
189 |
* download page and return nextQuery |
|
190 |
*/ |
|
191 |
private String downloadPage(String query) throws CollectorServiceException { |
|
192 |
String resultJson; |
|
193 |
String resultXml = XML_HEADER; |
|
194 |
String nextQuery = ""; |
|
195 |
Node resultNode = null; |
|
196 |
NodeList nodeList = null; |
|
197 |
String qUrlArgument = ""; |
|
198 |
int urlOldResumptionSize = 0; |
|
199 |
InputStream theHttpInputStream; |
|
241 | 200 |
|
242 |
resumptionInt += resultSizeValue; |
|
201 |
// check if cursor=* is initial set otherwise add it to the queryParam URL |
|
202 |
if (resumptionType.equalsIgnoreCase("deep-cursor")) { |
|
203 |
log.debug("check resumptionType deep-cursor and check cursor=*?" + query); |
|
204 |
if (!query.contains("&cursor=")) { |
|
205 |
query += "&cursor=*"; |
|
206 |
} |
|
207 |
} |
|
243 | 208 |
|
244 |
switch (resumptionType.toLowerCase()) { |
|
245 |
case "scan": // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items |
|
246 |
resumptionStr = xprResumptionPath.evaluate(resultNode); |
|
247 |
break; |
|
209 |
try { |
|
210 |
URL qUrl = new URL(query); |
|
211 |
log.debug("authMethod :" + authMethod); |
|
212 |
// Compare by value, not by reference: `==` on Strings is only true for the very same object, so a runtime-supplied "bearer" would never match. Null-safe and case-insensitive, like the basic-auth check below.
if ("bearer".equalsIgnoreCase(this.authMethod)) { |
|
213 |
log.trace("authMethod before inputStream: " + resultXml); |
|
214 |
requestHeaders.put("Authorization", "Bearer " + authToken); |
|
215 |
requestHeaders.put("Content-Type", "application/json"); |
|
216 |
} else if (AUTHBASIC.equalsIgnoreCase(this.authMethod)) { |
|
217 |
log.trace("authMethod before inputStream: " + resultXml); |
|
218 |
requestHeaders.put("Authorization", "Basic " + authToken); |
|
219 |
requestHeaders.put("accept", "application/xml"); |
|
220 |
} |
|
248 | 221 |
|
249 |
case "count": // begin at one step for all records, iterate over items |
|
250 |
resumptionStr = Integer.toString(resumptionInt); |
|
251 |
break; |
|
222 |
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); |
|
223 |
conn.setRequestMethod("GET"); |
|
224 |
this.setRequestHeader(conn); |
|
225 |
resultStream = conn.getInputStream(); |
|
252 | 226 |
|
253 |
case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808) |
|
254 |
if (resultSizeValue < 2) {throw new CollectorServiceException("Mode: discover, Param 'resultSizeValue' is less than 2");} |
|
255 |
qUrlArgument = qUrl.getQuery(); |
|
256 |
String[] arrayQUrlArgument = qUrlArgument.split("&"); |
|
257 |
for (String arrayUrlArgStr : arrayQUrlArgument) { |
|
258 |
if (arrayUrlArgStr.startsWith(resumptionParam)) { |
|
259 |
String[] resumptionKeyValue = arrayUrlArgStr.split("="); |
|
260 |
if(isInteger(resumptionKeyValue[1])) { |
|
261 |
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); |
|
262 |
log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize); |
|
263 |
} else { |
|
264 |
log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]); |
|
265 |
} |
|
266 |
} |
|
267 |
} |
|
227 |
if ("json".equals(resultOutputFormat)) { |
|
228 |
resultJson = IOUtils.toString(resultStream, "UTF-8"); |
|
229 |
resultXml = jsonUtils.convertToXML(resultJson); |
|
230 |
resultStream = IOUtils.toInputStream(resultXml, "UTF-8"); |
|
231 |
} |
|
268 | 232 |
|
269 |
if (((emptyXml.toLowerCase()).equals(resultXml.toLowerCase())) |
|
270 |
|| ((nodeList != null) && (nodeList.getLength() < resultSizeValue)) |
|
271 |
) { |
|
272 |
// resumptionStr = ""; |
|
273 |
if (nodeList != null) { discoverResultSize += nodeList.getLength(); } |
|
274 |
resultTotal = discoverResultSize; |
|
275 |
} else { |
|
276 |
resumptionStr = Integer.toString(resumptionInt); |
|
277 |
resultTotal = resumptionInt + 1; |
|
278 |
if (nodeList != null) { discoverResultSize += nodeList.getLength(); } |
|
279 |
} |
|
280 |
log.debug("discoverResultSize: " + discoverResultSize); |
|
281 |
break; |
|
233 |
if (!isEmptyXml(resultXml)) { |
|
234 |
resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE); |
|
235 |
nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET); |
|
236 |
log.debug("nodeList.length: " + nodeList.getLength()); |
|
237 |
for (int i = 0; i < nodeList.getLength(); i++) { |
|
238 |
StringWriter sw = new StringWriter(); |
|
239 |
transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw)); |
|
240 |
String toEnqueue = sw.toString(); |
|
241 |
if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || isEmptyXml(toEnqueue)) { |
|
242 |
log.warn("The following record resulted in empty item for the feeding queue: " + resultXml); |
|
243 |
} else { |
|
244 |
recordQueue.add(sw.toString()); |
|
245 |
} |
|
246 |
} |
|
247 |
} else { |
|
248 |
log.warn("resultXml is equal with emptyXml"); |
|
249 |
} |
|
282 | 250 |
|
283 |
case "pagination": |
|
284 |
case "page": // pagination, iterate over page numbers |
|
285 |
pagination += 1; |
|
286 |
if (nodeList != null) { |
|
287 |
discoverResultSize += nodeList.getLength(); |
|
288 |
} else { |
|
289 |
resultTotal = discoverResultSize; |
|
290 |
pagination = discoverResultSize; |
|
291 |
} |
|
292 |
resumptionInt = pagination; |
|
293 |
resumptionStr = Integer.toString(resumptionInt); |
|
294 |
break; |
|
251 |
resumptionInt += resultSizeValue; |
|
295 | 252 |
|
296 |
case "deep-cursor": // size of result items unknown, iterate over items (for supporting deep cursor in solr) |
|
297 |
// isn't relevant -- if (resultSizeValue < 2) {throw new CollectorServiceException("Mode: deep-cursor, Param 'resultSizeValue' is less than 2");} |
|
253 |
switch (resumptionType.toLowerCase()) { |
|
254 |
case "scan": // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items |
|
255 |
resumptionStr = xprResumptionPath.evaluate(resultNode); |
|
256 |
break; |
|
298 | 257 |
|
299 |
resumptionStr = encodeValue(xprResumptionPath.evaluate(resultNode)); |
|
300 |
queryParams = queryParams.replace("&cursor=*", ""); |
|
301 |
|
|
302 |
// terminating if length of nodeList is 0 |
|
303 |
if( (nodeList != null) && (nodeList.getLength() < discoverResultSize) ) { |
|
304 |
resumptionInt += ( nodeList.getLength() + 1 - resultSizeValue); |
|
305 |
} else { |
|
306 |
resumptionInt += (nodeList.getLength() - resultSizeValue); // subtract the resultSizeValue because the iteration is over real length and the resultSizeValue is added before the switch() |
|
307 |
} |
|
308 |
|
|
309 |
discoverResultSize = nodeList.getLength(); |
|
310 |
|
|
311 |
log.debug("downloadPage().deep-cursor: resumptionStr=" + resumptionStr + " ; queryParams=" + queryParams + " resumptionLengthIncreased: " + resumptionInt); |
|
258 |
case "count": // begin at one step for all records, iterate over items |
|
259 |
resumptionStr = Integer.toString(resumptionInt); |
|
260 |
break; |
|
312 | 261 |
|
313 |
break; |
|
314 |
|
|
315 |
default: // otherwise: abort |
|
316 |
// resultTotal = resumptionInt; |
|
317 |
break; |
|
318 |
} |
|
262 |
case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808) |
|
263 |
if (resultSizeValue < 2) { |
|
264 |
throw new CollectorServiceException("Mode: discover, Param 'resultSizeValue' is less than 2"); |
|
265 |
} |
|
266 |
qUrlArgument = qUrl.getQuery(); |
|
267 |
String[] arrayQUrlArgument = qUrlArgument.split("&"); |
|
268 |
for (String arrayUrlArgStr : arrayQUrlArgument) { |
|
269 |
if (arrayUrlArgStr.startsWith(resumptionParam)) { |
|
270 |
String[] resumptionKeyValue = arrayUrlArgStr.split("="); |
|
271 |
if (isInteger(resumptionKeyValue[1])) { |
|
272 |
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); |
|
273 |
log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize); |
|
274 |
} else { |
|
275 |
log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]); |
|
276 |
} |
|
277 |
} |
|
278 |
} |
|
319 | 279 |
|
320 |
} catch (Exception e) { |
|
321 |
log.error(e); |
|
322 |
throw new IllegalStateException("collection failed: " + e.getMessage()); |
|
323 |
} |
|
324 |
|
|
325 |
try { |
|
326 |
if (resultTotal == -1) { |
|
327 |
resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode)); |
|
328 |
if (resumptionType.toLowerCase().equals("page") && !AUTHBASIC.equalsIgnoreCase(authMethod)) { resultTotal += 1; } // to correct the upper bound |
|
329 |
log.info("resultTotal was -1 is now: " + resultTotal); |
|
330 |
} |
|
331 |
} catch(Exception e) { |
|
332 |
log.error(e); |
|
333 |
throw new IllegalStateException("downloadPage() resultTotal couldn't parse: " + e.getMessage()); |
|
334 |
} |
|
335 |
log.debug("resultTotal: " + resultTotal); |
|
336 |
log.debug("resInt: " + resumptionInt); |
|
337 |
if (resumptionInt <= resultTotal) { |
|
338 |
nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat; |
|
339 |
} else { |
|
340 |
nextQuery = ""; |
|
341 |
// if (resumptionType.toLowerCase().equals("deep-cursor")) { resumptionInt -= 1; } // correct the resumptionInt and prevent a NullPointer Exception at mdStore |
|
342 |
} |
|
343 |
log.debug("downloadPage() nextQueryUrl: " + nextQuery); |
|
344 |
return nextQuery; |
|
280 |
if (isEmptyXml(resultXml) || ((nodeList != null) && (nodeList.getLength() < resultSizeValue)) |
|
281 |
) { |
|
282 |
// resumptionStr = ""; |
|
283 |
if (nodeList != null) { |
|
284 |
discoverResultSize += nodeList.getLength(); |
|
285 |
} |
|
286 |
resultTotal = discoverResultSize; |
|
287 |
} else { |
|
288 |
resumptionStr = Integer.toString(resumptionInt); |
|
289 |
resultTotal = resumptionInt + 1; |
|
290 |
if (nodeList != null) { |
|
291 |
discoverResultSize += nodeList.getLength(); |
|
292 |
} |
|
293 |
} |
|
294 |
log.debug("discoverResultSize: " + discoverResultSize); |
|
295 |
break; |
|
345 | 296 |
|
297 |
case "pagination": |
|
298 |
case "page": // pagination, iterate over page numbers |
|
299 |
pagination += 1; |
|
300 |
if (nodeList != null) { |
|
301 |
discoverResultSize += nodeList.getLength(); |
|
302 |
} else { |
|
303 |
resultTotal = discoverResultSize; |
|
304 |
pagination = discoverResultSize; |
|
305 |
} |
|
306 |
resumptionInt = pagination; |
|
307 |
resumptionStr = Integer.toString(resumptionInt); |
|
308 |
break; |
|
346 | 309 |
|
347 |
} |
|
310 |
case "deep-cursor": // size of result items unknown, iterate over items (for supporting deep cursor in solr) |
|
311 |
// isn't relevant -- if (resultSizeValue < 2) {throw new CollectorServiceException("Mode: deep-cursor, Param 'resultSizeValue' is less than 2");} |
|
348 | 312 |
|
313 |
resumptionStr = encodeValue(xprResumptionPath.evaluate(resultNode)); |
|
314 |
queryParams = queryParams.replace("&cursor=*", ""); |
|
349 | 315 |
|
350 |
|
|
351 |
private boolean isInteger(String s) { |
|
352 |
boolean isValidInteger = false; |
|
353 |
try { |
|
354 |
Integer.parseInt(s); |
|
316 |
// terminating if length of nodeList is 0 |
|
317 |
if ((nodeList != null) && (nodeList.getLength() < discoverResultSize)) { |
|
318 |
resumptionInt += (nodeList.getLength() + 1 - resultSizeValue); |
|
319 |
} else { |
|
320 |
resumptionInt += (nodeList.getLength() - resultSizeValue); // subtract the resultSizeValue because the iteration is over real length and the resultSizeValue is added before the switch() |
|
321 |
} |
|
355 | 322 |
|
356 |
// s is a valid integer
|
|
323 |
discoverResultSize = nodeList.getLength();
|
|
357 | 324 |
|
358 |
isValidInteger = true; |
|
359 |
} catch (NumberFormatException ex) { |
|
360 |
// s is not an integer |
|
361 |
} |
|
325 |
log.debug("downloadPage().deep-cursor: resumptionStr=" + resumptionStr + " ; queryParams=" + queryParams + " resumptionLengthIncreased: " + resumptionInt); |
|
362 | 326 |
|
363 |
return isValidInteger; |
|
364 |
} |
|
365 |
|
|
366 |
// Method to encode a string value using `UTF-8` encoding scheme |
|
327 |
break; |
|
328 |
|
|
329 |
default: // otherwise: abort |
|
330 |
// resultTotal = resumptionInt; |
|
331 |
break; |
|
332 |
} |
|
333 |
|
|
334 |
} catch (Exception e) { |
|
335 |
log.error(e); |
|
336 |
throw new IllegalStateException("collection failed: " + e.getMessage()); |
|
337 |
} |
|
338 |
|
|
339 |
try { |
|
340 |
if (resultTotal == -1) { |
|
341 |
resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode)); |
|
342 |
if (resumptionType.toLowerCase().equals("page") && !AUTHBASIC.equalsIgnoreCase(authMethod)) { |
|
343 |
resultTotal += 1; |
|
344 |
} // to correct the upper bound |
|
345 |
log.info("resultTotal was -1 is now: " + resultTotal); |
|
346 |
} |
|
347 |
} catch (Exception e) { |
|
348 |
log.error(e); |
|
349 |
throw new IllegalStateException("downloadPage() resultTotal couldn't parse: " + e.getMessage()); |
|
350 |
} |
|
351 |
log.debug("resultTotal: " + resultTotal); |
|
352 |
log.debug("resInt: " + resumptionInt); |
|
353 |
if (resumptionInt <= resultTotal) { |
|
354 |
nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat; |
|
355 |
} else { |
|
356 |
nextQuery = ""; |
|
357 |
// if (resumptionType.toLowerCase().equals("deep-cursor")) { resumptionInt -= 1; } // correct the resumptionInt and prevent a NullPointer Exception at mdStore |
|
358 |
} |
|
359 |
log.debug("downloadPage() nextQueryUrl: " + nextQuery); |
|
360 |
return nextQuery; |
|
361 |
|
|
362 |
|
|
363 |
} |
|
364 |
|
|
365 |
private boolean isEmptyXml(String s){ |
|
366 |
return EMPTY_XML.equalsIgnoreCase(s); |
|
367 |
} |
|
368 |
|
|
369 |
|
|
370 |
private boolean isInteger(String s) { |
|
371 |
boolean isValidInteger = false; |
|
372 |
try { |
|
373 |
Integer.parseInt(s); |
|
374 |
|
|
375 |
// s is a valid integer |
|
376 |
|
|
377 |
isValidInteger = true; |
|
378 |
} catch (NumberFormatException ex) { |
|
379 |
// s is not an integer |
|
380 |
} |
|
381 |
|
|
382 |
return isValidInteger; |
|
383 |
} |
|
384 |
|
|
385 |
// Method to encode a string value using `UTF-8` encoding scheme |
|
367 | 386 |
private String encodeValue(String value) { |
368 | 387 |
try { |
369 | 388 |
return URLEncoder.encode(value, StandardCharsets.UTF_8.toString()); |
... | ... | |
372 | 391 |
} |
373 | 392 |
} |
374 | 393 |
|
375 |
public String getResultFormatValue() { |
|
376 |
return resultFormatValue; |
|
377 |
} |
|
394 |
private void setRequestHeader(HttpURLConnection conn) { |
|
395 |
if (requestHeaders != null) { |
|
396 |
for (String key : requestHeaders.keySet()) { |
|
397 |
conn.setRequestProperty(key, requestHeaders.get(key)); |
|
398 |
} |
|
399 |
log.debug("Set Request Header with: " + requestHeaders); |
|
400 |
} |
|
378 | 401 |
|
379 |
public String getResultOutputFormat() { |
|
380 |
return resultOutputFormat; |
|
381 |
} |
|
402 |
} |
|
382 | 403 |
|
404 |
public String getResultFormatValue() { |
|
405 |
return resultFormatValue; |
|
406 |
} |
|
407 |
|
|
408 |
public String getResultOutputFormat() { |
|
409 |
return resultOutputFormat; |
|
410 |
} |
|
411 |
|
|
383 | 412 |
} |
Also available in: Unified diff
The REST plugin now accepts optional request headers, passed as a JSON map, e.g. {Accept:application/json}. This is useful for supporting PostgREST endpoints, which need a parameter in the request header in order to return proper JSON. Request headers must also be used for pagination (not implemented in this commit). More details on PostgREST are available at https://postgrest.org/en/stable/api.html.
This commit also includes minor refactoring.