32 |
32 |
private int maxNumberOfRetry = 6;
|
33 |
33 |
private int defaultDelay = 120; // seconds
|
34 |
34 |
private int readTimeOut = 120; // seconds
|
35 |
|
private String userAgent = "Mozilla/5.0 (compatible; OAI-Harvester; +http://www.openaire.eu)";
|
|
35 |
private String userAgent = "Mozilla/5.0 (compatible; OAI-Harvester; D-Net framework toolkit)";
|
36 |
36 |
|
37 |
37 |
public HttpConnector() {
|
38 |
38 |
CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
|
... | ... | |
44 |
44 |
* @throws CollectorServiceException
|
45 |
45 |
*/
|
46 |
46 |
public String getInputSource(final String requestUrl) throws CollectorServiceException {
|
47 |
|
return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
|
|
47 |
return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList(), false);
|
48 |
48 |
}
|
49 |
49 |
|
50 |
|
private String attemptDownload(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
|
|
50 |
private String attemptDownload(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList, boolean increaseTimeout)
|
51 |
51 |
throws CollectorServiceException {
|
52 |
52 |
|
53 |
53 |
if (retryNumber > maxNumberOfRetry) { throw new CollectorServiceException("Max number of retries exceeded. Cause: \n " + errorList); }
|
... | ... | |
60 |
60 |
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
|
61 |
61 |
urlConn.setInstanceFollowRedirects(false);
|
62 |
62 |
urlConn.setReadTimeout(readTimeOut * 1000);
|
|
63 |
if(increaseTimeout){
|
|
64 |
urlConn.setReadTimeout(readTimeOut * 1000 * retryNumber);
|
|
65 |
}
|
|
66 |
log.debug("Read timeout set to "+urlConn.getReadTimeout());
|
63 |
67 |
urlConn.addRequestProperty("User-Agent", userAgent);
|
64 |
68 |
|
65 |
69 |
if (log.isDebugEnabled()) {
|
... | ... | |
71 |
75 |
log.warn("waiting and repeating request after " + retryAfter + " sec.");
|
72 |
76 |
Thread.sleep(retryAfter * 1000);
|
73 |
77 |
errorList.add("503 Service Unavailable");
|
74 |
|
return attemptDownload(requestUrl, retryNumber + 1, errorList);
|
|
78 |
return attemptDownload(requestUrl, retryNumber + 1, errorList, false);
|
75 |
79 |
} else if ((urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM) || (urlConn.getResponseCode()
|
76 |
80 |
== HttpURLConnection.HTTP_MOVED_TEMP)) {
|
77 |
81 |
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
|
78 |
82 |
log.info("The requested url has been moved to " + newUrl);
|
79 |
83 |
errorList.add(String.format("%s %s. Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl));
|
80 |
|
return attemptDownload(newUrl, retryNumber + 1, errorList);
|
|
84 |
return attemptDownload(newUrl, retryNumber + 1, errorList, false);
|
81 |
85 |
} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK && urlConn.getResponseCode() != HttpURLConnection.HTTP_CREATED) {
|
82 |
86 |
// 201 fix for Huma-num
|
83 |
87 |
log.error(String.format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
|
84 |
88 |
Thread.sleep(defaultDelay * 1000);
|
85 |
89 |
errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
|
86 |
|
return attemptDownload(requestUrl, retryNumber + 1, errorList);
|
|
90 |
return attemptDownload(requestUrl, retryNumber + 1, errorList, false);
|
87 |
91 |
} else {
|
88 |
92 |
input = urlConn.getInputStream();
|
89 |
93 |
return IOUtils.toString(input);
|
... | ... | |
92 |
96 |
log.error("error while retrieving from http-connection occured: " + e, e);
|
93 |
97 |
Thread.sleep(defaultDelay * 1000);
|
94 |
98 |
errorList.add(e.getMessage());
|
95 |
|
return attemptDownload(requestUrl, retryNumber + 1, errorList);
|
|
99 |
return attemptDownload(requestUrl, retryNumber + 1, errorList, true);
|
96 |
100 |
} finally {
|
97 |
101 |
IOUtils.closeQuietly(input);
|
98 |
102 |
}
|
every time a request times out, the read timeout for the next attempt is increased