Project

General

Profile

1 49643 alessia.ba
package eu.dnetlib.data.collector.plugins;
2 28959 michele.ar
3 29018 michele.ar
import java.io.IOException;
4 29169 andrea.man
import java.io.InputStream;
5 51556 claudio.at
import java.net.*;
6 28959 michele.ar
import java.security.GeneralSecurityException;
7
import java.security.cert.X509Certificate;
8
import java.util.List;
9
import java.util.Map;
10
import javax.net.ssl.HttpsURLConnection;
11
import javax.net.ssl.SSLContext;
12
import javax.net.ssl.TrustManager;
13
import javax.net.ssl.X509TrustManager;
14
15 51556 claudio.at
import eu.dnetlib.data.collector.plugin.CollectorPluginErrorLogList;
16
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
17 29018 michele.ar
import org.apache.commons.io.IOUtils;
18 50665 jochen.sch
import org.apache.commons.lang3.math.NumberUtils;
19 28959 michele.ar
import org.apache.commons.logging.Log;
20
import org.apache.commons.logging.LogFactory;
21
22
/**
23 57866 alessia.ba
 * @author jochen, michele, andrea, alessia
24 28959 michele.ar
 */
25
public class HttpConnector {
26 29020 michele.ar
27 28959 michele.ar
	private static final Log log = LogFactory.getLog(HttpConnector.class);
28
29 34867 andrea.man
	private int maxNumberOfRetry = 6;
30
	private int defaultDelay = 120; // seconds
31
	private int readTimeOut = 120; // seconds
32 51955 miriam.bag
33 53932 alessia.ba
	private String responseType = null;
34 51955 miriam.bag
35 48804 jochen.sch
	private String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
36 29020 michele.ar
37 53932 alessia.ba
	/**
	 * Creates a connector and installs a new accept-all {@link CookieManager} as the default
	 * {@link CookieHandler}.
	 * <p>
	 * The default cookie handler is system-wide, so every instantiation replaces the handler
	 * (and its cookie store) that was installed before.
	 */
	public HttpConnector() {
		CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
	}
40
41 28959 michele.ar
	/**
42 51556 claudio.at
	 * Given the URL returns the content via HTTP GET
43
	 *
44
	 * @param requestUrl the URL
45 34867 andrea.man
	 * @return the content of the downloaded resource
46 51556 claudio.at
	 * @throws CollectorServiceException when retrying more than maxNumberOfRetry times
47 28959 michele.ar
	 */
48 29018 michele.ar
	public String getInputSource(final String requestUrl) throws CollectorServiceException {
49 53932 alessia.ba
		return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
50
	}
51
52
	/**
53
	 * Given the URL returns the content as a stream via HTTP GET
54
	 *
55
	 * @param requestUrl the URL
56
	 * @return the content of the downloaded resource as InputStream
57
	 * @throws CollectorServiceException when retrying more than maxNumberOfRetry times
58
	 */
59
	public InputStream getInputSourceAsStream(final String requestUrl) throws CollectorServiceException {
60 38175 andrea.man
		return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
61 29020 michele.ar
	}
62 29018 michele.ar
63 53932 alessia.ba
	private String attemptDownlaodAsString(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
64
			throws CollectorServiceException {
65
		try {
66
			InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
67
			try {
68
				return IOUtils.toString(s);
69
			} catch (IOException e) {
70
				log.error("error while retrieving from http-connection occured: " + requestUrl, e);
71
				Thread.sleep(defaultDelay * 1000);
72
				errorList.add(e.getMessage());
73
				return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList);
74
			}
75
			finally{
76
				IOUtils.closeQuietly(s);
77
			}
78
		} catch (InterruptedException e) {
79
			throw new CollectorServiceException(e);
80
		}
81
	}
82 51955 miriam.bag
83 53932 alessia.ba
	private InputStream attemptDownload(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
84 38175 andrea.man
			throws CollectorServiceException {
85 29020 michele.ar
86 38175 andrea.man
		if (retryNumber > maxNumberOfRetry) { throw new CollectorServiceException("Max number of retries exceeded. Cause: \n " + errorList); }
87 34867 andrea.man
88 29020 michele.ar
		log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
89 28959 michele.ar
		try {
90 29169 andrea.man
			InputStream input = null;
91 34867 andrea.man
92 29020 michele.ar
			try {
93 37627 michele.ar
				final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
94 34867 andrea.man
				urlConn.setInstanceFollowRedirects(false);
95 31724 michele.ar
				urlConn.setReadTimeout(readTimeOut * 1000);
96 51556 claudio.at
				urlConn.addRequestProperty("User-Agent", userAgent);
97 28959 michele.ar
98 29020 michele.ar
				if (log.isDebugEnabled()) {
99
					logHeaderFields(urlConn);
100
				}
101
102
				int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
103 57866 alessia.ba
				if (is2xx(urlConn.getResponseCode())) {
104 29169 andrea.man
					input = urlConn.getInputStream();
105 51955 miriam.bag
					responseType = urlConn.getContentType();
106 53932 alessia.ba
					return input;
107 29020 michele.ar
				}
108 57866 alessia.ba
				if (is3xx(urlConn.getResponseCode())) {
109
					//REDIRECTS
110
					final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
111
					log.debug(String.format("The requested url %s has been moved to %s", requestUrl, newUrl));
112
					errorList.add(String.format("%s %s %s. Moved to: %s", requestUrl, urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl));
113
					urlConn.disconnect();
114
					if (retryAfter > 0) Thread.sleep(retryAfter * 1000);
115
					return attemptDownload(newUrl, retryNumber + 1, errorList);
116
				}
117
				if (is4xx(urlConn.getResponseCode())) {
118
					//CLIENT ERROR, DO NOT RETRY
119
					errorList.add(String.format("%s error %s: %s", requestUrl, urlConn.getResponseCode(), urlConn.getResponseMessage()));
120
					throw new CollectorServiceException("4xx error: request will not be repeated." + errorList);
121
				}
122
				if (is5xx(urlConn.getResponseCode())) {
123
					//SERVER SIDE ERRORS RETRY ONLY on 503
124
					switch (urlConn.getResponseCode()) {
125
						case HttpURLConnection.HTTP_UNAVAILABLE:
126
							if (retryAfter > 0) {
127
								log.warn(requestUrl+" - waiting and repeating request after suggested retry-after" + retryAfter + " sec.");
128
								Thread.sleep(retryAfter * 1000);
129
							}
130
							else {
131
								log.warn(requestUrl+" - waiting and repeating request after default delay of " + defaultDelay + " sec.");
132
								Thread.sleep(defaultDelay * 1000);
133
							}
134
							errorList.add(requestUrl+" 503 Service Unavailable");
135
							urlConn.disconnect();
136
							return attemptDownload(requestUrl, retryNumber + 1, errorList);
137
						default:
138
							errorList.add(String.format("%s Error %s: %s", requestUrl, urlConn.getResponseCode(), urlConn.getResponseMessage()));
139
							throw new CollectorServiceException(urlConn.getResponseCode() + "error" + errorList);
140
					}
141
				}
142
				throw new CollectorServiceException("Unexpected status code: " + urlConn.getResponseCode() + "error" + errorList);
143
			}catch(MalformedURLException | NoRouteToHostException e){
144
				errorList.add(String.format("Error: %s for request url: %s", e.getCause(), requestUrl));
145
				throw new CollectorServiceException(e+ "error "+errorList);
146 29020 michele.ar
			} catch (IOException e) {
147 31724 michele.ar
				Thread.sleep(defaultDelay * 1000);
148 57866 alessia.ba
				errorList.add(requestUrl+ " "+e.getMessage());
149 38175 andrea.man
				return attemptDownload(requestUrl, retryNumber + 1, errorList);
150 29018 michele.ar
			}
151
		} catch (InterruptedException e) {
152
			throw new CollectorServiceException(e);
153 28959 michele.ar
		}
154
	}
155
156 29018 michele.ar
	private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
157
		log.debug("StatusCode: " + urlConn.getResponseMessage());
158 29020 michele.ar
159 34867 andrea.man
		for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
160 29018 michele.ar
			if (e.getKey() != null) {
161
				for (String v : e.getValue()) {
162 29020 michele.ar
					log.debug("  key: " + e.getKey() + " - value: " + v);
163 29018 michele.ar
				}
164
			}
165
		}
166 28959 michele.ar
	}
167 29020 michele.ar
168 29018 michele.ar
	private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
169 34867 andrea.man
		for (String key : headerMap.keySet()) {
170 53932 alessia.ba
			if ((key != null) && key.toLowerCase().equals("retry-after") && (headerMap.get(key).size() > 0) && NumberUtils.isCreatable(headerMap.get(key).get(0))) {
171
				return Integer
172
						.parseInt(headerMap.get(key).get(0)) + 10;
173
			}
174 28959 michele.ar
		}
175 29018 michele.ar
		return -1;
176 28959 michele.ar
	}
177 29020 michele.ar
178 29018 michele.ar
	private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorServiceException {
179 34867 andrea.man
		for (String key : headerMap.keySet()) {
180 38105 andrea.man
			if ((key != null) && key.toLowerCase().equals("location") && (headerMap.get(key).size() > 0)) { return headerMap.get(key).get(0); }
181 29018 michele.ar
		}
182
		throw new CollectorServiceException("The requested url has been MOVED, but 'location' param is MISSING");
183
	}
184 28959 michele.ar
185
	/**
186
	 * register for https scheme; this is a workaround and not intended for the use in trusted environments
187
	 */
188 31724 michele.ar
	public void initTrustManager() {
189 28959 michele.ar
		final X509TrustManager tm = new X509TrustManager() {
190 34867 andrea.man
191
			@Override
192 53932 alessia.ba
			public void checkClientTrusted(final X509Certificate[] xcs, final String string) {
193
			}
194 34867 andrea.man
195
			@Override
196 53932 alessia.ba
			public void checkServerTrusted(final X509Certificate[] xcs, final String string) {
197
			}
198 34867 andrea.man
199
			@Override
200
			public X509Certificate[] getAcceptedIssuers() {
201 28959 michele.ar
				return null;
202
			}
203
		};
204
		try {
205
			final SSLContext ctx = SSLContext.getInstance("TLS");
206 34867 andrea.man
			ctx.init(null, new TrustManager[] { tm }, null);
207 28959 michele.ar
			HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
208
		} catch (GeneralSecurityException e) {
209
			log.fatal(e);
210
			throw new IllegalStateException(e);
211
		}
212
	}
213 31724 michele.ar
214 57866 alessia.ba
	private boolean is2xx(final int statusCode) {
215
		return statusCode >= 200 && statusCode <=299;
216
	}
217
218
	private boolean is4xx(final int statusCode) {
219
		return statusCode >= 400 && statusCode <=499;
220
	}
221
222
	private boolean is3xx(final int statusCode) {
223
		return statusCode >= 300 && statusCode <=399;
224
	}
225
226
	private boolean is5xx(final int statusCode) {
227
		return statusCode >= 500 && statusCode <=599;
228
	}
229
230
231 31724 michele.ar
	public int getMaxNumberOfRetry() {
232
		return maxNumberOfRetry;
233
	}
234
235 34867 andrea.man
	public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
236 31724 michele.ar
		this.maxNumberOfRetry = maxNumberOfRetry;
237
	}
238
239
	public int getDefaultDelay() {
240
		return defaultDelay;
241
	}
242
243 34867 andrea.man
	public void setDefaultDelay(final int defaultDelay) {
244 31724 michele.ar
		this.defaultDelay = defaultDelay;
245
	}
246
247
	public int getReadTimeOut() {
248
		return readTimeOut;
249
	}
250
251 34867 andrea.man
	public void setReadTimeOut(final int readTimeOut) {
252 31724 michele.ar
		this.readTimeOut = readTimeOut;
253
	}
254 29169 andrea.man
255 53932 alessia.ba
	public String getResponseType() {
256
		return responseType;
257
	}
258 51955 miriam.bag
259 34867 andrea.man
}