Project

General

Profile

1
package eu.dnetlib.data.collector.plugins;
2

    
3
import java.io.IOException;
4
import java.io.InputStream;
5
import java.net.*;
6
import java.security.GeneralSecurityException;
7
import java.security.cert.X509Certificate;
8
import java.util.List;
9
import java.util.Map;
10
import javax.net.ssl.HttpsURLConnection;
11
import javax.net.ssl.SSLContext;
12
import javax.net.ssl.TrustManager;
13
import javax.net.ssl.X509TrustManager;
14

    
15
import eu.dnetlib.data.collector.plugin.CollectorPluginErrorLogList;
16
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
17
import org.apache.commons.io.IOUtils;
18
import org.apache.commons.lang3.math.NumberUtils;
19
import org.apache.commons.logging.Log;
20
import org.apache.commons.logging.LogFactory;
21

    
22
/**
23
 * @author jochen, michele, andrea
24
 */
25
public class HttpConnector {
26

    
27
	private static final Log log = LogFactory.getLog(HttpConnector.class);
28

    
29
	private int maxNumberOfRetry = 6;
30
	private int defaultDelay = 120; // seconds
31
	private int readTimeOut = 120; // seconds
32

    
33
	private String responseType = null;
34

    
35
	private String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
36

    
37
	public HttpConnector() {
38
		CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
39
	}
40

    
41
	/**
42
	 * Given the URL returns the content via HTTP GET
43
	 *
44
	 * @param requestUrl the URL
45
	 * @return the content of the downloaded resource
46
	 * @throws CollectorServiceException when retrying more than maxNumberOfRetry times
47
	 */
48
	public String getInputSource(final String requestUrl) throws CollectorServiceException {
49
		return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
50
	}
51

    
52
	/**
53
	 * Given the URL returns the content as a stream via HTTP GET
54
	 *
55
	 * @param requestUrl the URL
56
	 * @return the content of the downloaded resource as InputStream
57
	 * @throws CollectorServiceException when retrying more than maxNumberOfRetry times
58
	 */
59
	public InputStream getInputSourceAsStream(final String requestUrl) throws CollectorServiceException {
60
		return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
61
	}
62

    
63
	private String attemptDownlaodAsString(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
64
			throws CollectorServiceException {
65
		try {
66
			InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
67
			try {
68
				return IOUtils.toString(s);
69
			} catch (IOException e) {
70
				log.error("error while retrieving from http-connection occured: " + requestUrl, e);
71
				Thread.sleep(defaultDelay * 1000);
72
				errorList.add(e.getMessage());
73
				return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList);
74
			}
75
			finally{
76
				IOUtils.closeQuietly(s);
77
			}
78
		} catch (InterruptedException e) {
79
			throw new CollectorServiceException(e);
80
		}
81
	}
82

    
83
	private InputStream attemptDownload(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
84
			throws CollectorServiceException {
85

    
86
		if (retryNumber > maxNumberOfRetry) { throw new CollectorServiceException("Max number of retries exceeded. Cause: \n " + errorList); }
87

    
88
		log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
89
		try {
90
			InputStream input = null;
91

    
92
			try {
93
				final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
94
				urlConn.setInstanceFollowRedirects(false);
95
				urlConn.setReadTimeout(readTimeOut * 1000);
96
				urlConn.addRequestProperty("User-Agent", userAgent);
97

    
98
				if (log.isDebugEnabled()) {
99
					logHeaderFields(urlConn);
100
				}
101

    
102
				int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
103
				if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
104
					log.warn("waiting and repeating request after " + retryAfter + " sec.");
105
					Thread.sleep(retryAfter * 1000);
106
					errorList.add("503 Service Unavailable");
107
					urlConn.disconnect();
108
					return attemptDownload(requestUrl, retryNumber + 1, errorList);
109
				} else if ((urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM) || (urlConn.getResponseCode()
110
						== HttpURLConnection.HTTP_MOVED_TEMP)) {
111
					final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
112
					log.debug("The requested url has been moved to " + newUrl);
113
					errorList.add(String.format("%s %s. Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl));
114
					urlConn.disconnect();
115
					return attemptDownload(newUrl, retryNumber + 1, errorList);
116
				} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
117
					log.error(String.format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
118
					Thread.sleep(defaultDelay * 1000);
119
					errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
120
					urlConn.disconnect();
121
					return attemptDownload(requestUrl, retryNumber + 1, errorList);
122
				} else {
123
					input = urlConn.getInputStream();
124
					responseType = urlConn.getContentType();
125
					return input;
126
				}
127
			} catch (IOException e) {
128
				log.error("error while retrieving from http-connection occured: " + requestUrl, e);
129
				Thread.sleep(defaultDelay * 1000);
130
				errorList.add(e.getMessage());
131
				return attemptDownload(requestUrl, retryNumber + 1, errorList);
132
			}
133
		} catch (InterruptedException e) {
134
			throw new CollectorServiceException(e);
135
		}
136
	}
137

    
138
	private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
139
		log.debug("StatusCode: " + urlConn.getResponseMessage());
140

    
141
		for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
142
			if (e.getKey() != null) {
143
				for (String v : e.getValue()) {
144
					log.debug("  key: " + e.getKey() + " - value: " + v);
145
				}
146
			}
147
		}
148
	}
149

    
150
	private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
151
		for (String key : headerMap.keySet()) {
152
			if ((key != null) && key.toLowerCase().equals("retry-after") && (headerMap.get(key).size() > 0) && NumberUtils.isCreatable(headerMap.get(key).get(0))) {
153
				return Integer
154
						.parseInt(headerMap.get(key).get(0)) + 10;
155
			}
156
		}
157
		return -1;
158
	}
159

    
160
	private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorServiceException {
161
		for (String key : headerMap.keySet()) {
162
			if ((key != null) && key.toLowerCase().equals("location") && (headerMap.get(key).size() > 0)) { return headerMap.get(key).get(0); }
163
		}
164
		throw new CollectorServiceException("The requested url has been MOVED, but 'location' param is MISSING");
165
	}
166

    
167
	/**
168
	 * register for https scheme; this is a workaround and not intended for the use in trusted environments
169
	 */
170
	public void initTrustManager() {
171
		final X509TrustManager tm = new X509TrustManager() {
172

    
173
			@Override
174
			public void checkClientTrusted(final X509Certificate[] xcs, final String string) {
175
			}
176

    
177
			@Override
178
			public void checkServerTrusted(final X509Certificate[] xcs, final String string) {
179
			}
180

    
181
			@Override
182
			public X509Certificate[] getAcceptedIssuers() {
183
				return null;
184
			}
185
		};
186
		try {
187
			final SSLContext ctx = SSLContext.getInstance("TLS");
188
			ctx.init(null, new TrustManager[] { tm }, null);
189
			HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
190
		} catch (GeneralSecurityException e) {
191
			log.fatal(e);
192
			throw new IllegalStateException(e);
193
		}
194
	}
195

    
196
	public int getMaxNumberOfRetry() {
197
		return maxNumberOfRetry;
198
	}
199

    
200
	public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
201
		this.maxNumberOfRetry = maxNumberOfRetry;
202
	}
203

    
204
	public int getDefaultDelay() {
205
		return defaultDelay;
206
	}
207

    
208
	public void setDefaultDelay(final int defaultDelay) {
209
		this.defaultDelay = defaultDelay;
210
	}
211

    
212
	public int getReadTimeOut() {
213
		return readTimeOut;
214
	}
215

    
216
	public void setReadTimeOut(final int readTimeOut) {
217
		this.readTimeOut = readTimeOut;
218
	}
219

    
220
	public String getResponseType() {
221
		return responseType;
222
	}
223

    
224
}
(8-8/8)