Project

General

Profile

1
package eu.dnetlib.data.collector.plugins;
2

    
3
import java.io.IOException;
4
import java.io.InputStream;
5
import java.net.*;
6
import java.security.GeneralSecurityException;
7
import java.security.cert.CertificateException;
8
import java.security.cert.X509Certificate;
9
import java.util.List;
10
import java.util.Map;
11
import javax.net.ssl.HttpsURLConnection;
12
import javax.net.ssl.SSLContext;
13
import javax.net.ssl.TrustManager;
14
import javax.net.ssl.X509TrustManager;
15

    
16
import eu.dnetlib.data.collector.plugin.CollectorPluginErrorLogList;
17
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
18
import org.apache.commons.io.IOUtils;
19
import org.apache.commons.lang3.math.NumberUtils;
20
import org.apache.commons.logging.Log;
21
import org.apache.commons.logging.LogFactory;
22

    
23
/**
24
 * @author jochen, michele, andrea
25
 *
26
 */
27
public class HttpConnector {
28

    
29
	private static final Log log = LogFactory.getLog(HttpConnector.class);
30

    
31
	private int maxNumberOfRetry = 6;
32
	private int defaultDelay = 120; // seconds
33
	private int readTimeOut = 120; // seconds
34
	private String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
35

    
36
    public HttpConnector(){
37
    	CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL)); 
38
    }
39
    
40
	/**
41
	 * Given the URL returns the content via HTTP GET
42
	 *
43
	 * @param requestUrl the URL
44
	 * @return the content of the downloaded resource
45
	 * @throws CollectorServiceException when retrying more than maxNumberOfRetry times
46
	 */
47
	public String getInputSource(final String requestUrl) throws CollectorServiceException {
48
		return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
49
	}
50

    
51
	private String attemptDownload(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
52
			throws CollectorServiceException {
53

    
54
		if (retryNumber > maxNumberOfRetry) { throw new CollectorServiceException("Max number of retries exceeded. Cause: \n " + errorList); }
55

    
56
		log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
57
		try {
58
			InputStream input = null;
59

    
60
			try {
61
				final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
62
				urlConn.setInstanceFollowRedirects(false);
63
				urlConn.setReadTimeout(readTimeOut * 1000);
64
				urlConn.addRequestProperty("User-Agent", userAgent);
65

    
66
				if (log.isDebugEnabled()) {
67
					logHeaderFields(urlConn);
68
				}
69

    
70
				int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
71
				if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
72
					log.warn("waiting and repeating request after " + retryAfter + " sec.");
73
					Thread.sleep(retryAfter * 1000);
74
					errorList.add("503 Service Unavailable");
75
					urlConn.disconnect();
76
					return attemptDownload(requestUrl, retryNumber + 1, errorList);
77
				} else if ((urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM) || (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP)) {
78
					final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
79
					log.debug("The requested url has been moved to " + newUrl);
80
					errorList.add(String.format("%s %s. Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl));
81
					urlConn.disconnect();
82
					return attemptDownload(newUrl, retryNumber + 1, errorList);
83
				} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
84
					log.error(String.format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
85
					Thread.sleep(defaultDelay * 1000);
86
					errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
87
					urlConn.disconnect();
88
					return attemptDownload(requestUrl, retryNumber + 1, errorList);
89
				} else {
90
					input = urlConn.getInputStream();
91
					return IOUtils.toString(input);
92
				}
93
			} catch (IOException e) {
94
				log.error("error while retrieving from http-connection occured: " + e, e);
95
				Thread.sleep(defaultDelay * 1000);
96
				errorList.add(e.getMessage());
97
				return attemptDownload(requestUrl, retryNumber + 1, errorList);
98
			} finally {
99
				IOUtils.closeQuietly(input);
100
			}
101
		} catch (InterruptedException e) {
102
			throw new CollectorServiceException(e);
103
		}
104
	}
105

    
106
	private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
107
		log.debug("StatusCode: " + urlConn.getResponseMessage());
108

    
109
		for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
110
			if (e.getKey() != null) {
111
				for (String v : e.getValue()) {
112
					log.debug("  key: " + e.getKey() + " - value: " + v);
113
				}
114
			}
115
		}
116
	}
117

    
118
	private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
119
		for (String key : headerMap.keySet()) {
120
			if ((key != null) && key.toLowerCase().equals("retry-after") && (headerMap.get(key).size() > 0) && NumberUtils.isNumber(headerMap.get(key).get(0))) { return Integer
121
					.parseInt(headerMap.get(key).get(0)) + 10; }
122
		}
123
		return -1;
124
	}
125

    
126
	private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorServiceException {
127
		for (String key : headerMap.keySet()) {
128
			if ((key != null) && key.toLowerCase().equals("location") && (headerMap.get(key).size() > 0)) { return headerMap.get(key).get(0); }
129
		}
130
		throw new CollectorServiceException("The requested url has been MOVED, but 'location' param is MISSING");
131
	}
132

    
133
	/**
134
	 * register for https scheme; this is a workaround and not intended for the use in trusted environments
135
	 */
136
	public void initTrustManager() {
137
		final X509TrustManager tm = new X509TrustManager() {
138

    
139
			@Override
140
			public void checkClientTrusted(final X509Certificate[] xcs, final String string) throws CertificateException {}
141

    
142
			@Override
143
			public void checkServerTrusted(final X509Certificate[] xcs, final String string) throws CertificateException {}
144

    
145
			@Override
146
			public X509Certificate[] getAcceptedIssuers() {
147
				return null;
148
			}
149
		};
150
		try {
151
			final SSLContext ctx = SSLContext.getInstance("TLS");
152
			ctx.init(null, new TrustManager[] { tm }, null);
153
			HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
154
		} catch (GeneralSecurityException e) {
155
			log.fatal(e);
156
			throw new IllegalStateException(e);
157
		}
158
	}
159

    
160
	public int getMaxNumberOfRetry() {
161
		return maxNumberOfRetry;
162
	}
163

    
164
	public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
165
		this.maxNumberOfRetry = maxNumberOfRetry;
166
	}
167

    
168
	public int getDefaultDelay() {
169
		return defaultDelay;
170
	}
171

    
172
	public void setDefaultDelay(final int defaultDelay) {
173
		this.defaultDelay = defaultDelay;
174
	}
175

    
176
	public int getReadTimeOut() {
177
		return readTimeOut;
178
	}
179

    
180
	public void setReadTimeOut(final int readTimeOut) {
181
		this.readTimeOut = readTimeOut;
182
	}
183

    
184
}
(8-8/8)