Project

General

Profile

1
package eu.dnetlib.data.collector.plugins.oai.engine;
2

    
3
import java.io.IOException;
4
import java.io.InputStream;
5
import java.net.CookieHandler;
6
import java.net.CookieManager;
7
import java.net.CookiePolicy;
8
import java.net.HttpURLConnection;
9
import java.net.URL;
10
import java.security.GeneralSecurityException;
11
import java.security.KeyManagementException;
12
import java.security.NoSuchAlgorithmException;
13
import java.security.cert.CertificateException;
14
import java.security.cert.X509Certificate;
15
import java.util.List;
16
import java.util.Map;
17

    
18
import javax.net.ssl.HttpsURLConnection;
19
import javax.net.ssl.SSLContext;
20
import javax.net.ssl.TrustManager;
21
import javax.net.ssl.X509TrustManager;
22

    
23
import org.apache.commons.io.IOUtils;
24
import org.apache.commons.lang.math.NumberUtils;
25
import org.apache.commons.logging.Log;
26
import org.apache.commons.logging.LogFactory;
27

    
28
import eu.dnetlib.data.collector.plugin.CollectorPluginErrorLogList;
29
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
30

    
31
/**
32
 * @author jochen, michele, andrea
33
 *
34
 */
35
public class HttpConnector {
36

    
37
	private static final Log log = LogFactory.getLog(HttpConnector.class);
38

    
39
	private int maxNumberOfRetry = 6;
40
	private int defaultDelay = 120; // seconds
41
	private int readTimeOut = 120; // seconds
42
	private String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
43

    
44
    public HttpConnector(){
45
    	CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL)); 
46
    }
47
    
48
	/**
49
	 * @param requestUrl
50
	 * @return the content of the downloaded resource
51
	 * @throws CollectorServiceException
52
	 */
53
	public String getInputSource(final String requestUrl) throws CollectorServiceException {
54
		return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
55
	}
56

    
57
	private String attemptDownload(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
58
			throws CollectorServiceException {
59

    
60
		if (retryNumber > maxNumberOfRetry) { throw new CollectorServiceException("Max number of retries exceeded. Cause: \n " + errorList); }
61

    
62
		log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
63
		try {
64
			InputStream input = null;
65

    
66
			try {
67
				final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
68
				urlConn.setInstanceFollowRedirects(false);
69
				urlConn.setReadTimeout(readTimeOut * 1000);
70
                                urlConn.addRequestProperty("User-Agent", userAgent);
71

    
72
				if (log.isDebugEnabled()) {
73
					logHeaderFields(urlConn);
74
				}
75

    
76
				int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
77
				if (retryAfter > 0) {
78
					log.warn("waiting and repeating request after " + retryAfter + " sec.");
79
					Thread.sleep(retryAfter * 1000);
80
					errorList.add("503 Service Unavailable");
81
					return attemptDownload(requestUrl, retryNumber + 1, errorList);
82
				} else if ((urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM) || (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP)) {
83
					final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
84
					log.info("The requested url has been moved to " + newUrl);
85
					errorList.add(String.format("%s %s. Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl));
86
					return attemptDownload(newUrl, retryNumber + 1, errorList);
87
				} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
88
					log.error(String.format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
89
					Thread.sleep(defaultDelay * 1000);
90
					errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
91
					return attemptDownload(requestUrl, retryNumber + 1, errorList);
92
				} else {
93
					input = urlConn.getInputStream();
94
					return IOUtils.toString(input);
95
				}
96
			} catch (IOException e) {
97
				log.error("error while retrieving from http-connection occured: " + e, e);
98
				Thread.sleep(defaultDelay * 1000);
99
				errorList.add(e.getMessage());
100
				return attemptDownload(requestUrl, retryNumber + 1, errorList);
101
			} finally {
102
				IOUtils.closeQuietly(input);
103
			}
104
		} catch (InterruptedException e) {
105
			throw new CollectorServiceException(e);
106
		}
107
	}
108

    
109
	private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
110
		log.debug("StatusCode: " + urlConn.getResponseMessage());
111

    
112
		for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
113
			if (e.getKey() != null) {
114
				for (String v : e.getValue()) {
115
					log.debug("  key: " + e.getKey() + " - value: " + v);
116
				}
117
			}
118
		}
119
	}
120

    
121
	private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
122
		for (String key : headerMap.keySet()) {
123
			if ((key != null) && key.toLowerCase().equals("retry-after") && (headerMap.get(key).size() > 0) && NumberUtils.isNumber(headerMap.get(key).get(0))) { return Integer
124
					.parseInt(headerMap.get(key).get(0)) + 10; }
125
		}
126
		return -1;
127
	}
128

    
129
	private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorServiceException {
130
		for (String key : headerMap.keySet()) {
131
			if ((key != null) && key.toLowerCase().equals("location") && (headerMap.get(key).size() > 0)) { return headerMap.get(key).get(0); }
132
		}
133
		throw new CollectorServiceException("The requested url has been MOVED, but 'location' param is MISSING");
134
	}
135

    
136
	/**
137
	 * register for https scheme; this is a workaround and not intended for the use in trusted environments
138
	 *
139
	 * @throws NoSuchAlgorithmException
140
	 * @throws KeyManagementException
141
	 */
142
	public void initTrustManager() {
143
		final X509TrustManager tm = new X509TrustManager() {
144

    
145
			@Override
146
			public void checkClientTrusted(final X509Certificate[] xcs, final String string) throws CertificateException {}
147

    
148
			@Override
149
			public void checkServerTrusted(final X509Certificate[] xcs, final String string) throws CertificateException {}
150

    
151
			@Override
152
			public X509Certificate[] getAcceptedIssuers() {
153
				return null;
154
			}
155
		};
156
		try {
157
			final SSLContext ctx = SSLContext.getInstance("TLS");
158
			ctx.init(null, new TrustManager[] { tm }, null);
159
			HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
160
		} catch (GeneralSecurityException e) {
161
			log.fatal(e);
162
			throw new IllegalStateException(e);
163
		}
164
	}
165

    
166
	public int getMaxNumberOfRetry() {
167
		return maxNumberOfRetry;
168
	}
169

    
170
	public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
171
		this.maxNumberOfRetry = maxNumberOfRetry;
172
	}
173

    
174
	public int getDefaultDelay() {
175
		return defaultDelay;
176
	}
177

    
178
	public void setDefaultDelay(final int defaultDelay) {
179
		this.defaultDelay = defaultDelay;
180
	}
181

    
182
	public int getReadTimeOut() {
183
		return readTimeOut;
184
	}
185

    
186
	public void setReadTimeOut(final int readTimeOut) {
187
		this.readTimeOut = readTimeOut;
188
	}
189

    
190
}
(1-1/2)