1
|
/**
|
2
|
*
|
3
|
*/
|
4
|
package eu.dnetlib.download.plugin;
|
5
|
|
6
|
import java.util.ArrayList;
|
7
|
import java.util.Arrays;
|
8
|
import java.util.Iterator;
|
9
|
import java.util.List;
|
10
|
|
11
|
import org.apache.commons.logging.Log;
|
12
|
import org.apache.commons.logging.LogFactory;
|
13
|
import org.apache.http.Header;
|
14
|
import org.apache.http.HeaderElement;
|
15
|
import org.apache.http.HttpResponse;
|
16
|
import org.apache.http.NameValuePair;
|
17
|
import org.apache.http.client.HttpClient;
|
18
|
import org.apache.http.client.config.RequestConfig;
|
19
|
import org.apache.http.client.config.RequestConfig.Builder;
|
20
|
import org.apache.http.client.methods.HttpGet;
|
21
|
import org.apache.http.impl.client.HttpClientBuilder;
|
22
|
|
23
|
import com.google.common.base.Function;
|
24
|
import com.google.common.collect.Iterables;
|
25
|
import com.google.gson.Gson;
|
26
|
|
27
|
import eu.dnetlib.data.download.rmi.AbstractDownloadPlugin;
|
28
|
import eu.dnetlib.data.download.rmi.DownloadItem;
|
29
|
import eu.dnetlib.data.download.rmi.DownloadPlugin;
|
30
|
import eu.dnetlib.data.download.rmi.DownloadPluginException;
|
31
|
|
32
|
/**
|
33
|
* The Signposting plugin supports the Publication Boundary Pattern in order to use the fulltext url for download.
|
34
|
* @author jochen
|
35
|
*
|
36
|
*/
|
37
|
public class SignpostingPlugin extends AbstractDownloadPlugin implements DownloadPlugin{
|
38
|
|
39
|
/**
|
40
|
* The Constant log.
|
41
|
*/
|
42
|
private static final Log log = LogFactory.getLog(SignpostingPlugin.class);
|
43
|
|
44
|
/* (non-Javadoc)
|
45
|
* @see eu.dnetlib.data.download.rmi.AbstractDownloadPlugin#extractURL(java.lang.String)
|
46
|
*/
|
47
|
@Override
|
48
|
public String extractURL(String url) throws DownloadPluginException {
|
49
|
try{
|
50
|
Builder config = RequestConfig.custom()
|
51
|
.setConnectionRequestTimeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT)
|
52
|
.setConnectTimeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT)
|
53
|
.setSocketTimeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT);
|
54
|
|
55
|
HttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config.build()) .build();
|
56
|
HttpGet request = new HttpGet(url);
|
57
|
HttpResponse response = client.execute(request);
|
58
|
log.debug("status code: " + response.getStatusLine().getStatusCode());
|
59
|
Header[] links = response.getHeaders("Link");
|
60
|
Iterator<Header> iterator = Arrays.asList(links).iterator();
|
61
|
while(iterator.hasNext()){
|
62
|
Iterator<HeaderElement> iteratorElements = Arrays.asList(iterator.next().getElements()).iterator();
|
63
|
String name = "";
|
64
|
while(iteratorElements.hasNext()){
|
65
|
HeaderElement element = iteratorElements.next();
|
66
|
name = element.getName();
|
67
|
NameValuePair[] nvPair = element.getParameters();
|
68
|
String rel = "";
|
69
|
String type = "";
|
70
|
for (int j = 0; j < nvPair.length; j++){
|
71
|
if (nvPair[j].getName().equals("rel") && nvPair[j].getValue().equals("item"))
|
72
|
rel = "item";
|
73
|
if (nvPair[j].getName().equals("type") && nvPair[j].getValue().equals("application/pdf"))
|
74
|
type = "application/pdf";
|
75
|
log.debug("param name: " + nvPair[j].getName() + " param value: " + nvPair[j].getValue());
|
76
|
}
|
77
|
if (rel.equals("item") && type.equals("application/pdf"))
|
78
|
return name.replaceAll("^<|>$", "");
|
79
|
}
|
80
|
}
|
81
|
}catch(Throwable e){
|
82
|
throw new DownloadPluginException("Error on extract URL", e);
|
83
|
}
|
84
|
|
85
|
return null;
|
86
|
}
|
87
|
|
88
|
@Override
|
89
|
public String getPluginName() {
|
90
|
return "SignpostingPlugin";
|
91
|
}
|
92
|
|
93
|
@Override
|
94
|
public DownloadItem retrieveUrl(DownloadItem input) throws DownloadPluginException {
|
95
|
if (checkOpenAccess(input) == null) return null;
|
96
|
String url = input.getOriginalUrl();
|
97
|
|
98
|
if ((url == null) || (url.trim().length() == 0)) return input;
|
99
|
@SuppressWarnings("unchecked")
|
100
|
List<String> urls = new Gson().fromJson(url, ArrayList.class);
|
101
|
if ((urls == null) || (urls.size() == 0)) return input;
|
102
|
if (checkUrlsNotNull(input, urls))
|
103
|
return input;
|
104
|
input.setOriginalUrl(null);
|
105
|
input.setUrl(null);
|
106
|
return input;
|
107
|
}
|
108
|
|
109
|
@Override
|
110
|
public Iterable<DownloadItem> retrieveUrls(Iterable<DownloadItem> urls) throws DownloadPluginException {
|
111
|
return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
|
112
|
|
113
|
@Override
|
114
|
public DownloadItem apply(final DownloadItem input) {
|
115
|
return retrieveUrl(input);
|
116
|
}
|
117
|
});
|
118
|
}
|
119
|
|
120
|
@Override
|
121
|
public void setBasePath(String arg0) {
|
122
|
// TODO Auto-generated method stub
|
123
|
|
124
|
}
|
125
|
|
126
|
}
|