|
1 |
/**
|
|
2 |
*
|
|
3 |
*/
|
|
4 |
package eu.dnetlib.download.plugin;
|
|
5 |
|
|
6 |
import java.util.ArrayList;
|
|
7 |
import java.util.Arrays;
|
|
8 |
import java.util.Iterator;
|
|
9 |
import java.util.List;
|
|
10 |
|
|
11 |
import org.apache.commons.logging.Log;
|
|
12 |
import org.apache.commons.logging.LogFactory;
|
|
13 |
import org.apache.http.Header;
|
|
14 |
import org.apache.http.HeaderElement;
|
|
15 |
import org.apache.http.HttpResponse;
|
|
16 |
import org.apache.http.NameValuePair;
|
|
17 |
import org.apache.http.client.HttpClient;
|
|
18 |
import org.apache.http.client.config.RequestConfig;
|
|
19 |
import org.apache.http.client.config.RequestConfig.Builder;
|
|
20 |
import org.apache.http.client.methods.HttpGet;
|
|
21 |
import org.apache.http.impl.client.HttpClientBuilder;
|
|
22 |
|
|
23 |
import com.google.common.base.Function;
|
|
24 |
import com.google.common.collect.Iterables;
|
|
25 |
import com.google.gson.Gson;
|
|
26 |
|
|
27 |
import eu.dnetlib.data.download.rmi.AbstractDownloadPlugin;
|
|
28 |
import eu.dnetlib.data.download.rmi.DownloadItem;
|
|
29 |
import eu.dnetlib.data.download.rmi.DownloadPlugin;
|
|
30 |
import eu.dnetlib.data.download.rmi.DownloadPluginException;
|
|
31 |
|
|
32 |
/**
|
|
33 |
* The Signposting plugin supports the Publication Boundary Pattern in order to use the fulltext url for download.
|
|
34 |
* @author jochen
|
|
35 |
*
|
|
36 |
*/
|
|
37 |
public class SignpostingPlugin extends AbstractDownloadPlugin implements DownloadPlugin{
|
|
38 |
|
|
39 |
/**
|
|
40 |
* The Constant log.
|
|
41 |
*/
|
|
42 |
private static final Log log = LogFactory.getLog(SignpostingPlugin.class);
|
|
43 |
|
|
44 |
/* (non-Javadoc)
|
|
45 |
* @see eu.dnetlib.data.download.rmi.AbstractDownloadPlugin#extractURL(java.lang.String)
|
|
46 |
*/
|
|
47 |
@Override
|
|
48 |
public String extractURL(String url) throws DownloadPluginException {
|
|
49 |
try{
|
|
50 |
Builder config = RequestConfig.custom()
|
|
51 |
.setConnectionRequestTimeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT)
|
|
52 |
.setConnectTimeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT)
|
|
53 |
.setSocketTimeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT);
|
|
54 |
|
|
55 |
HttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config.build()) .build();
|
|
56 |
HttpGet request = new HttpGet(url);
|
|
57 |
HttpResponse response = client.execute(request);
|
|
58 |
log.debug("status code: " + response.getStatusLine().getStatusCode());
|
|
59 |
Header[] links = response.getHeaders("Link");
|
|
60 |
Iterator<Header> iterator = Arrays.asList(links).iterator();
|
|
61 |
while(iterator.hasNext()){
|
|
62 |
Iterator<HeaderElement> iteratorElements = Arrays.asList(iterator.next().getElements()).iterator();
|
|
63 |
String name = "";
|
|
64 |
while(iteratorElements.hasNext()){
|
|
65 |
HeaderElement element = iteratorElements.next();
|
|
66 |
name = element.getName();
|
|
67 |
NameValuePair[] nvPair = element.getParameters();
|
|
68 |
String rel = "";
|
|
69 |
String type = "";
|
|
70 |
for (int j = 0; j < nvPair.length; j++){
|
|
71 |
if (nvPair[j].getName().equals("rel") && nvPair[j].getValue().equals("item"))
|
|
72 |
rel = "item";
|
|
73 |
if (nvPair[j].getName().equals("type") && nvPair[j].getValue().equals("application/pdf"))
|
|
74 |
type = "application/pdf";
|
|
75 |
log.debug("param name: " + nvPair[j].getName() + " param value: " + nvPair[j].getValue());
|
|
76 |
}
|
|
77 |
if (rel.equals("item") && type.equals("application/pdf"))
|
|
78 |
return name.replaceAll("^<|>$", "");
|
|
79 |
}
|
|
80 |
}
|
|
81 |
}catch(Throwable e){
|
|
82 |
throw new DownloadPluginException("Error on extract URL", e);
|
|
83 |
}
|
|
84 |
|
|
85 |
return null;
|
|
86 |
}
|
|
87 |
|
|
88 |
@Override
|
|
89 |
public String getPluginName() {
|
|
90 |
return "SignpostingPlugin";
|
|
91 |
}
|
|
92 |
|
|
93 |
@Override
|
|
94 |
public DownloadItem retrieveUrl(DownloadItem input) throws DownloadPluginException {
|
|
95 |
if (checkOpenAccess(input) == null) return null;
|
|
96 |
String url = input.getOriginalUrl();
|
|
97 |
|
|
98 |
if ((url == null) || (url.trim().length() == 0)) return input;
|
|
99 |
@SuppressWarnings("unchecked")
|
|
100 |
List<String> urls = new Gson().fromJson(url, ArrayList.class);
|
|
101 |
if ((urls == null) || (urls.size() == 0)) return input;
|
|
102 |
if (checkUrlsNotNull(input, urls))
|
|
103 |
return input;
|
|
104 |
input.setOriginalUrl(null);
|
|
105 |
input.setUrl(null);
|
|
106 |
return input;
|
|
107 |
}
|
|
108 |
|
|
109 |
@Override
|
|
110 |
public Iterable<DownloadItem> retrieveUrls(Iterable<DownloadItem> urls) throws DownloadPluginException {
|
|
111 |
return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
|
|
112 |
|
|
113 |
@Override
|
|
114 |
public DownloadItem apply(final DownloadItem input) {
|
|
115 |
return retrieveUrl(input);
|
|
116 |
}
|
|
117 |
});
|
|
118 |
}
|
|
119 |
|
|
120 |
@Override
|
|
121 |
public void setBasePath(String arg0) {
|
|
122 |
// TODO Auto-generated method stub
|
|
123 |
|
|
124 |
}
|
|
125 |
|
|
126 |
}
|
added signposting plugin (publication boundary)