Project

General

Profile

« Previous | Next » 

Revision 58485

added signposting plugin (publication boundary)

View differences:

modules/dnet-download-plugins/trunk/src/test/java/eu/dnetlib/download/plugin/SignpostingPluginTest.java
1
package eu.dnetlib.download.plugin;
2

  
3
import org.junit.Assert;
4
import org.junit.Ignore;
5
import org.junit.Test;
6

  
7
@Ignore
8
public class SignpostingPluginTest {
9

  
10
	@Test
11
	public void extractUrl(){
12
		final SignpostingPlugin plugin = new SignpostingPlugin();
13
		//final String fulltextUrl = plugin.extractURL("http://ktisis.cut.ac.cy/handle/10488/4268");
14
		//final String fulltextUrl = plugin.extractURL("https://pub.uni-bielefeld.de/record/2937099");
15
		//final String fulltextUrl = plugin.extractURL("https://tore.tuhh.de/handle/11420/5569");
16
		final String fulltextUrl = plugin.extractURL("https://www.annalsofgeophysics.eu/index.php/annals/article/view/8321");
17
		System.out.println("fulltext: " + fulltextUrl);
18
		Assert.assertEquals(fulltextUrl, "https://www.annalsofgeophysics.eu/index.php/annals/article/download/8321/7200"); //"https://tore.tuhh.de/bitstream/11420/5569/1/Aberle2020_Article_WhoBenefitsFromMobilityAsAServ.pdf");
19
	}
20
}
modules/dnet-download-plugins/trunk/src/test/java/eu/dnetlib/download/plugin/DSpacePDFLinkPluginsTest.java
20 20

  
21 21

  
22 22
    @Test
23
    public void extractUrl(){
24
        final DSpacePDFLinkPlugins plugin = new DSpacePDFLinkPlugins();
25
    	final String url = plugin.extractURL("http://dx.doi.org/10.1155/2006/690694"); //https://www.annalsofgeophysics.eu/index.php/annals/article/view/7507");
26
    	Assert.assertEquals(url, "http://downloads.hindawi.com/journals/sp/2006/690694.pdf"); //https://www.annalsofgeophysics.eu/index.php/annals/article/download/7507/6808");
27
    }
28
    
29
    @Test
23 30
    public void testURL () {
24 31
        final DSpacePDFLinkPlugins plugin = new DSpacePDFLinkPlugins();
25 32

  
modules/dnet-download-plugins/trunk/src/main/java/eu/dnetlib/download/plugin/SignpostingPlugin.java
1
/**
2
 * 
3
 */
4
package eu.dnetlib.download.plugin;
5

  
6
import java.util.ArrayList;
7
import java.util.Arrays;
8
import java.util.Iterator;
9
import java.util.List;
10

  
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13
import org.apache.http.Header;
14
import org.apache.http.HeaderElement;
15
import org.apache.http.HttpResponse;
16
import org.apache.http.NameValuePair;
17
import org.apache.http.client.HttpClient;
18
import org.apache.http.client.config.RequestConfig;
19
import org.apache.http.client.config.RequestConfig.Builder;
20
import org.apache.http.client.methods.HttpGet;
21
import org.apache.http.impl.client.HttpClientBuilder;
22

  
23
import com.google.common.base.Function;
24
import com.google.common.collect.Iterables;
25
import com.google.gson.Gson;
26

  
27
import eu.dnetlib.data.download.rmi.AbstractDownloadPlugin;
28
import eu.dnetlib.data.download.rmi.DownloadItem;
29
import eu.dnetlib.data.download.rmi.DownloadPlugin;
30
import eu.dnetlib.data.download.rmi.DownloadPluginException;
31

  
32
/**
33
 * The Signposting plugin supports the Publication Boundary Pattern in order to use the fulltext url for download.
34
 * @author jochen
35
 *
36
 */
37
public class SignpostingPlugin extends AbstractDownloadPlugin implements DownloadPlugin{
38

  
39
    /**
40
     * The Constant log.
41
     */
42
    private static final Log log = LogFactory.getLog(SignpostingPlugin.class);
43

  
44
	/* (non-Javadoc)
45
	 * @see eu.dnetlib.data.download.rmi.AbstractDownloadPlugin#extractURL(java.lang.String)
46
	 */
47
	@Override
48
	public String extractURL(String url) throws DownloadPluginException {
49
		try{
50
			Builder config = RequestConfig.custom()
51
					.setConnectionRequestTimeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT)
52
					.setConnectTimeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT)
53
					.setSocketTimeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT);
54
			
55
	        HttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config.build()) .build();
56
	        HttpGet request = new HttpGet(url);
57
	        HttpResponse response = client.execute(request);
58
	        log.debug("status code: " + response.getStatusLine().getStatusCode());
59
	        Header[] links = response.getHeaders("Link");
60
	        Iterator<Header> iterator = Arrays.asList(links).iterator();
61
	        while(iterator.hasNext()){
62
	        	Iterator<HeaderElement> iteratorElements = Arrays.asList(iterator.next().getElements()).iterator();
63
	        	String name = "";
64
	        	while(iteratorElements.hasNext()){
65
	        		HeaderElement element = iteratorElements.next();
66
	        		name = element.getName();
67
	        		NameValuePair[] nvPair = element.getParameters();
68
		        	String rel = "";
69
		        	String type = "";
70
	        		for (int j = 0; j < nvPair.length; j++){
71
	        			if (nvPair[j].getName().equals("rel") && nvPair[j].getValue().equals("item"))
72
	        					rel = "item";
73
	        			if (nvPair[j].getName().equals("type") && nvPair[j].getValue().equals("application/pdf"))
74
	        				type = "application/pdf";
75
	        			log.debug("param name: " + nvPair[j].getName() + " param value: " + nvPair[j].getValue());
76
	        		}
77
	        		if (rel.equals("item") && type.equals("application/pdf"))
78
	        			return name.replaceAll("^<|>$", "");
79
	        	}
80
	        }
81
		}catch(Throwable e){
82
	        throw new DownloadPluginException("Error on extract URL", e);			
83
		}
84
		
85
		return null;
86
	}
87

  
88
	@Override
89
	public String getPluginName() {
90
		return "SignpostingPlugin";
91
	}
92

  
93
	@Override
94
	public DownloadItem retrieveUrl(DownloadItem input) throws DownloadPluginException {
95
        if (checkOpenAccess(input) == null) return null;
96
        String url = input.getOriginalUrl();
97

  
98
        if ((url == null) || (url.trim().length() == 0)) return input;
99
        @SuppressWarnings("unchecked")
100
        List<String> urls = new Gson().fromJson(url, ArrayList.class);
101
        if ((urls == null) || (urls.size() == 0)) return input;
102
        if (checkUrlsNotNull(input, urls))
103
            return input;
104
        input.setOriginalUrl(null);
105
        input.setUrl(null);
106
        return input;
107
	}
108

  
109
	@Override
110
	public Iterable<DownloadItem> retrieveUrls(Iterable<DownloadItem> urls) throws DownloadPluginException {
111
        return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
112

  
113
            @Override
114
            public DownloadItem apply(final DownloadItem input) {
115
                return retrieveUrl(input);
116
            }
117
        });
118
	}
119

  
120
	@Override
121
	public void setBasePath(String arg0) {
122
		// TODO Auto-generated method stub
123
		
124
	}
125

  
126
}
modules/dnet-download-plugins/trunk/src/main/resources/eu/dnetlib/download/plugin/applicationContext-node-plugins.xml
29 29

  
30 30
	<bean id="DSpacePDFLinkPlugins" class="eu.dnetlib.download.plugin.DSpacePDFLinkPlugins"/>
31 31
	
32
	<bean id="signpostingPlugin" class="eu.dnetlib.download.plugin.SignpostingPlugin"/>
32 33

  
33 34
</beans>
modules/dnet-download-plugins/trunk/pom.xml
34 34
			<version>3.5</version>
35 35
		</dependency>
36 36
		<dependency>
37
			<groupId>org.apache.httpcomponents</groupId>
38
			<artifactId>httpclient</artifactId>
39
			<version>[4.5.0, 4.6.0]</version>
40
		</dependency>
41
		<dependency>
37 42
			<groupId>junit</groupId>
38 43
			<artifactId>junit</artifactId>
39 44
			<version>${junit.version}</version>

Also available in: Unified diff