Project

General

Profile

« Previous | Next » 

Revision 38905

[maven-release-plugin] copy for tag dnet-download-plugins-2.1.9

View differences:

modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/deploy.info
1
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-download-plugins/trunk/", "deploy_repository": "dnet4-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", "name": "dnet-download-plugins"}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/test/java/DateIntervalTest.java
1
import org.joda.time.DateTime;
2
import org.joda.time.Days;
3
import org.junit.Assert;
4
import org.junit.Before;
5
import org.junit.Test;
6

  
7
public class DateIntervalTest {
8

  
9
	@Before
10
	public void setUp() throws Exception {}
11

  
12
	@Test
13
	public void test() {
14
		DateTime beforeDate = new DateTime(2010, 01, 1, 0, 0);
15
		DateTime now = new DateTime();
16

  
17
		Days day = Days.daysBetween(beforeDate, now);
18
		Assert.assertTrue(day.getDays() > 0);
19
	}
20

  
21
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/test/java/eu/dnetlib/download/plugin/ArxivImportFromFileTest.java
1
package eu.dnetlib.download.plugin;
2

  
3
import junit.framework.Assert;
4

  
5
import org.junit.Before;
6
import org.junit.Test;
7

  
8
import com.google.gson.Gson;
9
import com.google.gson.GsonBuilder;
10

  
11
import eu.dnetlib.data.download.rmi.DownloadItem;
12

  
13
public class ArxivImportFromFileTest {
14

  
15
	@Before
16
	public void setUp() throws Exception {}
17

  
18
	@Test
19
	public void testREgEx() {
20
		String regExp = "\\d{4}\\.\\d{4}";
21

  
22
		String input = "1308.0021";
23

  
24
		Assert.assertTrue(input.matches(regExp));
25

  
26
	}
27

  
28
	@Test
29
	public void testPlugin() {
30
		DownloadItem item = new DownloadItem();
31
		item.setOpenAccess("OPEN");
32
		String[] myList = new String[] { "http://arxiv.org/abs/1308.0001", "b" };
33
		Gson g = new GsonBuilder().disableHtmlEscaping().create();
34
		item.setUrl(g.toJson(myList));
35

  
36
		ArxivImportFromFile plugin = new ArxivImportFromFile();
37
		plugin.setBasePath("/test/path");
38

  
39
		plugin.retrieveUrl(item);
40

  
41
		Assert.assertNotNull(item);
42

  
43
	}
44

  
45
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/java/eu/dnetlib/download/plugin/EuropePMC.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8

  
9
import com.google.common.base.Function;
10
import com.google.common.collect.Iterables;
11
import com.google.gson.Gson;
12

  
13
import eu.dnetlib.data.download.rmi.DownloadItem;
14
import eu.dnetlib.data.download.rmi.DownloadPlugin;
15

  
16
// TODO: Auto-generated Javadoc
17
/**
18
 * The Class EuropePMC.
19
 */
20
public class EuropePMC extends AbstractDownloadPlugin implements DownloadPlugin {
21

  
22
	private static final Log log = LogFactory.getLog(EuropePMC.class);
23

  
24
	/** The base path. */
25
	private String basePath;
26

  
27
	// //*[local-name()='metadata']//*[local-name()='identifier' and ./@identifierType='pmc']/text()
28

  
29
	/*
30
	 * (non-Javadoc)
31
	 *
32
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
33
	 */
34
	@Override
35
	public String getPluginName() {
36
		return "europePMCDownloadPlugin";
37
	}
38

  
39
	@Override
40
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
41
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
42

  
43
			@Override
44
			public DownloadItem apply(final DownloadItem input) {
45
				return retrieveUrl(input);
46
			}
47
		});
48
	}
49

  
50
	/*
51
	 * (non-Javadoc)
52
	 *
53
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
54
	 */
55
	@Override
56
	public DownloadItem retrieveUrl(final DownloadItem input) {
57

  
58
		String url = input.getOriginalUrl();
59
		if (url == null || url.trim().length() == 0) return input;
60
		@SuppressWarnings("unchecked")
61
		List<String> urls = new Gson().fromJson(url, ArrayList.class);
62
		log.debug(String.format("urls is %s", url));
63
		if (urls == null || urls.size() == 0) return input;
64
		input.setFileName(input.getIdItemMetadata());
65
		Boolean added = false;
66
		for (String s : urls) {
67
			if (s.startsWith("http")) {
68
				log.debug(String.format("found url starting with http replace original URL with %s", s));
69
				input.setOriginalUrl(s);
70
			} else if (s.startsWith("PMC")) {
71
				String correctUrl = s.replace("PMC", "");
72
				log.debug(String.format("found url starting with PMC %s", correctUrl));
73
				added = true;
74
				String path = PathRetreiver.getInstance(basePath).getPathForPMCID(Integer.parseInt(correctUrl));
75
				if (path != null) {
76
					input.setUrl("file://" + path);
77
				} else {
78
					input.setUrl(path);
79
				}
80

  
81
			}
82
		}
83
		if (added == false) {
84
			input.setOriginalUrl(null);
85
			input.setUrl(null);
86
		}
87
		return input;
88

  
89
	}
90

  
91
	/**
92
	 * Gets the base path.
93
	 *
94
	 * @return the basePath
95
	 */
96
	public String getBasePath() {
97
		return basePath;
98
	}
99

  
100
	/**
101
	 * Sets the base path.
102
	 *
103
	 * @param basePath
104
	 *            the basePath to set
105
	 */
106
	@Override
107
	public void setBasePath(final String basePath) {
108
		this.basePath = basePath;
109
	}
110

  
111
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/java/eu/dnetlib/download/plugin/ArxivDownloadPlugin.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
8
import com.google.gson.Gson;
9

  
10
import eu.dnetlib.data.download.rmi.DownloadItem;
11
import eu.dnetlib.data.download.rmi.DownloadPlugin;
12

  
13
public class ArxivDownloadPlugin extends AbstractDownloadPlugin implements DownloadPlugin {
14

  
15
	@Override
16
	public DownloadItem retrieveUrl(final DownloadItem input) {
17
		if (checkOpenAccess(input) == null) { return null; }
18
		String url = input.getOriginalUrl();
19
		if (url == null || url.trim().length() == 0) { return input; }
20
		@SuppressWarnings("unchecked")
21
		List<String> urls = new Gson().fromJson(url, ArrayList.class);
22
		if (urls == null || urls.size() == 0) { return input; }
23
		for (String s : urls) {
24
			if (s.startsWith("http")) {
25
				input.setOriginalUrl(s);
26
				String correctUrl = s.replace("abs", "pdf");
27
				correctUrl += ".pdf";
28
				input.setUrl(correctUrl);
29
			}
30
		}
31
		return input;
32
	}
33

  
34
	@Override
35
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
36
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
37

  
38
			@Override
39
			public DownloadItem apply(final DownloadItem input) {
40
				return retrieveUrl(input);
41
			}
42
		});
43
	}
44

  
45
	@Override
46
	public String getPluginName() {
47
		return "ArxivDownloadPlugin";
48
	}
49

  
50
	@Override
51
	public void setBasePath(final String basePath) {
52

  
53
	}
54
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/java/eu/dnetlib/download/plugin/EasyPDFDownloadPlugin.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
8
import com.google.gson.Gson;
9

  
10
import eu.dnetlib.data.download.rmi.DownloadItem;
11
import eu.dnetlib.data.download.rmi.DownloadPlugin;
12

  
13
/**
14
 * The Class EasyPDFDownloadPlugin.
15
 */
16
public class EasyPDFDownloadPlugin extends AbstractDownloadPlugin implements DownloadPlugin {
17

  
18
	/*
19
	 * (non-Javadoc)
20
	 * 
21
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
22
	 */
23
	@Override
24
	public DownloadItem retrieveUrl(final DownloadItem input) {
25
		if (checkOpenAccess(input) == null) { return null; }
26
		if (input == null) { return null; }
27
		String url = input.getOriginalUrl();
28

  
29
		if (url == null || url.trim().length() == 0) { return input; }
30
		@SuppressWarnings("unchecked")
31
		List<String> urls = new Gson().fromJson(url, ArrayList.class);
32
		if (urls == null || urls.size() == 0) { return input; }
33
		for (String s : urls) {
34

  
35
			if (s.trim().endsWith(".pdf")) {
36
				input.setOriginalUrl(s);
37
				input.setUrl(s);
38
				return input;
39
			}
40
		}
41
		input.setOriginalUrl(null);
42
		input.setUrl(null);
43
		return input;
44
	}
45

  
46
	@Override
47
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
48
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
49

  
50
			@Override
51
			public DownloadItem apply(final DownloadItem input) {
52
				return retrieveUrl(input);
53
			}
54
		});
55
	}
56

  
57
	/*
58
	 * (non-Javadoc)
59
	 * 
60
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
61
	 */
62
	@Override
63
	public String getPluginName() {
64
		return "easyPDFDownloadPlugin";
65
	}
66

  
67
	@Override
68
	public void setBasePath(final String basePath) {
69
		// TODO Auto-generated method stub
70

  
71
	}
72

  
73
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/java/eu/dnetlib/download/plugin/ELisDownloadPlugin.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8
import org.jsoup.Jsoup;
9
import org.jsoup.nodes.Document;
10
import org.jsoup.nodes.Element;
11
import org.jsoup.select.Elements;
12

  
13
import com.google.common.base.Function;
14
import com.google.common.collect.Iterables;
15
import com.google.gson.Gson;
16

  
17
import eu.dnetlib.data.download.rmi.DownloadItem;
18
import eu.dnetlib.data.download.rmi.DownloadPlugin;
19

  
20
/**
21
 * The Class ELisDownloadPlugin.
22
 */
23
public class ELisDownloadPlugin extends AbstractDownloadPlugin implements DownloadPlugin {
24

  
25
	/** The Constant log. */
26
	private static final Log log = LogFactory.getLog(ELisDownloadPlugin.class);
27

  
28
	/**
29
	 * Extract url.
30
	 *
31
	 * @param url
32
	 *            the url
33
	 * @return the string
34
	 */
35
	private String extractURL(final String url) {
36
		try {
37
			Document doc = Jsoup.connect(url).get();
38
			Elements links = doc.select("a[href$=.pdf]");
39
			for (Element link : links) {
40
				String linkvalue = link.attr("abs:href");
41
				if (!linkvalue.toLowerCase().contains("thumbnailversion")) { return linkvalue; }
42
			}
43
			return null;
44
		} catch (Exception e) {
45
			log.error("Error on extract URL", e);
46
			return null;
47
		}
48

  
49
	}
50

  
51
	@Override
52
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
53
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
54

  
55
			@Override
56
			public DownloadItem apply(final DownloadItem input) {
57
				return retrieveUrl(input);
58
			}
59
		});
60
	}
61

  
62
	/*
63
	 * (non-Javadoc)
64
	 * 
65
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
66
	 */
67
	@Override
68
	public String getPluginName() {
69
		return "ELisDownloadPlugin";
70
	}
71

  
72
	/*
73
	 * (non-Javadoc)
74
	 * 
75
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
76
	 */
77
	@Override
78
	public DownloadItem retrieveUrl(final DownloadItem input) {
79
		if (checkOpenAccess(input) == null) { return null; }
80
		String url = input.getOriginalUrl();
81

  
82
		if (url == null || url.trim().length() == 0) { return input; }
83
		@SuppressWarnings("unchecked")
84
		List<String> urls = new Gson().fromJson(url, ArrayList.class);
85
		if (urls == null || urls.size() == 0) { return input; }
86
		for (String s : urls) {
87
			String newURL = extractURL(s);
88
			if (newURL != null) {
89
				input.setOriginalUrl(s);
90
				input.setUrl(newURL);
91
				return input;
92
			}
93
		}
94
		input.setOriginalUrl(null);
95
		input.setUrl(null);
96
		return input;
97
	}
98

  
99
	@Override
100
	public void setBasePath(final String basePath) {
101
		// TODO Auto-generated method stub
102

  
103
	}
104

  
105
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/java/eu/dnetlib/download/plugin/PathRetreiver.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.io.File;
4
import java.io.FileFilter;
5
import java.util.ArrayList;
6
import java.util.Collections;
7
import java.util.Comparator;
8
import java.util.List;
9

  
10
import org.apache.commons.lang.StringUtils;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13

  
14
// TODO: Auto-generated Javadoc
15
/**
16
 * The Class PathRetreiver.
17
 */
18
public class PathRetreiver {
19

  
20
	private static final Log log = LogFactory.getLog(PathRetreiver.class);
21

  
22
	/** The instance. */
23
	private static PathRetreiver instance;
24

  
25
	/**
26
	 * Gets the single instance of PathRetreiver.
27
	 *
28
	 * @param base_path
29
	 *            the base_path
30
	 * @return single instance of PathRetreiver
31
	 */
32
	public static PathRetreiver getInstance(final String base_path) {
33
		if (instance == null) {
34
			instance = new PathRetreiver();
35
			instance.setBase_path(base_path);
36
		}
37
		return instance;
38
	}
39

  
40
	/** The base_path. */
41
	private String base_path;
42

  
43
	/** The values. */
44
	private List<InfoPath> values;
45

  
46
	/**
47
	 * Bootstrap.
48
	 */
49
	private void bootstrap() {
50
		values = new ArrayList<InfoPath>();
51
		File basePath = new File(this.base_path);
52
		File[] selectedFiles = basePath.listFiles(new FileFilter() {
53

  
54
			@Override
55
			public boolean accept(final File pathname) {
56
				return pathname.isDirectory();
57
			}
58
		});
59

  
60
		for (File f : selectedFiles) {
61
			String lower = StringUtils.substringAfter(StringUtils.substringBefore(f.getName(), "_"), "PMC");
62
			String upper = StringUtils.substringAfter(StringUtils.substringAfter(f.getName(), "_"), "PMC");
63
			String path = f.getPath();
64
			InfoPath i = new InfoPath();
65
			i.setLower(Integer.parseInt(lower));
66
			i.setUpper(Integer.parseInt(upper));
67
			i.setPath(path);
68
			values.add(i);
69
		}
70

  
71
		Collections.sort(values, new Comparator<InfoPath>() {
72

  
73
			@Override
74
			public int compare(final InfoPath o1, final InfoPath o2) {
75
				if (o1.getLower() < o2.getLower()) return -1;
76
				else if (o1.getLower() < o2.getLower()) return 0;
77
				else return 1;
78
			}
79
		});
80
		if (log.isDebugEnabled()) {
81
			for (InfoPath p : values) {
82
				log.debug(String.format("%s -- %s : %s", p.getLower(), p.getUpper(), p.getPath()));
83
			}
84
		}
85

  
86
	}
87

  
88
	/**
89
	 * Gets the path for pmcid.
90
	 *
91
	 * @param pmcID
92
	 *            the pmc id
93
	 * @return the path for pmcid
94
	 */
95
	public String getPathForPMCID(final int pmcID) {
96
		if (values == null) {
97
			bootstrap();
98
		}
99
		for (int i = 0; i < values.size(); i++) {
100
			if (pmcID < values.get(i).getLower()) {
101
				if (i == 0) return null;
102
				String currentPath = values.get(i - 1).getPath() + "/" + pmcID + ".xml";
103
				File f = new File(currentPath);
104
				log.debug(String.format("try to search in path %s", currentPath));
105
				String s = null;
106
				if (f.exists()) {
107
					s = f.getPath();
108
					log.debug(String.format("found in %s", s));
109
				} else {
110
					log.debug(String.format("not found in %s", s));
111
				}
112
				return s;
113

  
114
			}
115
		}
116
		log.debug(String.format("PMC with ID: %s not found", pmcID));
117
		return null;
118
	}
119

  
120
	/**
121
	 * Sets the base_path.
122
	 *
123
	 * @param base_path
124
	 *            the new base_path
125
	 */
126
	public void setBase_path(final String base_path) {
127
		this.base_path = base_path;
128
	}
129

  
130
	/**
131
	 * Gets the base_path.
132
	 *
133
	 * @return the base_path
134
	 */
135
	public String getBase_path() {
136
		return this.base_path;
137
	}
138
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/java/eu/dnetlib/download/plugin/ArxivImportFromFile.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.io.File;
4
import java.nio.file.Path;
5
import java.nio.file.Paths;
6
import java.util.List;
7

  
8
import org.apache.commons.lang.StringUtils;
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11

  
12
import com.google.common.base.Function;
13
import com.google.common.collect.Iterables;
14
import com.google.gson.Gson;
15

  
16
import eu.dnetlib.data.download.rmi.DownloadItem;
17
import eu.dnetlib.data.download.rmi.DownloadPlugin;
18

  
19
public class ArxivImportFromFile extends AbstractDownloadPlugin implements DownloadPlugin {
20

  
21
	private static final Log log = LogFactory.getLog(ArxivImportFromFile.class); // NOPMD by marko on 11/24/08 5:02 PM
22

  
23
	/** The base path. */
24
	private String basePath;
25

  
26
	@Override
27
	public DownloadItem retrieveUrl(final DownloadItem item) {
28
		if (checkOpenAccess(item) == null) return null;
29
		final String baseURLs = item.getUrl();
30
		final List<String> urlsList = new Gson().fromJson(baseURLs, List.class);
31
		for (final String baseURL : urlsList) {
32

  
33
			if (baseURL.isEmpty() == false && baseURL.trim().startsWith("http://") == true) {
34
				final String name = StringUtils.substringAfter(baseURL, "abs/").trim();
35
				if (name == null) {
36
					item.setUrl(null);
37
					return item;
38
				}
39

  
40
				final String fileURL = createPath(name);
41
				if (StringUtils.isBlank(fileURL)) {
42
					item.setUrl(null);
43
					return item;
44
				}
45

  
46
				final File f = new File(fileURL);
47
				if (f.exists()) {
48
					if (log.isDebugEnabled()) {
49
						log.debug("found path associated to " + item.getIdItemMetadata() + " with path : " + fileURL);
50
					}
51
					item.setUrl("file://" + fileURL);
52
				} else {
53
					if (log.isDebugEnabled()) {
54
						log.debug("NOT found path associated to " + item.getIdItemMetadata());
55
					}
56
					item.setUrl(null);
57
				}
58
				item.setOriginalUrl(baseURL);
59
				return item;
60

  
61
			} else {
62
				item.setUrl(null);
63
			}
64

  
65
		}
66
		return item;
67
	}
68

  
69
	public String createPath(final String name) {
70
		final String regExp = "\\d{4}\\.\\d{4}";
71
		if (name.matches(regExp)) {
72
			final String[] values = name.split("\\.");
73
			final Path bsPath = Paths.get(basePath);
74
			final Path filePath = Paths.get(String.format("%s/%s.pdf", values[0], name));
75

  
76
			final String fileURL = bsPath.resolve(filePath).toString();
77
			return fileURL;
78
		} else {
79
			if (name.contains("/")) {
80
				final String[] values = name.split("/");
81
				if (values.length != 2) return null;
82
				if (values[1].length() > 4) {
83
					final String middle = values[1].substring(0, 4);
84
					final Path bsPath = Paths.get(basePath);
85
					final Path filePath = Paths.get(String.format("%s/%s.pdf", middle, name.replace("/", "")));
86
					final String fileURL = bsPath.resolve(filePath).toString();
87
					return fileURL;
88
				}
89
			}
90
		}
91
		return null;
92
	}
93

  
94
	@Override
95
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> items) {
96
		return Iterables.transform(items, new Function<DownloadItem, DownloadItem>() {
97

  
98
			@Override
99
			public DownloadItem apply(final DownloadItem input) {
100
				return retrieveUrl(input);
101
			}
102
		});
103
	}
104

  
105
	@Override
106
	public String getPluginName() {
107
		return "ArxivImportFromFile";
108
	}
109

  
110
	/**
111
	 * @return the basePath
112
	 */
113
	public String getBasePath() {
114
		return basePath;
115
	}
116

  
117
	/**
118
	 * @param basePath
119
	 *            the basePath to set
120
	 */
121
	@Override
122
	public void setBasePath(final String basePath) {
123
		this.basePath = basePath;
124
	}
125

  
126
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/java/eu/dnetlib/download/plugin/InfoPath.java
1
package eu.dnetlib.download.plugin;
2

  
3
/**
4
 * The Class InfoPath.
5
 */
6
public class InfoPath {
7

  
8
	/** The lower. */
9
	private int lower;
10

  
11
	/** The upper. */
12
	private int upper;
13

  
14
	/** The path. */
15
	private String path;
16

  
17
	/**
18
	 * Gets the lower.
19
	 *
20
	 * @return the lower
21
	 */
22
	public int getLower() {
23
		return lower;
24
	}
25

  
26
	/**
27
	 * Sets the lower.
28
	 *
29
	 * @param lower
30
	 *            the new lower
31
	 */
32
	public void setLower(final int lower) {
33
		this.lower = lower;
34
	}
35

  
36
	/**
37
	 * Gets the upper.
38
	 *
39
	 * @return the upper
40
	 */
41
	public int getUpper() {
42
		return upper;
43
	}
44

  
45
	/**
46
	 * Sets the upper.
47
	 *
48
	 * @param upper
49
	 *            the new upper
50
	 */
51
	public void setUpper(final int upper) {
52
		this.upper = upper;
53
	}
54

  
55
	/**
56
	 * Gets the path.
57
	 *
58
	 * @return the path
59
	 */
60
	public String getPath() {
61
		return path;
62
	}
63

  
64
	/**
65
	 * Sets the path.
66
	 *
67
	 * @param path
68
	 *            the new path
69
	 */
70
	public void setPath(final String path) {
71
		this.path = path;
72
	}
73
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/java/eu/dnetlib/download/plugin/DLibPlugin.java
1
/**
2
 *
3
 */
4
package eu.dnetlib.download.plugin;
5

  
6

  
7

  
8

  
9

  
10
import java.util.ArrayList;
11
import java.util.List;
12

  
13
import com.google.common.base.Function;
14
import com.google.common.collect.Iterables;
15
import com.google.gson.Gson;
16

  
17
import eu.dnetlib.data.download.rmi.DownloadItem;
18
import eu.dnetlib.data.download.rmi.DownloadPlugin;
19

  
20
/**
21
 * The Class DLibPlugin.
22
 */
23
public class DLibPlugin extends AbstractDownloadPlugin implements DownloadPlugin {
24

  
25
	/*
26
	 * (non-Javadoc)
27
	 *
28
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
29
	 */
30
	@Override
31
	public DownloadItem retrieveUrl(final DownloadItem input) {
32
		if (checkOpenAccess(input) == null) return null;
33
		if (input == null) return null;
34
		String url = input.getOriginalUrl();
35

  
36
		if ((url == null) || (url.trim().length() == 0)) return input;
37
		@SuppressWarnings("unchecked")
38
		List<String> urls = new Gson().fromJson(url, ArrayList.class);
39
		if ((urls == null) || (urls.size() == 0)) return input;
40
		for (String s : urls) {
41

  
42
			if (s.trim().endsWith(".html")) {
43
				input.setOriginalUrl(s);
44
				input.setUrl(s);
45
				return input;
46
			}
47
		}
48
		input.setOriginalUrl(null);
49
		input.setUrl(null);
50
		return input;
51
	}
52

  
53
	@Override
54
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
55
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
56

  
57
			@Override
58
			public DownloadItem apply(final DownloadItem input) {
59
				return retrieveUrl(input);
60
			}
61
		});
62
	}
63

  
64
	/*
65
	 * (non-Javadoc)
66
	 *
67
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
68
	 */
69
	@Override
70
	public String getPluginName() {
71
		return "DLIBDownloadPlugin";
72
	}
73

  
74
	@Override
75
	public void setBasePath(final String basePath) {
76
		// TODO Auto-generated method stub
77

  
78
	}
79

  
80
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/java/eu/dnetlib/download/plugin/AbstractDownloadPlugin.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.util.List;
4

  
5
import org.joda.time.DateTime;
6
import org.joda.time.Days;
7

  
8
import eu.dnetlib.data.download.rmi.DownloadItem;
9
import eu.dnetlib.data.download.rmi.DownloadItem.OpenAccessValues;
10

  
11
// TODO: Auto-generated Javadoc
12
/**
13
 * The Class AbstractDownloadPlugin.
14
 */
15
public abstract class AbstractDownloadPlugin {
16

  
17
	/** The regular expression. */
18
	protected List<String> regularExpression;
19

  
20

  
21
	/**
22
	 * Check open access.
23
	 *
24
	 * @param input the input
25
	 * @return the download item
26
	 */
27
	public DownloadItem checkOpenAccess(final DownloadItem input) {
28
		if (input != null) {
29
			OpenAccessValues openAccess = OpenAccessValues.valueOf(input.getOpenAccess());
30
			switch (openAccess) {
31
			case OPEN:
32
				return input;
33
			case CLOSED:
34
			case RESTRICTED:
35
			case UNKNOWN:
36
				return null;
37
			case EMBARGO:
38
				if (input.getEmbargoDate() == null) return null;
39
				DateTime embargoDate = new DateTime(input.getEmbargoDate());
40
				DateTime today = new DateTime();
41
				Days days = Days.daysBetween(embargoDate, today);
42
				if (days.getDays() <= 0) return input;
43
				return null;
44
			}
45

  
46
		}
47
		return null;
48
	}
49

  
50

  
51
	/**
52
	 * Gets the regular expression.
53
	 *
54
	 * @return the regular expression
55
	 */
56
	public List<String> getRegularExpression() {
57
		return regularExpression;
58
	}
59

  
60

  
61
	/**
62
	 * Sets the regular expression.
63
	 *
64
	 * @param regularExpression the new regular expression
65
	 */
66
	public void setRegularExpression(final List<String> regularExpression) {
67
		this.regularExpression = regularExpression;
68
	}
69

  
70
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/java/eu/dnetlib/download/plugin/FollowPDFLinkPlugins.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.net.HttpURLConnection;
4
import java.net.URL;
5
import java.util.ArrayList;
6
import java.util.List;
7

  
8
import org.apache.commons.logging.Log;
9
import org.apache.commons.logging.LogFactory;
10
import org.jsoup.Jsoup;
11
import org.jsoup.nodes.Document;
12
import org.jsoup.nodes.Element;
13
import org.jsoup.select.Elements;
14

  
15
import com.google.common.base.Function;
16
import com.google.common.collect.Iterables;
17
import com.google.gson.Gson;
18

  
19
import eu.dnetlib.data.download.rmi.DownloadItem;
20
import eu.dnetlib.data.download.rmi.DownloadPlugin;
21

  
22
public class FollowPDFLinkPlugins extends AbstractDownloadPlugin implements DownloadPlugin {
23

  
24
	/** The Constant log. */
25
	private static final Log log = LogFactory.getLog(FollowPDFLinkPlugins.class);
26

  
27
	/**
28
	 * Extract url.
29
	 *
30
	 * @param url
31
	 *            the url
32
	 * @return the string
33
	 */
34
	private String extractURL(final String url) {
35
		try {
36
			URL startURL = new URL(url);
37
			HttpURLConnection conn = (HttpURLConnection) startURL.openConnection();
38
			conn.setInstanceFollowRedirects(true);  // you still need to handle redirect manully.
39
			HttpURLConnection.setFollowRedirects(true);
40
			String location = url;
41
			if ((conn.getResponseCode() >= 300) && (conn.getResponseCode() < 400)) {
42
				location = conn.getHeaderFields().get("Location").get(0);
43
				conn.disconnect();
44
			}
45
			Document doc = Jsoup.connect(location).get();
46
			Elements links = doc.select("a[href$=.pdf]");
47

  
48
			for (Element link : links) {
49
				String linkvalue = link.attr("abs:href");
50
				if (regularExpression!= null) {
51
					for (String regex: regularExpression) {
52
						if (linkvalue.matches(regex))
53
							return linkvalue;
54
					}
55
				} else
56
					return linkvalue;
57
			}
58
			return null;
59
		} catch (Exception e) {
60
			log.error("Error on extract URL", e);
61
			return null;
62
		}
63

  
64
	}
65

  
66
	@Override
67
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
68
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
69

  
70
			@Override
71
			public DownloadItem apply(final DownloadItem input) {
72
				return retrieveUrl(input);
73
			}
74
		});
75
	}
76

  
77
	/*
78
	 * (non-Javadoc)
79
	 *
80
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
81
	 */
82
	@Override
83
	public String getPluginName() {
84
		return "FollowPDFLinkPlugins";
85
	}
86

  
87
	/*
88
	 * (non-Javadoc)
89
	 *
90
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
91
	 */
92
	@Override
93
	public DownloadItem retrieveUrl(final DownloadItem input) {
94
		if (checkOpenAccess(input) == null) return null;
95
		String url = input.getOriginalUrl();
96

  
97
		if ((url == null) || (url.trim().length() == 0)) return input;
98
		@SuppressWarnings("unchecked")
99
		List<String> urls = new Gson().fromJson(url, ArrayList.class);
100
		if ((urls == null) || (urls.size() == 0)) return input;
101
		for (String s : urls) {
102
			String newURL = extractURL(s);
103
			if (newURL != null) {
104
				input.setOriginalUrl(s);
105
				input.setUrl(newURL);
106
				return input;
107
			}
108
		}
109
		input.setOriginalUrl(null);
110
		input.setUrl(null);
111
		return input;
112
	}
113

  
114
	@Override
115
	public void setBasePath(final String basePath) {
116
		// TODO Auto-generated method stub
117

  
118
	}
119

  
120
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/java/eu/dnetlib/download/plugin/HALPdfDocumentPlugin.java
1
/**
2
 *
3
 */
4
package eu.dnetlib.download.plugin;
5

  
6
import java.util.ArrayList;
7
import java.util.List;
8

  
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11

  
12
import com.google.common.base.Function;
13
import com.google.common.collect.Iterables;
14
import com.google.gson.Gson;
15

  
16
import eu.dnetlib.data.download.rmi.DownloadItem;
17
import eu.dnetlib.data.download.rmi.DownloadPlugin;
18

  
19

  
20
/**
21
 * @author sandro
22
 *
23
 */
24
public class HALPdfDocumentPlugin extends AbstractDownloadPlugin implements DownloadPlugin {
25

  
26
	private static final Log log = LogFactory.getLog(HALPdfDocumentPlugin.class); // NOPMD by marko on 11/24/08 5:02 PM
27

  
28

  
29
	/**
30
	 * {@inheritDoc}
31
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
32
	 */
33
	@Override
34
	public String getPluginName() {
35
		// TODO Auto-generated method stub
36
		return "HALPdfDocumentPlugin";
37
	}
38

  
39
	/**
40
	 * {@inheritDoc}
41
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retireveUrls(java.lang.Iterable)
42
	 */
43
	@Override
44
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
45
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
46

  
47
			@Override
48
			public DownloadItem apply(final DownloadItem input) {
49
				return retrieveUrl(input);
50
			}
51
		});
52
	}
53

  
54
	/**
55
	 * {@inheritDoc}
56
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
57
	 */
58
	@Override
59
	public DownloadItem retrieveUrl(final DownloadItem input) {
60
		try{
61
			if (checkOpenAccess(input) == null) return null;
62
			if (input == null) return null;
63
			String url = input.getOriginalUrl();
64

  
65
			if ((url == null) || (url.trim().length() == 0)) return input;
66
			@SuppressWarnings("unchecked")
67
			List<String> urls = new Gson().fromJson(url, ArrayList.class);
68
			if ((urls == null) || (urls.size() == 0)) return input;
69
			for (String s : urls) {
70

  
71
				if (s.trim().toLowerCase().endsWith("document")) {
72
					input.setOriginalUrl(s);
73
					input.setUrl(s);
74
					return input;
75
				}
76
			}
77
			input.setOriginalUrl(null);
78
			input.setUrl(null);
79
			return input;
80
		} catch (Throwable e) {
81
			log.error("Error on retreiving URL",e);
82
			input.setOriginalUrl(null);
83
			input.setUrl(null);
84
			return input;
85
		}
86
	}
87

  
88
	/**
89
	 * {@inheritDoc}
90
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#setBasePath(java.lang.String)
91
	 */
92
	@Override
93
	public void setBasePath(final String arg0) {
94
		// TODO Auto-generated method stub
95

  
96
	}
97

  
98
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/src/main/resources/eu/dnetlib/download/plugin/applicationContext-node-plugins.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<beans xmlns="http://www.springframework.org/schema/beans"
3
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:jaxws="http://cxf.apache.org/jaxws"
4
	xmlns:sec="http://cxf.apache.org/configuration/security" xmlns:wsa="http://cxf.apache.org/ws/addressing"
5
	xmlns:p="http://www.springframework.org/schema/p" xmlns:http="http://cxf.apache.org/transports/http/configuration"
6
	xmlns:t="http://dnetlib.eu/springbeans/t" xmlns:template="http://dnetlib.eu/springbeans/template"
7
	xmlns:util="http://www.springframework.org/schema/util"
8
	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
9
                                    http://cxf.apache.org/ws/addressing http://cxf.apache.org/schemas/ws-addr-conf.xsd
10
                                    http://cxf.apache.org/configuration/security http://cxf.apache.org/schemas/configuration/security.xsd
11
                                    http://cxf.apache.org/transports/http/configuration http://cxf.apache.org/schemas/configuration/http-conf.xsd
12
                            http://cxf.apache.org/jaxws http://cxf.apache.org/schemas/jaxws.xsd
13
                            http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd
14
                            http://dnetlib.eu/springbeans/template http://dnetlib.eu/springbeans/template.xsd">
15

  
16

  
17

  
18
	<!-- Download plugin beans. Each one is instantiated from the named class in eu.dnetlib.download.plugin,
	     with no constructor arguments or properties declared here.
	     NOTE(review): the bean ids mix camelCase and PascalCase; they are presumably looked up by id
	     elsewhere, so confirm every reference before normalizing them. -->
	<bean id= "easyPDFDownloadPlugin" class="eu.dnetlib.download.plugin.EasyPDFDownloadPlugin"/>
19
	
20
	<bean id= "arxivDownloadPlugin" class="eu.dnetlib.download.plugin.ArxivDownloadPlugin"/>
21
	
22
	<bean id= "europePMCDownloadPlugin" class="eu.dnetlib.download.plugin.EuropePMC"/>
23
	
24
	<bean id= "elisDownloadPlugin" class="eu.dnetlib.download.plugin.ELisDownloadPlugin"/>
25
	
26
	<bean id="followPDFLinkPlugins" class="eu.dnetlib.download.plugin.FollowPDFLinkPlugins"/>
27
	
28
	<bean id="ArxivImportFromFile" class="eu.dnetlib.download.plugin.ArxivImportFromFile"/>	
29
	
30
	<bean id="DLIBDownloadPlugin" class="eu.dnetlib.download.plugin.DLibPlugin"/>	
31
	
32
	<bean id="HALPdfDocumentPlugin" class="eu.dnetlib.download.plugin.HALPdfDocumentPlugin"/>
33
	
34

  
35
</beans>
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9/pom.xml
1
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2
	<modelVersion>4.0.0</modelVersion>
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet-parent</artifactId>
6
		<version>1.0.0</version>
7
	</parent>
8
	<groupId>eu.dnetlib</groupId>
9
	<artifactId>dnet-download-plugins</artifactId>
10
	<version>2.1.9</version>
11
	<scm>
12
	  <developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.9</developerConnection>
13
	</scm>
14
	<!-- Module dependencies. jsoup and joda-time are pinned to exact versions; the other two are
	     resolved as described on each entry. -->
	<dependencies>
15
		<!-- Version range: any dnet-download-service release from 2.0.0 (inclusive) up to 3.0.0 (exclusive). -->
		<dependency>
16
			<groupId>eu.dnetlib</groupId>
17
			<artifactId>dnet-download-service</artifactId>
18
			<version>[2.0.0,3.0.0)</version>
19
		</dependency>
20
		<dependency>
21
			<groupId>org.jsoup</groupId>
22
			<artifactId>jsoup</artifactId>
23
			<version>1.7.2</version>
24
		</dependency>
25
		<dependency>
26
			<groupId>joda-time</groupId>
27
			<artifactId>joda-time</artifactId>
28
			<version>2.3</version>
29
		</dependency>
30
		<!-- Test-only. junit.version is not defined in this POM; presumably it is inherited from the
		     dnet-parent POM (confirm there). -->
		<dependency>
31
			<groupId>junit</groupId>
32
			<artifactId>junit</artifactId>
33
			<version>${junit.version}</version>
34
			<scope>test</scope>
35
		</dependency>
36
	</dependencies>
37

  
38
</project>

Also available in: Unified diff