Project

General

Profile

« Previous | Next » 

Revision 31406

Added check on open access before download

View differences:

modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/objectStore/RetrieveURLSJobNode.java
34 34
	/** The xpath metadata id. */
35 35
	private String xpathMetadataId;
36 36

  
37
	/** The xpath open access. */
38
	private String xpathOpenAccess;
39

  
40
	/** The xpath embargo date. */
41
	private String xpathEmbargoDate;
42

  
37 43
	/** The result set client factory. */
38 44
	@Autowired
39 45
	private ResultSetClientFactory resultSetClientFactory;
......
44 50

  
45 51
	/*
46 52
	 * (non-Javadoc)
47
	 * 
53
	 *
48 54
	 * @see eu.dnetlib.msro.workflows.nodes.SimpleJobNode#execute(com.googlecode.sarasvati.NodeToken)
49 55
	 */
50 56
	@Override
51 57
	protected String execute(final NodeToken token) throws Exception {
52 58
		final W3CEndpointReference inputEpr = (new EPRUtils()).getEpr(token.getEnv().getAttribute(inputEprParam));
53 59
		Iterable<String> input = resultSetClientFactory.getClient(inputEpr);
54
		Iterable<String> extractedUrls = Iterables.transform(input, new UrlExtractor(xpath, xpathMetadataId));
60
		Iterable<String> extractedUrls = Iterables.transform(input, new UrlExtractor(xpath, xpathMetadataId, xpathOpenAccess, xpathEmbargoDate));
55 61
		W3CEndpointReference eprUrls = resultSetFactory.createIterableResultSet(extractedUrls);
56 62
		token.getEnv().setAttribute(getOutputEprParam(), eprUrls.toString());
57 63
		return Arc.DEFAULT_ARC;
......
59 65

  
60 66
	/**
61 67
	 * Gets the xpath.
62
	 * 
68
	 *
63 69
	 * @return the xpath
64 70
	 */
65 71
	public String getXpath() {
......
68 74

  
69 75
	/**
70 76
	 * Sets the xpath.
71
	 * 
77
	 *
72 78
	 * @param xpath
73 79
	 *            the xpath to set
74 80
	 */
......
78 84

  
79 85
	/**
80 86
	 * Gets the xpath metadata id.
81
	 * 
87
	 *
82 88
	 * @return the xpathMetadataId
83 89
	 */
84 90
	public String getXpathMetadataId() {
......
87 93

  
88 94
	/**
89 95
	 * Sets the xpath metadata id.
90
	 * 
96
	 *
91 97
	 * @param xpathMetadataId
92 98
	 *            the xpathMetadataId to set
93 99
	 */
......
98 104

  
99 105
	/**
100 106
	 * Gets the result set client factory.
101
	 * 
107
	 *
102 108
	 * @return the resultSetClientFactory
103 109
	 */
104 110
	public ResultSetClientFactory getResultSetClientFactory() {
......
107 113

  
108 114
	/**
109 115
	 * Sets the result set client factory.
110
	 * 
116
	 *
111 117
	 * @param resultSetClientFactory
112 118
	 *            the resultSetClientFactory to set
113 119
	 */
......
117 123

  
118 124
	/**
119 125
	 * Gets the result set factory.
120
	 * 
126
	 *
121 127
	 * @return the resultSetFactory
122 128
	 */
123 129
	public IterableResultSetFactory getResultSetFactory() {
......
126 132

  
127 133
	/**
128 134
	 * Sets the result set factory.
129
	 * 
135
	 *
130 136
	 * @param resultSetFactory
131 137
	 *            the resultSetFactory to set
132 138
	 */
......
136 142

  
137 143
	/**
138 144
	 * Gets the output epr param.
139
	 * 
145
	 *
140 146
	 * @return the outputEprParam
141 147
	 */
142 148
	public String getOutputEprParam() {
......
145 151

  
146 152
	/**
147 153
	 * Sets the output epr param.
148
	 * 
154
	 *
149 155
	 * @param outputEprParam
150 156
	 *            the outputEprParam to set
151 157
	 */
......
154 160
	}
155 161

  
156 162
	/**
163
	 * Gets the input epr param.
164
	 *
157 165
	 * @return the inputEprParam
158 166
	 */
159 167
	public String getInputEprParam() {
......
161 169
	}
162 170

  
163 171
	/**
172
	 * Sets the input epr param.
173
	 *
164 174
	 * @param inputEprParam
165 175
	 *            the inputEprParam to set
166 176
	 */
......
168 178
		this.inputEprParam = inputEprParam;
169 179
	}
170 180

  
181
	/**
182
	 * Gets the xpath open access.
183
	 *
184
	 * @return the xpath open access
185
	 */
186
	public String getXpathOpenAccess() {
187
		return xpathOpenAccess;
188
	}
189

  
190
	/**
191
	 * Sets the xpath open access.
192
	 *
193
	 * @param xpathOpenAccess
194
	 *            the new xpath open access
195
	 */
196
	public void setXpathOpenAccess(final String xpathOpenAccess) {
197
		this.xpathOpenAccess = xpathOpenAccess;
198
	}
199

  
200
	/**
201
	 * Gets the xpath embargo date.
202
	 *
203
	 * @return the xpath embargo date
204
	 */
205
	public String getXpathEmbargoDate() {
206
		return xpathEmbargoDate;
207
	}
208

  
209
	/**
210
	 * Sets the xpath embargo date.
211
	 *
212
	 * @param xpathEmbargoDate
213
	 *            the new xpath embargo date
214
	 */
215
	public void setXpathEmbargoDate(final String xpathEmbargoDate) {
216
		this.xpathEmbargoDate = xpathEmbargoDate;
217
	}
218

  
171 219
}
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/download/DownloadFromMetadata.java
41 41

  
42 42
	/*
43 43
	 * (non-Javadoc)
44
	 * 
44
	 *
45 45
	 * @see eu.dnetlib.msro.workflows.nodes.BlackboardJobNode#getXqueryForServiceId(com.googlecode.sarasvati.NodeToken)
46 46
	 */
47 47
	@Override
......
51 51

  
52 52
	/*
53 53
	 * (non-Javadoc)
54
	 * 
54
	 *
55 55
	 * @see eu.dnetlib.msro.workflows.nodes.BlackboardJobNode#prepareJob(eu.dnetlib.enabling.tools.blackboard.BlackboardJob,
56 56
	 * com.googlecode.sarasvati.NodeToken)
57 57
	 */
......
74 74

  
75 75
			@Override
76 76
			protected void populateEnv(final Env env, final Map<String, String> responseParams) {
77

  
78 77
				env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "total", responseParams.get("total"));
79 78
			}
80 79
		};
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/download/UrlExtractor.java
11 11
import javax.xml.xpath.XPathExpression;
12 12
import javax.xml.xpath.XPathFactory;
13 13

  
14
import org.apache.commons.lang.StringUtils;
14 15
import org.apache.commons.logging.Log;
15 16
import org.apache.commons.logging.LogFactory;
17
import org.joda.time.DateTime;
18
import org.joda.time.format.DateTimeFormat;
19
import org.joda.time.format.DateTimeFormatter;
16 20
import org.w3c.dom.Document;
17 21
import org.w3c.dom.NodeList;
18 22

  
......
21 25

  
22 26
import eu.dnetlib.data.download.rmi.DownloadItem;
23 27

  
28
// TODO: Auto-generated Javadoc
24 29
/**
25 30
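 * Extracts the download URL(s), open access value, embargo date and metadata id from an XML record via XPath and returns them as a DownloadItem JSON string.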
26 31
 */
27 32
public class UrlExtractor implements Function<String, String> {
28 33

  
34
	/** The Constant log. */
29 35
	private static final Log log = LogFactory.getLog(UrlExtractor.class);
30 36

  
31
	/** The xpath. */
32
	private String xpath;
37
	/** The xpath url. */
38
	private String xpathURL;
33 39

  
34 40
	/** The xpath metadata id. */
35 41
	private String xpathMetadataID;
36 42

  
37
	public UrlExtractor(final String xpath, final String xpathMetadataID) {
38
		this.xpath = xpath;
43
	/** The xpath open access. */
44
	private String xpathOpenAccess;
45

  
46
	/** The xpath embargo date. */
47
	private String xpathEmbargoDate;
48

  
49
	/**
50
	 * Instantiates a new url extractor.
51
	 *
52
	 * @param xpath
53
	 *            the xpath selecting the URL(s) of the item to download
54
	 * @param xpathMetadataID
55
	 *            the xpath metadata id
56
	 */
57
	public UrlExtractor(final String xpath, final String xpathMetadataID, final String xpathOpenAccess, final String xpathEmbargoDate) {
58
		this.xpathURL = xpath;
39 59
		this.xpathMetadataID = xpathMetadataID;
60
		this.xpathOpenAccess = xpathOpenAccess;
61
		this.xpathEmbargoDate = xpathEmbargoDate;
40 62
	}
41 63

  
42 64
	/*
......
47 69
	@Override
48 70
	public String apply(final String input) {
49 71
		try {
50

  
51 72
			DownloadItem di = new DownloadItem();
52 73
			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
53 74
			DocumentBuilder builder;
......
55 76
			Document doc = builder.parse(new ByteArrayInputStream(input.getBytes()));
56 77
			XPathFactory xPathFactory = XPathFactory.newInstance();
57 78
			XPath myXpath = xPathFactory.newXPath();
58
			XPathExpression expression = myXpath.compile(xpath);
79
			XPathExpression expression = myXpath.compile(xpathURL);
59 80
			Object values = expression.evaluate(doc, XPathConstants.NODESET);
60 81
			di.setUrl(getNodes((NodeList) values));
61 82
			di.setOriginalUrl(getNodes((NodeList) values));
83

  
84
			if (xpathOpenAccess != null) {
85
				expression = myXpath.compile(xpathOpenAccess);
86
				String openAccess = expression.evaluate(doc);
87
				di.setOpenAccess(openAccess);
88
			}
89
			expression = myXpath.compile(xpathEmbargoDate);
90
			String embargoDate = expression.evaluate(doc);
91
			if (!StringUtils.isEmpty(embargoDate)) {
92
				try {
93
					DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd");
94
					DateTime dt = fmt.parseDateTime(embargoDate);
95
					di.setEmbargoDate(dt.toDate());
96
				} catch (Exception pe) {}
97
			}
62 98
			expression = myXpath.compile(xpathMetadataID);
63 99
			String extracted_metadataId = expression.evaluate(doc);
64 100
			di.setIdItemMetadata(extracted_metadataId);
65
			// di.setFileName(extracted_metadataId);
66 101
			return di.toJSON();
67 102
		} catch (Exception e) {
68 103
			log.error("OPSSS... Something bad happen on evaluating ", e);
......
71 106

  
72 107
	}
73 108

  
109
	/**
110
	 * Gets the nodes.
111
	 *
112
	 * @param nodes
113
	 *            the nodes
114
	 * @return the nodes
115
	 */
74 116
	private String getNodes(final NodeList nodes) {
75 117
		List<String> extracted_Url = new ArrayList<String>();
76 118
		if (nodes != null) {
......
82 124
	}
83 125

  
84 126
	/**
85
	 * Gets the xpath.
86
	 * 
87
	 * @return the xpath
127
	 * Gets the xpath metadata id.
128
	 *
129
	 * @return the xpathMetadataID
88 130
	 */
89
	public String getXpath() {
90
		return xpath;
131
	public String getXpathMetadataID() {
132
		return xpathMetadataID;
91 133
	}
92 134

  
93 135
	/**
94
	 * Sets the xpath.
95
	 * 
96
	 * @param xpath
97
	 *            the xpath to set
136
	 * Sets the xpath metadata id.
137
	 *
138
	 * @param xpathMetadataID
139
	 *            the xpathMetadataID to set
98 140
	 */
99
	public void setXpath(final String xpath) {
100
		this.xpath = xpath;
141
	public void setXpathMetadataID(final String xpathMetadataID) {
142
		this.xpathMetadataID = xpathMetadataID;
101 143
	}
102 144

  
103 145
	/**
104
	 * @return the xpathMetadataID
146
	 * Gets the xpath url.
147
	 *
148
	 * @return the xpath url
105 149
	 */
106
	public String getXpathMetadataID() {
107
		return xpathMetadataID;
150
	public String getXpathURL() {
151
		return xpathURL;
108 152
	}
109 153

  
110 154
	/**
111
	 * @param xpathMetadataID
112
	 *            the xpathMetadataID to set
155
	 * Sets the xpath url.
156
	 *
157
	 * @param xpathURL
158
	 *            the new xpath url
113 159
	 */
114
	public void setXpathMetadataID(final String xpathMetadataID) {
115
		this.xpathMetadataID = xpathMetadataID;
160
	public void setXpathURL(final String xpathURL) {
161
		this.xpathURL = xpathURL;
116 162
	}
117 163

  
164
	/**
165
	 * Gets the xpath open access.
166
	 *
167
	 * @return the xpath open access
168
	 */
169
	public String getXpathOpenAccess() {
170
		return xpathOpenAccess;
171
	}
172

  
173
	/**
174
	 * Sets the xpath open access.
175
	 *
176
	 * @param xpathOpenAccess
177
	 *            the new xpath open access
178
	 */
179
	public void setXpathOpenAccess(final String xpathOpenAccess) {
180
		this.xpathOpenAccess = xpathOpenAccess;
181
	}
182

  
183
	/**
184
	 * Gets the xpath embargo date.
185
	 *
186
	 * @return the xpath embargo date
187
	 */
188
	public String getXpathEmbargoDate() {
189
		return xpathEmbargoDate;
190
	}
191

  
192
	/**
193
	 * Sets the xpath embargo date.
194
	 *
195
	 * @param xpathEmbargoDate
196
	 *            the new xpath embargo date
197
	 */
198
	public void setXpathEmbargoDate(final String xpathEmbargoDate) {
199
		this.xpathEmbargoDate = xpathEmbargoDate;
200
	}
201

  
118 202
}
modules/dnet-msro-service/trunk/pom.xml
112 112
			<version>${junit.version}</version>
113 113
			<scope>test</scope>
114 114
		</dependency>
115
		<dependency>
116
			<groupId>joda-time</groupId>
117
			<artifactId>joda-time</artifactId>
118
			<version>2.3</version>
119
		</dependency>
115 120
	</dependencies>
116 121

  
117 122
	<properties>
modules/dnet-download-plugins/trunk/src/test/java/DateIntervalTest.java
1
import org.joda.time.DateTime;
2
import org.joda.time.Days;
3
import org.junit.Assert;
4
import org.junit.Before;
5
import org.junit.Test;
6

  
7
public class DateIntervalTest {
8

  
9
	@Before
10
	public void setUp() throws Exception {}
11

  
12
	@Test
13
	public void test() {
14
		DateTime beforeDate = new DateTime(2010, 01, 1, 0, 0);
15
		DateTime now = new DateTime();
16

  
17
		Days day = Days.daysBetween(beforeDate, now);
18
		Assert.assertTrue(day.getDays() > 0);
19
	}
20

  
21
}
modules/dnet-download-plugins/trunk/src/main/java/eu/dnetlib/download/plugin/EuropePMC.java
3 3
import java.util.ArrayList;
4 4
import java.util.List;
5 5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
6 8
import com.google.gson.Gson;
7 9

  
8 10
import eu.dnetlib.data.download.rmi.DownloadItem;
......
29 31
		return "europePMCDownloadPlugin";
30 32
	}
31 33

  
34
	@Override
35
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
36
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
37

  
38
			@Override
39
			public DownloadItem apply(final DownloadItem input) {
40
				return retrieveUrl(input);
41
			}
42
		});
43
	}
44

  
32 45
	/*
33 46
	 * (non-Javadoc)
34 47
	 * 
......
36 49
	 */
37 50
	@Override
38 51
	public DownloadItem retrieveUrl(final DownloadItem input) {
52

  
39 53
		String url = input.getOriginalUrl();
40 54
		if ((url == null) || (url.trim().length() == 0)) return input;
41 55
		@SuppressWarnings("unchecked")
modules/dnet-download-plugins/trunk/src/main/java/eu/dnetlib/download/plugin/ArxivDownloadPlugin.java
3 3
import java.util.ArrayList;
4 4
import java.util.List;
5 5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
6 8
import com.google.gson.Gson;
7 9

  
8 10
import eu.dnetlib.data.download.rmi.DownloadItem;
......
12 14

  
13 15
	@Override
14 16
	public DownloadItem retrieveUrl(final DownloadItem input) {
17
		if (checkOpenAccess(input) == null) return null;
15 18
		String url = input.getOriginalUrl();
16 19
		if ((url == null) || (url.trim().length() == 0)) return input;
17 20
		@SuppressWarnings("unchecked")
......
29 32
	}
30 33

  
31 34
	@Override
35
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
36
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
37

  
38
			@Override
39
			public DownloadItem apply(final DownloadItem input) {
40
				return retrieveUrl(input);
41
			}
42
		});
43
	}
44

  
45
	@Override
32 46
	public String getPluginName() {
33 47
		return "ArxivDownloadPlugin";
34 48
	}
modules/dnet-download-plugins/trunk/src/main/java/eu/dnetlib/download/plugin/EasyPDFDownloadPlugin.java
3 3
import java.util.ArrayList;
4 4
import java.util.List;
5 5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
6 8
import com.google.gson.Gson;
7 9

  
8 10
import eu.dnetlib.data.download.rmi.DownloadItem;
......
16 18

  
17 19
	/*
18 20
	 * (non-Javadoc)
19
	 * 
21
	 *
20 22
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
21 23
	 */
22 24
	@Override
23 25
	public DownloadItem retrieveUrl(final DownloadItem input) {
26
		if (checkOpenAccess(input) == null) return null;
24 27
		if (input == null) return null;
25 28
		String url = input.getOriginalUrl();
26 29

  
......
29 32
		List<String> urls = new Gson().fromJson(url, ArrayList.class);
30 33
		if ((urls == null) || (urls.size() == 0)) return input;
31 34
		for (String s : urls) {
35

  
32 36
			if (s.trim().endsWith(".pdf")) {
33 37
				input.setOriginalUrl(s);
34 38
				input.setUrl(s);
......
40 44
		return input;
41 45
	}
42 46

  
47
	@Override
48
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
49
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
50

  
51
			@Override
52
			public DownloadItem apply(final DownloadItem input) {
53
				return retrieveUrl(input);
54
			}
55
		});
56
	}
57

  
43 58
	/*
44 59
	 * (non-Javadoc)
45
	 * 
60
	 *
46 61
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
47 62
	 */
48 63
	@Override
modules/dnet-download-plugins/trunk/src/main/java/eu/dnetlib/download/plugin/ELisDownloadPlugin.java
8 8
import org.jsoup.nodes.Element;
9 9
import org.jsoup.select.Elements;
10 10

  
11
import com.google.common.base.Function;
12
import com.google.common.collect.Iterables;
11 13
import com.google.gson.Gson;
12 14

  
13 15
import eu.dnetlib.data.download.rmi.DownloadItem;
......
40 42

  
41 43
	}
42 44

  
45
	@Override
46
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
47
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
48

  
49
			@Override
50
			public DownloadItem apply(final DownloadItem input) {
51
				return retrieveUrl(input);
52
			}
53
		});
54
	}
55

  
43 56
	/*
44 57
	 * (non-Javadoc)
45
	 *
58
	 * 
46 59
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
47 60
	 */
48 61
	@Override
......
52 65

  
53 66
	/*
54 67
	 * (non-Javadoc)
55
	 *
68
	 * 
56 69
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
57 70
	 */
58 71
	@Override
59 72
	public DownloadItem retrieveUrl(final DownloadItem input) {
73
		if (checkOpenAccess(input) == null) return null;
60 74
		String url = input.getOriginalUrl();
61 75

  
62 76
		if ((url == null) || (url.trim().length() == 0)) return input;
modules/dnet-download-plugins/trunk/src/main/java/eu/dnetlib/download/plugin/AbstractDownloadPlugin.java
1 1
package eu.dnetlib.download.plugin;
2 2

  
3
import com.google.common.base.Function;
4
import com.google.common.collect.Iterables;
3
import org.joda.time.DateTime;
4
import org.joda.time.Days;
5 5

  
6 6
import eu.dnetlib.data.download.rmi.DownloadItem;
7
import eu.dnetlib.data.download.rmi.DownloadPlugin;
7
import eu.dnetlib.data.download.rmi.DownloadItem.OpenAccessValues;
8 8

  
9
public abstract class AbstractDownloadPlugin implements DownloadPlugin {
9
public abstract class AbstractDownloadPlugin {
10 10

  
11
	@Override
12
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> urls) {
13
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
11
	public DownloadItem checkOpenAccess(final DownloadItem input) {
12
		if (input != null) {
13
			OpenAccessValues openAccess = OpenAccessValues.valueOf(input.getOpenAccess());
14
			switch (openAccess) {
15
			case OPEN:
16
				return input;
17
			case CLOSED:
18
			case RESTRICTED:
19
			case UNKNOWN:
20
				return null;
21
			case EMBARGO:
22
				if (input.getEmbargoDate() == null) return null;
23
				DateTime embargoDate = new DateTime(input.getEmbargoDate());
24
				DateTime today = new DateTime();
25
				Days days = Days.daysBetween(embargoDate, today);
26
				if (days.getDays() <= 0) return input;
27
				return null;
28
			}
14 29

  
15
			@Override
16
			public DownloadItem apply(final DownloadItem input) {
17
				return retrieveUrl(input);
18
			}
19
		});
30
		}
31
		return null;
20 32
	}
21 33

  
22 34
}
modules/dnet-download-plugins/trunk/pom.xml
21 21
			<artifactId>jsoup</artifactId>
22 22
			<version>1.7.2</version>
23 23
		</dependency>
24
		<dependency>
25
			<groupId>joda-time</groupId>
26
			<artifactId>joda-time</artifactId>
27
			<version>2.3</version>
28
		</dependency>
29
		<dependency>
30
			<groupId>junit</groupId>
31
			<artifactId>junit</artifactId>
32
			<version>${junit.version}</version>
33
			<scope>test</scope>
34
		</dependency>
24 35
	</dependencies>
25 36

  
26 37
</project>

Also available in: Unified diff