Revision 58995
Added by Alessia Bardi almost 4 years ago
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverIteratorTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.doiresolver; |
|
2 |
|
|
3 |
import org.junit.Assert; |
|
4 |
import org.junit.Before; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.runner.RunWith; |
|
7 |
import org.mockito.Mock; |
|
8 |
import org.mockito.junit.MockitoJUnitRunner; |
|
9 |
|
|
10 |
import static org.mockito.Mockito.when; |
|
11 |
|
|
12 |
@RunWith(MockitoJUnitRunner.class) |
|
13 |
public class DOIResolverIteratorTest { |
|
14 |
|
|
15 |
@Mock |
|
16 |
CrossrefResolver resolver; |
|
17 |
DOIResolverIterator it; |
|
18 |
|
|
19 |
@Before |
|
20 |
public void setup(){ |
|
21 |
when(resolver.resolve("1")).thenReturn("RECORD1"); |
|
22 |
when(resolver.resolve("2")).thenReturn(null); |
|
23 |
when(resolver.resolve("3")).thenReturn("RECORD3"); |
|
24 |
String file = getClass().getResource("/eu/dnetlib/data/collector/plugins/doiresolver/doi_list.csv").getFile(); |
|
25 |
it = new DOIResolverIterator(file, resolver); |
|
26 |
} |
|
27 |
|
|
28 |
@Test |
|
29 |
public void test(){ |
|
30 |
while(it.hasNext()){ |
|
31 |
System.out.println(it.next()); |
|
32 |
} |
|
33 |
} |
|
34 |
|
|
35 |
@Test |
|
36 |
public void testCleanOk(){ |
|
37 |
String doi = "10.1234/1234"; |
|
38 |
Assert.assertEquals(doi, it.cleanDOI(doi)); |
|
39 |
} |
|
40 |
|
|
41 |
@Test |
|
42 |
public void testCleanHttp(){ |
|
43 |
String doi = "10.1234/1234"; |
|
44 |
String doiURL = "http://dx.doi.org/"+doi; |
|
45 |
Assert.assertEquals(doi, it.cleanDOI(doiURL)); |
|
46 |
} |
|
47 |
|
|
48 |
@Test |
|
49 |
public void testCleanHttps(){ |
|
50 |
String doi = "10.1234/1234"; |
|
51 |
String doiURL = "https://dx.doi.org/"+doi; |
|
52 |
Assert.assertEquals(doi, it.cleanDOI(doiURL)); |
|
53 |
} |
|
54 |
} |
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/doiresolver/CrossrefResolverTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.doiresolver; |
|
2 |
|
|
3 |
import eu.dnetlib.data.collector.plugins.HttpConnector; |
|
4 |
import org.junit.Assert; |
|
5 |
import org.junit.Before; |
|
6 |
import org.junit.Ignore; |
|
7 |
import org.junit.Test; |
|
8 |
|
|
9 |
@Ignore |
|
10 |
public class CrossrefResolverTest { |
|
11 |
|
|
12 |
HttpConnector httpConnector; |
|
13 |
CrossrefResolver resolver; |
|
14 |
|
|
15 |
@Before |
|
16 |
public void setup(){ |
|
17 |
httpConnector = new HttpConnector(); |
|
18 |
resolver = new CrossrefResolver(); |
|
19 |
resolver.setHttpConnector(httpConnector); |
|
20 |
} |
|
21 |
|
|
22 |
@Test |
|
23 |
public void testUnexistingDOI(){ |
|
24 |
String doi = "abcd"; |
|
25 |
Assert.assertNull(resolver.resolve(doi)); |
|
26 |
} |
|
27 |
|
|
28 |
@Test |
|
29 |
public void testResolveDOI(){ |
|
30 |
String doi = "10.1016/j.carbpol.2020.115930"; |
|
31 |
String record = resolver.resolve(doi); |
|
32 |
Assert.assertNotNull(record); |
|
33 |
System.out.println(record); |
|
34 |
} |
|
35 |
|
|
36 |
} |
modules/dnet-collector-plugins/trunk/src/test/resources/eu/dnetlib/data/collector/plugins/doiresolver/doi_list.csv | ||
---|---|---|
1 |
1 |
|
2 |
2 |
|
3 |
3 |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverIterator.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.doiresolver; |
|
2 |
|
|
3 |
import eu.dnetlib.data.collector.plugins.utils.JsonUtils; |
|
4 |
import org.apache.commons.lang.StringUtils; |
|
5 |
import org.apache.commons.logging.Log; |
|
6 |
import org.apache.commons.logging.LogFactory; |
|
7 |
|
|
8 |
import java.io.IOException; |
|
9 |
import java.nio.file.Files; |
|
10 |
import java.nio.file.Paths; |
|
11 |
import java.util.Iterator; |
|
12 |
import java.util.concurrent.ArrayBlockingQueue; |
|
13 |
|
|
14 |
public class DOIResolverIterator implements Iterator<String> { |
|
15 |
|
|
16 |
private static final Log log = LogFactory.getLog(DOIResolverIterator.class); |
|
17 |
|
|
18 |
private static final String STARTER = "FIRE"; |
|
19 |
private static final String TERMINATOR = "ARNOLD"; |
|
20 |
private static final String BAD_TERMINATOR = "BAD"; |
|
21 |
private static final String UNRESOLVED = "UNRESOLVED"; |
|
22 |
|
|
23 |
/** Path to the file that contains a list of DOIs, one per line. **/ |
|
24 |
private String filePath; |
|
25 |
|
|
26 |
private ArrayBlockingQueue<String> queue; |
|
27 |
|
|
28 |
private CrossrefResolver crossrefResolver; |
|
29 |
|
|
30 |
|
|
31 |
public DOIResolverIterator(final String filePath, final CrossrefResolver crossrefResolver) { |
|
32 |
this.filePath = filePath; |
|
33 |
this.queue = new ArrayBlockingQueue<>(100); |
|
34 |
this.crossrefResolver = crossrefResolver; |
|
35 |
init(); |
|
36 |
} |
|
37 |
|
|
38 |
private void init(){ |
|
39 |
new Thread(() -> { |
|
40 |
// put first item in the queue |
|
41 |
if(queue.offer(STARTER)) { |
|
42 |
// read the file, ask the resolvers, put results in a shared queue |
|
43 |
//whatever exceptions, add terminator to the queue |
|
44 |
try{ |
|
45 |
Files.lines(Paths.get(filePath)).forEach(doi -> queue.offer(resolve(doi))); |
|
46 |
} catch (IOException e) { |
|
47 |
log.error(e); |
|
48 |
queue.offer(BAD_TERMINATOR); |
|
49 |
} |
|
50 |
} |
|
51 |
queue.offer(TERMINATOR); |
|
52 |
|
|
53 |
|
|
54 |
} |
|
55 |
).start(); |
|
56 |
} |
|
57 |
|
|
58 |
private String resolve(final String doi){ |
|
59 |
log.debug("Resolving "+doi); |
|
60 |
log.debug("Crossref..."); |
|
61 |
String record = crossrefResolver.resolve(cleanDOI(doi)); |
|
62 |
if(StringUtils.isNotBlank(record)) return record; |
|
63 |
else { |
|
64 |
//try another resolver |
|
65 |
} |
|
66 |
return UNRESOLVED; |
|
67 |
} |
|
68 |
|
|
69 |
/** |
|
70 |
* Returns the identifier part of the DOI only. |
|
71 |
* @param doi |
|
72 |
* @return the DOI |
|
73 |
*/ |
|
74 |
protected String cleanDOI(final String doi){ |
|
75 |
return doi.replace("http://dx.doi.org/", "").replace("https://dx.doi.org/", ""); |
|
76 |
} |
|
77 |
|
|
78 |
@Override |
|
79 |
public boolean hasNext() { |
|
80 |
//If I get a null value, the queue is currently empty. so we wait for something |
|
81 |
if(queue.peek() == null){ |
|
82 |
try { |
|
83 |
Thread.sleep(10); |
|
84 |
} catch (InterruptedException e) { |
|
85 |
e.printStackTrace(); |
|
86 |
} |
|
87 |
return hasNext(); |
|
88 |
} |
|
89 |
if(queue.peek().equals(TERMINATOR) || queue.peek().equals(BAD_TERMINATOR)){ |
|
90 |
return false; |
|
91 |
} |
|
92 |
if(queue.peek().equals(UNRESOLVED) || queue.peek().equals(STARTER)){ |
|
93 |
queue.poll(); |
|
94 |
return hasNext(); |
|
95 |
} |
|
96 |
return true; |
|
97 |
|
|
98 |
} |
|
99 |
|
|
100 |
@Override |
|
101 |
public String next() { |
|
102 |
return queue.poll(); |
|
103 |
} |
|
104 |
|
|
105 |
public String getFilePath() { |
|
106 |
return filePath; |
|
107 |
} |
|
108 |
|
|
109 |
public void setFilePath(String filePath) { |
|
110 |
this.filePath = filePath; |
|
111 |
} |
|
112 |
|
|
113 |
public CrossrefResolver getCrossrefResolver() { |
|
114 |
return crossrefResolver; |
|
115 |
} |
|
116 |
|
|
117 |
public void setCrossrefResolver(CrossrefResolver crossrefResolver) { |
|
118 |
this.crossrefResolver = crossrefResolver; |
|
119 |
} |
|
120 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/CrossrefResolver.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.doiresolver; |
|
2 |
|
|
3 |
import eu.dnetlib.data.collector.plugins.HttpConnector; |
|
4 |
import eu.dnetlib.data.collector.plugins.utils.JsonUtils; |
|
5 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
6 |
import org.apache.commons.io.IOUtils; |
|
7 |
import org.apache.commons.logging.Log; |
|
8 |
import org.apache.commons.logging.LogFactory; |
|
9 |
import org.springframework.beans.factory.annotation.Autowired; |
|
10 |
|
|
11 |
import java.io.IOException; |
|
12 |
import java.io.InputStream; |
|
13 |
|
|
14 |
public class CrossrefResolver implements DOIResolver{ |
|
15 |
|
|
16 |
private static final Log log = LogFactory.getLog(CrossrefResolver.class); |
|
17 |
private String baseURL = "https://api.crossref.org/works/"; |
|
18 |
|
|
19 |
@Autowired |
|
20 |
private HttpConnector httpConnector; |
|
21 |
|
|
22 |
@Override |
|
23 |
public String resolve(String doi) { |
|
24 |
try { |
|
25 |
InputStream is = httpConnector.getInputSourceAsStream(getBaseURL()+doi); |
|
26 |
return asXml(IOUtils.toString(is)); |
|
27 |
} catch (IOException | CollectorServiceException e) { |
|
28 |
log.error("Cannot resolve doi "+doi+" Exception: "+e); |
|
29 |
return null; |
|
30 |
} |
|
31 |
} |
|
32 |
|
|
33 |
protected String asXml(final String record){ |
|
34 |
return new JsonUtils().convertToXML(record); |
|
35 |
} |
|
36 |
|
|
37 |
public String getBaseURL() { |
|
38 |
return baseURL; |
|
39 |
} |
|
40 |
|
|
41 |
public void setBaseURL(final String baseURL) { |
|
42 |
this.baseURL = baseURL; |
|
43 |
} |
|
44 |
|
|
45 |
public HttpConnector getHttpConnector() { |
|
46 |
return httpConnector; |
|
47 |
} |
|
48 |
|
|
49 |
public void setHttpConnector(HttpConnector httpConnector) { |
|
50 |
this.httpConnector = httpConnector; |
|
51 |
} |
|
52 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverPlugin.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.doiresolver; |
|
2 |
|
|
3 |
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin; |
|
4 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
5 |
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor; |
|
6 |
|
|
7 |
public class DOIResolverPlugin extends AbstractCollectorPlugin { |
|
8 |
|
|
9 |
private CrossrefResolver crossrefResolver; |
|
10 |
|
|
11 |
@Override |
|
12 |
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) throws CollectorServiceException { |
|
13 |
//check baseurl not blank |
|
14 |
return () -> new DOIResolverIterator(interfaceDescriptor.getBaseUrl(), crossrefResolver); |
|
15 |
} |
|
16 |
|
|
17 |
public CrossrefResolver getCrossrefResolver() { |
|
18 |
return crossrefResolver; |
|
19 |
} |
|
20 |
|
|
21 |
public void setCrossrefResolver(CrossrefResolver crossrefResolver) { |
|
22 |
this.crossrefResolver = crossrefResolver; |
|
23 |
} |
|
24 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolver.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.doiresolver; |
|
2 |
|
|
3 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
4 |
|
|
5 |
/**
 * Contract of a service able to turn a DOI into a record.
 */
public interface DOIResolver {

    /**
     * Resolves the given DOI.
     *
     * @param doi the DOI to resolve
     * @return the record for the DOI; the {@code CrossrefResolver} implementation returns
     *         null when the DOI cannot be resolved
     */
    String resolve(String doi);

    /**
     * Sets the base URL used by the resolver.
     *
     * @param baseURL the base URL
     */
    void setBaseURL(String baseURL);
}
modules/dnet-collector-plugins/trunk/src/main/resources/eu/dnetlib/data/collector/plugins/applicationContext-dnet-modular-collector-plugins.xml | ||
---|---|---|
202 | 202 |
</property> |
203 | 203 |
</bean> |
204 | 204 |
|
205 |
<bean id="doiResolverPlugin" class="eu.dnetlib.data.collector.plugins.doiresolver.DOIResolverPlugin" p:crossrefResolver-ref="crossrefResolver" > |
|
206 |
<property name="protocolDescriptor"> |
|
207 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolDescriptor" p:name="doi_resolver"/> |
|
208 |
</property> |
|
209 |
|
|
210 |
</bean> |
|
211 |
|
|
212 |
<bean id="crossrefResolver" class="eu.dnetlib.data.collector.plugins.doiresolver.CrossrefResolver"/> |
|
213 |
|
|
205 | 214 |
</beans> |
modules/dnet-collector-plugins/trunk/pom.xml | ||
---|---|---|
113 | 113 |
<version>2.6</version> |
114 | 114 |
<scope>compile</scope> |
115 | 115 |
</dependency> |
116 |
</dependencies> |
|
116 |
<dependency> |
|
117 |
<groupId>org.mockito</groupId> |
|
118 |
<artifactId>mockito-core</artifactId> |
|
119 |
<version>3.3.3</version> |
|
120 |
<scope>test</scope> |
|
121 |
</dependency> |
|
122 |
</dependencies> |
|
117 | 123 |
</project> |
Also available in: Unified diff
Plugin that collects records from DOI resolvers based on a given list of DOIs