Project

General

Profile

« Previous | Next » 

Revision 58995

Plugin that collects records from DOI resolvers based on a given list of DOIs

View differences:

modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverIteratorTest.java
1
package eu.dnetlib.data.collector.plugins.doiresolver;
2

  
3
import org.junit.Assert;
4
import org.junit.Before;
5
import org.junit.Test;
6
import org.junit.runner.RunWith;
7
import org.mockito.Mock;
8
import org.mockito.junit.MockitoJUnitRunner;
9

  
10
import static org.mockito.Mockito.when;
11

  
12
@RunWith(MockitoJUnitRunner.class)
13
public class DOIResolverIteratorTest {
14

  
15
    @Mock
16
    CrossrefResolver resolver;
17
    DOIResolverIterator it;
18

  
19
    @Before
20
    public void setup(){
21
        when(resolver.resolve("1")).thenReturn("RECORD1");
22
        when(resolver.resolve("2")).thenReturn(null);
23
        when(resolver.resolve("3")).thenReturn("RECORD3");
24
        String file = getClass().getResource("/eu/dnetlib/data/collector/plugins/doiresolver/doi_list.csv").getFile();
25
        it = new DOIResolverIterator(file, resolver);
26
    }
27

  
28
    @Test
29
    public void test(){
30
        while(it.hasNext()){
31
            System.out.println(it.next());
32
        }
33
    }
34

  
35
    @Test
36
    public void testCleanOk(){
37
        String doi = "10.1234/1234";
38
        Assert.assertEquals(doi, it.cleanDOI(doi));
39
    }
40

  
41
    @Test
42
    public void testCleanHttp(){
43
        String doi = "10.1234/1234";
44
        String doiURL = "http://dx.doi.org/"+doi;
45
        Assert.assertEquals(doi, it.cleanDOI(doiURL));
46
    }
47

  
48
    @Test
49
    public void testCleanHttps(){
50
        String doi = "10.1234/1234";
51
        String doiURL = "https://dx.doi.org/"+doi;
52
        Assert.assertEquals(doi, it.cleanDOI(doiURL));
53
    }
54
}
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/doiresolver/CrossrefResolverTest.java
1
package eu.dnetlib.data.collector.plugins.doiresolver;
2

  
3
import eu.dnetlib.data.collector.plugins.HttpConnector;
4
import org.junit.Assert;
5
import org.junit.Before;
6
import org.junit.Ignore;
7
import org.junit.Test;
8

  
9
@Ignore
10
public class CrossrefResolverTest {
11

  
12
    HttpConnector httpConnector;
13
    CrossrefResolver resolver;
14

  
15
    @Before
16
    public void setup(){
17
        httpConnector = new HttpConnector();
18
        resolver = new CrossrefResolver();
19
        resolver.setHttpConnector(httpConnector);
20
    }
21

  
22
    @Test
23
    public void testUnexistingDOI(){
24
        String doi = "abcd";
25
        Assert.assertNull(resolver.resolve(doi));
26
    }
27

  
28
    @Test
29
    public void testResolveDOI(){
30
        String doi = "10.1016/j.carbpol.2020.115930";
31
        String record = resolver.resolve(doi);
32
        Assert.assertNotNull(record);
33
        System.out.println(record);
34
    }
35

  
36
}
modules/dnet-collector-plugins/trunk/src/test/resources/eu/dnetlib/data/collector/plugins/doiresolver/doi_list.csv
1
1
2
2
3
3
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverIterator.java
1
package eu.dnetlib.data.collector.plugins.doiresolver;
2

  
3
import eu.dnetlib.data.collector.plugins.utils.JsonUtils;
4
import org.apache.commons.lang.StringUtils;
5
import org.apache.commons.logging.Log;
6
import org.apache.commons.logging.LogFactory;
7

  
8
import java.io.IOException;
9
import java.nio.file.Files;
10
import java.nio.file.Paths;
11
import java.util.Iterator;
12
import java.util.concurrent.ArrayBlockingQueue;
13

  
14
public class DOIResolverIterator implements Iterator<String> {
15

  
16
    private static final Log log = LogFactory.getLog(DOIResolverIterator.class);
17

  
18
    private static final String STARTER = "FIRE";
19
    private static final String TERMINATOR = "ARNOLD";
20
    private static final String BAD_TERMINATOR = "BAD";
21
    private static final String UNRESOLVED = "UNRESOLVED";
22

  
23
    /** Path to the file that contains a list of DOIs, one per line. **/
24
    private String filePath;
25

  
26
    private ArrayBlockingQueue<String> queue;
27

  
28
    private CrossrefResolver crossrefResolver;
29

  
30

  
31
    public DOIResolverIterator(final String filePath, final CrossrefResolver crossrefResolver) {
32
        this.filePath = filePath;
33
        this.queue = new ArrayBlockingQueue<>(100);
34
        this.crossrefResolver = crossrefResolver;
35
        init();
36
    }
37

  
38
    private void init(){
39
        new Thread(() -> {
40
            // put first item in the queue
41
            if(queue.offer(STARTER)) {
42
                // read the file, ask the resolvers, put results in a shared queue
43
                //whatever exceptions, add terminator to the queue
44
                try{
45
                    Files.lines(Paths.get(filePath)).forEach(doi -> queue.offer(resolve(doi)));
46
                } catch (IOException e) {
47
                    log.error(e);
48
                    queue.offer(BAD_TERMINATOR);
49
                }
50
            }
51
            queue.offer(TERMINATOR);
52

  
53

  
54
        }
55
        ).start();
56
    }
57

  
58
    private String resolve(final String doi){
59
       log.debug("Resolving "+doi);
60
       log.debug("Crossref...");
61
       String record = crossrefResolver.resolve(cleanDOI(doi));
62
       if(StringUtils.isNotBlank(record)) return record;
63
       else {
64
           //try another resolver
65
       }
66
       return UNRESOLVED;
67
    }
68

  
69
    /**
70
     * Returns the identifier part of the DOI only.
71
     * @param doi
72
     * @return the DOI
73
     */
74
    protected String cleanDOI(final String doi){
75
       return doi.replace("http://dx.doi.org/", "").replace("https://dx.doi.org/", "");
76
    }
77

  
78
    @Override
79
    public boolean hasNext() {
80
        //If I get a null value, the queue is currently empty. so we wait for something
81
        if(queue.peek() == null){
82
            try {
83
                Thread.sleep(10);
84
            } catch (InterruptedException e) {
85
                e.printStackTrace();
86
            }
87
            return hasNext();
88
        }
89
       if(queue.peek().equals(TERMINATOR) || queue.peek().equals(BAD_TERMINATOR)){
90
           return false;
91
       }
92
       if(queue.peek().equals(UNRESOLVED) || queue.peek().equals(STARTER)){
93
           queue.poll();
94
           return hasNext();
95
       }
96
        return true;
97

  
98
    }
99

  
100
    @Override
101
    public String next() {
102
        return queue.poll();
103
    }
104

  
105
    public String getFilePath() {
106
        return filePath;
107
    }
108

  
109
    public void setFilePath(String filePath) {
110
        this.filePath = filePath;
111
    }
112

  
113
    public CrossrefResolver getCrossrefResolver() {
114
        return crossrefResolver;
115
    }
116

  
117
    public void setCrossrefResolver(CrossrefResolver crossrefResolver) {
118
        this.crossrefResolver = crossrefResolver;
119
    }
120
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/CrossrefResolver.java
1
package eu.dnetlib.data.collector.plugins.doiresolver;
2

  
3
import eu.dnetlib.data.collector.plugins.HttpConnector;
4
import eu.dnetlib.data.collector.plugins.utils.JsonUtils;
5
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
6
import org.apache.commons.io.IOUtils;
7
import org.apache.commons.logging.Log;
8
import org.apache.commons.logging.LogFactory;
9
import org.springframework.beans.factory.annotation.Autowired;
10

  
11
import java.io.IOException;
12
import java.io.InputStream;
13

  
14
public class CrossrefResolver implements DOIResolver{
15

  
16
    private static final Log log = LogFactory.getLog(CrossrefResolver.class);
17
    private String baseURL = "https://api.crossref.org/works/";
18

  
19
    @Autowired
20
    private HttpConnector httpConnector;
21

  
22
    @Override
23
    public String resolve(String doi) {
24
        try {
25
            InputStream is = httpConnector.getInputSourceAsStream(getBaseURL()+doi);
26
            return asXml(IOUtils.toString(is));
27
        } catch (IOException | CollectorServiceException e) {
28
            log.error("Cannot resolve doi "+doi+" Exception: "+e);
29
            return null;
30
        }
31
    }
32

  
33
    protected String asXml(final String record){
34
       return new JsonUtils().convertToXML(record);
35
    }
36

  
37
    public String getBaseURL() {
38
        return baseURL;
39
    }
40

  
41
    public void setBaseURL(final String baseURL) {
42
        this.baseURL = baseURL;
43
    }
44

  
45
    public HttpConnector getHttpConnector() {
46
        return httpConnector;
47
    }
48

  
49
    public void setHttpConnector(HttpConnector httpConnector) {
50
        this.httpConnector = httpConnector;
51
    }
52
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverPlugin.java
1
package eu.dnetlib.data.collector.plugins.doiresolver;
2

  
3
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
4
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
5
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
6

  
7
public class DOIResolverPlugin extends AbstractCollectorPlugin {
8

  
9
    private CrossrefResolver crossrefResolver;
10

  
11
    @Override
12
    public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) throws CollectorServiceException {
13
        //check baseurl not blank
14
        return () -> new DOIResolverIterator(interfaceDescriptor.getBaseUrl(), crossrefResolver);
15
    }
16

  
17
    public CrossrefResolver getCrossrefResolver() {
18
        return crossrefResolver;
19
    }
20

  
21
    public void setCrossrefResolver(CrossrefResolver crossrefResolver) {
22
        this.crossrefResolver = crossrefResolver;
23
    }
24
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolver.java
1
package eu.dnetlib.data.collector.plugins.doiresolver;
2

  
3
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
4

  
5
/**
 * A service able to turn a DOI into a metadata record.
 */
public interface DOIResolver {

    /**
     * Resolves the given DOI.
     *
     * @param doi the DOI to resolve
     * @return the record for the DOI; the Crossref implementation returns null when the DOI cannot be resolved
     */
    String resolve(String doi);

    /**
     * Sets the base URL of the remote service queried by this resolver.
     *
     * @param baseURL the base URL
     */
    void setBaseURL(String baseURL);
}
modules/dnet-collector-plugins/trunk/src/main/resources/eu/dnetlib/data/collector/plugins/applicationContext-dnet-modular-collector-plugins.xml
202 202
		</property>
203 203
	</bean>
204 204

  
205
	<bean id="doiResolverPlugin" class="eu.dnetlib.data.collector.plugins.doiresolver.DOIResolverPlugin" p:crossrefResolver-ref="crossrefResolver" >
206
		<property name="protocolDescriptor">
207
			<bean class="eu.dnetlib.data.collector.rmi.ProtocolDescriptor" p:name="doi_resolver"/>
208
		</property>
209

  
210
	</bean>
211

  
212
	<bean id="crossrefResolver" class="eu.dnetlib.data.collector.plugins.doiresolver.CrossrefResolver"/>
213

  
205 214
</beans>
modules/dnet-collector-plugins/trunk/pom.xml
113 113
			<version>2.6</version>
114 114
			<scope>compile</scope>
115 115
		</dependency>
116
	</dependencies>
116
        <dependency>
117
            <groupId>org.mockito</groupId>
118
            <artifactId>mockito-core</artifactId>
119
            <version>3.3.3</version>
120
            <scope>test</scope>
121
        </dependency>
122
    </dependencies>
117 123
</project>

Also available in: Unified diff