Project

General

Profile

« Previous | Next » 

Revision 59095

DOIResolver plugin now supports multiple CSV files in the input folder (baseURL) and incremental collection (fromDate).

View differences:

modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverIteratorTest.java
1 1
package eu.dnetlib.data.collector.plugins.doiresolver;
2 2

  
3
import org.apache.commons.logging.Log;
4
import org.apache.commons.logging.LogFactory;
3 5
import org.junit.Assert;
4 6
import org.junit.Before;
5 7
import org.junit.Test;
......
12 14
@RunWith(MockitoJUnitRunner.class)
13 15
public class DOIResolverIteratorTest {
14 16

  
17
    private static final Log log = LogFactory.getLog(DOIResolverIteratorTest.class);
15 18
    @Mock
16 19
    CrossrefResolver resolver;
17 20
    DOIResolverIterator it;
18 21

  
22
    String dirpath;
23

  
19 24
    @Before
20 25
    public void setup(){
21 26
        when(resolver.resolve("1")).thenReturn("RECORD1");
22 27
        when(resolver.resolve("2")).thenReturn(null);
23 28
        when(resolver.resolve("3")).thenReturn("RECORD3");
24
        String file = getClass().getResource("/eu/dnetlib/data/collector/plugins/doiresolver/doi_list.csv").getFile();
25
        it = new DOIResolverIterator(file, resolver);
29
        dirpath = getClass().getResource("/eu/dnetlib/data/collector/plugins/doiresolver").getPath();
30

  
26 31
    }
27 32

  
28 33
    @Test
29 34
    public void test(){
35
        it = new DOIResolverIterator(dirpath, resolver, null);
36
        int count = 0;
30 37
        while(it.hasNext()){
31
            System.out.println(it.next());
38
            String res = it.next();
39
            log.info(res);
40
            if(count == 0) Assert.assertEquals("RECORD1", res);
41
            if(count == 1) Assert.assertEquals("RECORD3", res);
42
            count++;
32 43
        }
44
        Assert.assertEquals(2, count);
33 45
    }
34 46

  
35 47
    @Test
48
    public void testIncremental(){
49
        it = new DOIResolverIterator(dirpath, resolver, "2020-07-13");
50
        int count = 0;
51
        while(it.hasNext()){
52
            String res = it.next();
53
            count++;
54
        }
55
        Assert.assertEquals(0, count);
56
    }
57

  
58
    @Test
59
    public void testIncremental2(){
60
        it = new DOIResolverIterator(dirpath, resolver, "2020-01-13");
61
        int count = 0;
62
        while(it.hasNext()){
63
            String res = it.next();
64
            count++;
65
        }
66
        Assert.assertEquals(2, count);
67
    }
68

  
69
    @Test
36 70
    public void testCleanOk(){
71
        it = new DOIResolverIterator(dirpath, resolver, null);
37 72
        String doi = "10.1234/1234";
38 73
        Assert.assertEquals(doi, it.cleanDOI(doi));
39 74
    }
40 75

  
41 76
    @Test
42 77
    public void testCleanHttp(){
78
        it = new DOIResolverIterator(dirpath, resolver, null);
43 79
        String doi = "10.1234/1234";
44 80
        String doiURL = "http://dx.doi.org/"+doi;
45 81
        Assert.assertEquals(doi, it.cleanDOI(doiURL));
......
47 83

  
48 84
    @Test
49 85
    public void testCleanHttps(){
86
        it = new DOIResolverIterator(dirpath, resolver, null);
50 87
        String doi = "10.1234/1234";
51 88
        String doiURL = "https://dx.doi.org/"+doi;
52 89
        Assert.assertEquals(doi, it.cleanDOI(doiURL));
modules/dnet-collector-plugins/trunk/src/test/resources/log4j.properties
11 11
log4j.logger.eu.dnetlib.data.collector.plugins.projects.grist=DEBUG
12 12
log4j.logger.eu.dnetlib.data.collector.plugins.projects.gtr2=DEBUG
13 13
log4j.logger.eu.dnetlib.data.collector.plugins.doiresolver=DEBUG
14
log4j.logger.eu.dnetlib.data.collector.plugins.filesystem=DEBUG
14 15

  
15 16

  
16 17

  
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverIterator.java
1 1
package eu.dnetlib.data.collector.plugins.doiresolver;
2 2

  
3
import eu.dnetlib.data.collector.plugins.filesystem.FileSystemIterator;
3 4
import org.apache.commons.lang.StringUtils;
4 5
import org.apache.commons.logging.Log;
5 6
import org.apache.commons.logging.LogFactory;
......
9 10
import java.nio.file.Paths;
10 11
import java.util.Iterator;
11 12
import java.util.concurrent.ArrayBlockingQueue;
13
import java.util.stream.Stream;
12 14

  
13 15
public class DOIResolverIterator implements Iterator<String> {
14 16

  
......
19 21
    private static final String BAD_TERMINATOR = "BAD";
20 22
    private static final String UNRESOLVED = "UNRESOLVED";
21 23

  
22
    /** Path to the file that contains a list of DOIs, one per line. **/
23
    private String filePath;
24
    /** Path to the dir that contains the files, each a csv with a list of DOIs, one per line. **/
25
    private String baseDir;
26
    private String fromDate;
24 27

  
25 28
    private ArrayBlockingQueue<String> queue;
26 29

  
27 30
    private CrossrefResolver crossrefResolver;
28 31

  
29 32

  
30
    public DOIResolverIterator(final String filePath, final CrossrefResolver crossrefResolver) {
31
        this.filePath = filePath;
33
    public DOIResolverIterator(final String baseDir, final CrossrefResolver crossrefResolver, final String fromDate) {
34
        this.baseDir = baseDir;
35
        this.fromDate = fromDate;
32 36
        this.queue = new ArrayBlockingQueue<>(100);
33 37
        this.crossrefResolver = crossrefResolver;
34 38
        init();
......
36 40

  
37 41
    private void init(){
38 42
        log.info("Init");
43

  
39 44
        new Thread(() -> {
40
            int count = 0;
41
            // put first item in the queue
42
            if(queue.offer(STARTER)) {
43
                // read the file, ask the resolvers, put results in a shared queue
44
                //whatever exceptions, add terminator to the queue
45
                try{
46
                    Files.lines(Paths.get(filePath)).forEach(doi -> queue.offer(resolve(doi)));
47
                } catch (IOException e) {
48
                    log.error("DOI processing aborted");
49
                    log.error(e);
50
                    queue.offer(BAD_TERMINATOR);
45
            try{
46
                final FileSystemIterator fsi = new FileSystemIterator(baseDir, "csv", fromDate);
47
                // put first item in the queue
48
                if(queue.offer(STARTER)) {
49
                    // read the file, ask the resolvers, put results in a shared queue
50
                    //whatever exceptions, add terminator to the queue
51
                    while (fsi.hasNext()) {
52
                        String filePath = fsi.next();
53
                        try (Stream<String> stream = Files.lines(Paths.get(filePath))) {
54

  
55
                            stream.forEach(doi -> queue.offer(resolve(doi)));
56

  
57
                        } catch (IOException e) {
58
                            log.error("DOI processing aborted");
59
                            log.error(e);
60
                            queue.offer(BAD_TERMINATOR);
61
                        }
62
                    }
51 63
                }
64
            } catch (Exception e) {
65
                log.error("DOI processing aborted");
66
                log.error(e);
67
                queue.offer(BAD_TERMINATOR);
52 68
            }
53 69
            queue.offer(TERMINATOR);
54 70
            log.info("Finished processing DOI list");
......
107 123
        return queue.poll();
108 124
    }
109 125

  
110
    public String getFilePath() {
111
        return filePath;
126
    public String getBaseDir() {
127
        return baseDir;
112 128
    }
113 129

  
114
    public void setFilePath(String filePath) {
115
        this.filePath = filePath;
130
    public void setBaseDir(String baseDir) {
131
        this.baseDir = baseDir;
116 132
    }
117 133

  
118 134
    public CrossrefResolver getCrossrefResolver() {
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverPlugin.java
11 11

  
12 12
    @Override
13 13
    public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) throws CollectorServiceException {
14
        //check baseurl not blank
15
        return () -> new DOIResolverIterator(interfaceDescriptor.getBaseUrl(), crossrefResolver);
14
        final String baseUrl = interfaceDescriptor.getBaseUrl();
15
        if ((baseUrl == null) || baseUrl.isEmpty()) {
16
            throw new CollectorServiceException("Param 'baseurl' is null or empty");
17
        }
18
        if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); }
19
        return () -> new DOIResolverIterator(interfaceDescriptor.getBaseUrl(), crossrefResolver, fromDate);
16 20
    }
17 21

  
18 22
    public CrossrefResolver getCrossrefResolver() {

Also available in: Unified diff