Revision 59095
Added by Alessia Bardi almost 4 years ago
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverIteratorTest.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.collector.plugins.doiresolver; |
2 | 2 |
|
3 |
import org.apache.commons.logging.Log; |
|
4 |
import org.apache.commons.logging.LogFactory; |
|
3 | 5 |
import org.junit.Assert; |
4 | 6 |
import org.junit.Before; |
5 | 7 |
import org.junit.Test; |
... | ... | |
12 | 14 |
@RunWith(MockitoJUnitRunner.class) |
13 | 15 |
public class DOIResolverIteratorTest { |
14 | 16 |
|
17 |
private static final Log log = LogFactory.getLog(DOIResolverIteratorTest.class); |
|
15 | 18 |
@Mock |
16 | 19 |
CrossrefResolver resolver; |
17 | 20 |
DOIResolverIterator it; |
18 | 21 |
|
22 |
String dirpath; |
|
23 |
|
|
19 | 24 |
@Before |
20 | 25 |
public void setup(){ |
21 | 26 |
when(resolver.resolve("1")).thenReturn("RECORD1"); |
22 | 27 |
when(resolver.resolve("2")).thenReturn(null); |
23 | 28 |
when(resolver.resolve("3")).thenReturn("RECORD3"); |
24 |
String file = getClass().getResource("/eu/dnetlib/data/collector/plugins/doiresolver/doi_list.csv").getFile();
|
|
25 |
it = new DOIResolverIterator(file, resolver); |
|
29 |
dirpath = getClass().getResource("/eu/dnetlib/data/collector/plugins/doiresolver").getPath();
|
|
30 |
|
|
26 | 31 |
} |
27 | 32 |
|
28 | 33 |
@Test |
29 | 34 |
public void test(){ |
35 |
it = new DOIResolverIterator(dirpath, resolver, null); |
|
36 |
int count = 0; |
|
30 | 37 |
while(it.hasNext()){ |
31 |
System.out.println(it.next()); |
|
38 |
String res = it.next(); |
|
39 |
log.info(res); |
|
40 |
if(count == 0) Assert.assertEquals("RECORD1", res); |
|
41 |
if(count == 1) Assert.assertEquals("RECORD3", res); |
|
42 |
count++; |
|
32 | 43 |
} |
44 |
Assert.assertEquals(2, count); |
|
33 | 45 |
} |
34 | 46 |
|
35 | 47 |
@Test |
48 |
public void testIncremental(){ |
|
49 |
it = new DOIResolverIterator(dirpath, resolver, "2020-07-13"); |
|
50 |
int count = 0; |
|
51 |
while(it.hasNext()){ |
|
52 |
String res = it.next(); |
|
53 |
count++; |
|
54 |
} |
|
55 |
Assert.assertEquals(0, count); |
|
56 |
} |
|
57 |
|
|
58 |
@Test |
|
59 |
public void testIncremental2(){ |
|
60 |
it = new DOIResolverIterator(dirpath, resolver, "2020-01-13"); |
|
61 |
int count = 0; |
|
62 |
while(it.hasNext()){ |
|
63 |
String res = it.next(); |
|
64 |
count++; |
|
65 |
} |
|
66 |
Assert.assertEquals(2, count); |
|
67 |
} |
|
68 |
|
|
69 |
@Test |
|
36 | 70 |
public void testCleanOk(){ |
71 |
it = new DOIResolverIterator(dirpath, resolver, null); |
|
37 | 72 |
String doi = "10.1234/1234"; |
38 | 73 |
Assert.assertEquals(doi, it.cleanDOI(doi)); |
39 | 74 |
} |
40 | 75 |
|
41 | 76 |
@Test |
42 | 77 |
public void testCleanHttp(){ |
78 |
it = new DOIResolverIterator(dirpath, resolver, null); |
|
43 | 79 |
String doi = "10.1234/1234"; |
44 | 80 |
String doiURL = "http://dx.doi.org/"+doi; |
45 | 81 |
Assert.assertEquals(doi, it.cleanDOI(doiURL)); |
... | ... | |
47 | 83 |
|
48 | 84 |
@Test |
49 | 85 |
public void testCleanHttps(){ |
86 |
it = new DOIResolverIterator(dirpath, resolver, null); |
|
50 | 87 |
String doi = "10.1234/1234"; |
51 | 88 |
String doiURL = "https://dx.doi.org/"+doi; |
52 | 89 |
Assert.assertEquals(doi, it.cleanDOI(doiURL)); |
modules/dnet-collector-plugins/trunk/src/test/resources/log4j.properties | ||
---|---|---|
11 | 11 |
log4j.logger.eu.dnetlib.data.collector.plugins.projects.grist=DEBUG |
12 | 12 |
log4j.logger.eu.dnetlib.data.collector.plugins.projects.gtr2=DEBUG |
13 | 13 |
log4j.logger.eu.dnetlib.data.collector.plugins.doiresolver=DEBUG |
14 |
log4j.logger.eu.dnetlib.data.collector.plugins.filesystem=DEBUG |
|
14 | 15 |
|
15 | 16 |
|
16 | 17 |
|
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverIterator.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.collector.plugins.doiresolver; |
2 | 2 |
|
3 |
import eu.dnetlib.data.collector.plugins.filesystem.FileSystemIterator; |
|
3 | 4 |
import org.apache.commons.lang.StringUtils; |
4 | 5 |
import org.apache.commons.logging.Log; |
5 | 6 |
import org.apache.commons.logging.LogFactory; |
... | ... | |
9 | 10 |
import java.nio.file.Paths; |
10 | 11 |
import java.util.Iterator; |
11 | 12 |
import java.util.concurrent.ArrayBlockingQueue; |
13 |
import java.util.stream.Stream; |
|
12 | 14 |
|
13 | 15 |
public class DOIResolverIterator implements Iterator<String> { |
14 | 16 |
|
... | ... | |
19 | 21 |
private static final String BAD_TERMINATOR = "BAD"; |
20 | 22 |
private static final String UNRESOLVED = "UNRESOLVED"; |
21 | 23 |
|
22 |
/** Path to the file that contains a list of DOIs, one per line. **/ |
|
23 |
private String filePath; |
|
24 |
/** Path to the dir that contains the files, each a csv with a list of DOIs, one per line. **/ |
|
25 |
private String baseDir; |
|
26 |
private String fromDate; |
|
24 | 27 |
|
25 | 28 |
private ArrayBlockingQueue<String> queue; |
26 | 29 |
|
27 | 30 |
private CrossrefResolver crossrefResolver; |
28 | 31 |
|
29 | 32 |
|
30 |
public DOIResolverIterator(final String filePath, final CrossrefResolver crossrefResolver) { |
|
31 |
this.filePath = filePath; |
|
33 |
public DOIResolverIterator(final String baseDir, final CrossrefResolver crossrefResolver, final String fromDate) { |
|
34 |
this.baseDir = baseDir; |
|
35 |
this.fromDate = fromDate; |
|
32 | 36 |
this.queue = new ArrayBlockingQueue<>(100); |
33 | 37 |
this.crossrefResolver = crossrefResolver; |
34 | 38 |
init(); |
... | ... | |
36 | 40 |
|
37 | 41 |
private void init(){ |
38 | 42 |
log.info("Init"); |
43 |
|
|
39 | 44 |
new Thread(() -> { |
40 |
int count = 0; |
|
41 |
// put first item in the queue |
|
42 |
if(queue.offer(STARTER)) { |
|
43 |
// read the file, ask the resolvers, put results in a shared queue |
|
44 |
//whatever exceptions, add terminator to the queue |
|
45 |
try{ |
|
46 |
Files.lines(Paths.get(filePath)).forEach(doi -> queue.offer(resolve(doi))); |
|
47 |
} catch (IOException e) { |
|
48 |
log.error("DOI processing aborted"); |
|
49 |
log.error(e); |
|
50 |
queue.offer(BAD_TERMINATOR); |
|
45 |
try{ |
|
46 |
final FileSystemIterator fsi = new FileSystemIterator(baseDir, "csv", fromDate); |
|
47 |
// put first item in the queue |
|
48 |
if(queue.offer(STARTER)) { |
|
49 |
// read the file, ask the resolvers, put results in a shared queue |
|
50 |
//whatever exceptions, add terminator to the queue |
|
51 |
while (fsi.hasNext()) { |
|
52 |
String filePath = fsi.next(); |
|
53 |
try (Stream<String> stream = Files.lines(Paths.get(filePath))) { |
|
54 |
|
|
55 |
stream.forEach(doi -> queue.offer(resolve(doi))); |
|
56 |
|
|
57 |
} catch (IOException e) { |
|
58 |
log.error("DOI processing aborted"); |
|
59 |
log.error(e); |
|
60 |
queue.offer(BAD_TERMINATOR); |
|
61 |
} |
|
62 |
} |
|
51 | 63 |
} |
64 |
} catch (Exception e) { |
|
65 |
log.error("DOI processing aborted"); |
|
66 |
log.error(e); |
|
67 |
queue.offer(BAD_TERMINATOR); |
|
52 | 68 |
} |
53 | 69 |
queue.offer(TERMINATOR); |
54 | 70 |
log.info("Finished processing DOI list"); |
... | ... | |
107 | 123 |
return queue.poll(); |
108 | 124 |
} |
109 | 125 |
|
110 |
public String getFilePath() {
|
|
111 |
return filePath;
|
|
126 |
public String getBaseDir() {
|
|
127 |
return baseDir;
|
|
112 | 128 |
} |
113 | 129 |
|
114 |
public void setFilePath(String filePath) {
|
|
115 |
this.filePath = filePath;
|
|
130 |
public void setBaseDir(String baseDir) {
|
|
131 |
this.baseDir = baseDir;
|
|
116 | 132 |
} |
117 | 133 |
|
118 | 134 |
public CrossrefResolver getCrossrefResolver() { |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/doiresolver/DOIResolverPlugin.java | ||
---|---|---|
11 | 11 |
|
12 | 12 |
@Override |
13 | 13 |
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) throws CollectorServiceException { |
14 |
//check baseurl not blank |
|
15 |
return () -> new DOIResolverIterator(interfaceDescriptor.getBaseUrl(), crossrefResolver); |
|
14 |
final String baseUrl = interfaceDescriptor.getBaseUrl(); |
|
15 |
if ((baseUrl == null) || baseUrl.isEmpty()) { |
|
16 |
throw new CollectorServiceException("Param 'baseurl' is null or empty"); |
|
17 |
} |
|
18 |
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); } |
|
19 |
return () -> new DOIResolverIterator(interfaceDescriptor.getBaseUrl(), crossrefResolver, fromDate); |
|
16 | 20 |
} |
17 | 21 |
|
18 | 22 |
public CrossrefResolver getCrossrefResolver() { |
Also available in: Unified diff
DOIResolver plugin now supports multiple CSV files in the input folder (baseURL) and incremental collection driven by the fromDate parameter