Project

General

Profile

« Previous | Next » 

Revision 27189

multiple sets management

View differences:

modules/dnet-modular-collector-service/trunk/src/test/java/eu/dnetlib/data/collector/plugins/oai/OaiIteratorTest.java
1 1
package eu.dnetlib.data.collector.plugins.oai;
2 2

  
3
import org.junit.Before;
4
import org.junit.Ignore;
3 5
import org.junit.Test;
4 6

  
5 7
public class OaiIteratorTest {
6

  
7
	// TODO implement a local test (not depending on external services)
8
	
9
	private static final String BASE_URL = "http://oai.d.efg.research-infrastructures.eu/oai.do";
10
	private static final String FORMAT = "oai_dc";
11
	private static final String SET = "d937bab1-d44c-44aa-bf7d-df5312a3b623";
12
	
13
	private OaiIterator oai;
14
	
15
	@Before
16
	public void setUp() {
17
		oai = new OaiIterator(BASE_URL, FORMAT, SET);
18
	}
19
	
8 20
	@Test
21
	@Ignore
9 22
	public void test() {
10

  
11
		// final Iterator<String> iter = new OaiIterator("http://zenodo.org/oai2d", "oai_dc", null);
12
		//
13
		// int count = 0;
14
		// System.out.println("START: " + count);
15
		// while (iter.hasNext()) {
16
		// iter.next();
17
		// count++;
18
		// if ((count % 100) == 0) {
19
		// System.out.println(" - " + count);
20
		// }
21
		// }
22
		// System.out.println("TOTAL: " + count);
23
		int count = 0;
24
		while (oai.hasNext()) {
25
			oai.next();
26
			count++;
27
		}
28
		System.out.println("TOTAL: " + count);
23 29
	}
24 30
}
modules/dnet-modular-collector-service/trunk/src/test/java/eu/dnetlib/data/collector/plugins/oai/OaiCollectorPluginRealTest.java
1
package eu.dnetlib.data.collector.plugins.oai;
2

  
3
import java.util.HashMap;
4

  
5
import org.junit.Before;
6
import org.junit.Ignore;
7
import org.junit.Test;
8

  
9
import com.google.common.collect.Iterables;
10

  
11
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
12

  
13
public class OaiCollectorPluginRealTest {
14
	
15
	private OaiCollectorPlugin oai;
16

  
17
	private static final String BASE_URL = "http://oai.d.efg.research-infrastructures.eu/oai.do";
18
	private static final String FORMAT = "oai_dc";
19
	private static final String SETS = "d937bab1-d44c-44aa-bf7d-df5312a3b623, e5b14959-1e87-4c07-9f85-942c9cdd9136, 13302eb6-764a-4ed2-8d08-2a1c9526f442, 31701e97-096f-4266-81b5-30b9bc3a06b0";
20

  
21
	@Before
22
	public void setUp() {
23
		oai = new OaiCollectorPlugin();
24
		oai.setOaiIteratorFactory(new OaiIteratorFactory());
25
	}
26

  
27
	@Test
28
	@Ignore
29
	public void testCollect() throws Exception {
30
		final InterfaceDescriptor iface = new InterfaceDescriptor();
31
		iface.setId("123");
32
		iface.setProtocol("OAI");
33
		iface.setBaseUrl(BASE_URL);
34
		iface.setParams(new HashMap<String, String>());
35
		iface.getParams().put("format", FORMAT);
36
		iface.getParams().put("set", SETS);
37
		
38
		int count = 0;
39
		for(String s : oai.collect(iface)) {
40
			count++;
41
		}
42
		System.out.println("TOTAL: " + count);
43
	}
44

  
45
}
modules/dnet-modular-collector-service/trunk/src/test/java/eu/dnetlib/data/collector/plugins/oai/OaiCollectorPluginTest.java
1
package eu.dnetlib.data.collector.plugins.oai;
2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertNotNull;
5
import static org.mockito.Mockito.verify;
6
import static org.mockito.Mockito.when;
7

  
8
import java.util.HashMap;
9
import java.util.Iterator;
10
import java.util.List;
11

  
12
import org.junit.Before;
13
import org.junit.Test;
14
import org.junit.runner.RunWith;
15
import org.mockito.Mock;
16
import org.mockito.internal.verification.Times;
17
import org.mockito.runners.MockitoJUnit44Runner;
18

  
19
import com.google.common.base.Joiner;
20
import com.google.common.collect.Lists;
21

  
22
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
23

  
24
@RunWith(MockitoJUnit44Runner.class)
25
public class OaiCollectorPluginTest {
26
	
27
	private OaiCollectorPlugin oai;
28
	
29
	@Mock
30
	private OaiIteratorFactory oaiIteratorFactory;
31
	
32
	private List<String> elements = Lists.newArrayList("0", "1", "2", "3", "4", "5", "6", "7", "8" , "9", "10", "11", "12");
33
	
34
	private Iterator<String> oaiIterator1 = elements.subList(0, 3).iterator();
35
	private Iterator<String> oaiIterator2 = elements.subList(3, 7).iterator();
36
	private Iterator<String> oaiIterator3 = elements.subList(7, elements.size()).iterator();
37

  
38
	private static final String BASE_URL = "http://oai.test.it/oai";
39
	private static final String FORMAT = "oai_dc";
40
	private static final String PROTOCOL = "OAI";
41
	private static final String SET_1 = "set01";
42
	private static final String SET_2 = "set02";
43
	private static final String SET_3 = "set03";
44
	
45
	@Before
46
	public void setUp() {
47
		oai = new OaiCollectorPlugin();
48
		oai.setOaiIteratorFactory(oaiIteratorFactory);
49
		when(oaiIteratorFactory.newIterator(BASE_URL, FORMAT, SET_1)).thenReturn(oaiIterator1);
50
		when(oaiIteratorFactory.newIterator(BASE_URL, FORMAT, SET_2)).thenReturn(oaiIterator2);
51
		when(oaiIteratorFactory.newIterator(BASE_URL, FORMAT, SET_3)).thenReturn(oaiIterator3);
52
	}
53
	
54
	public void test() {
55
		oai = new OaiCollectorPlugin();
56
	}
57
	
58
	@Test
59
	public void testGetProtocol() {
60
		assertEquals(PROTOCOL, oai.getProtocol());
61
	}
62
	
63
	@Test
64
	public void testCollect() throws Exception {
65
		final InterfaceDescriptor iface = new InterfaceDescriptor();
66
		iface.setId("123");
67
		iface.setProtocol(PROTOCOL);
68
		iface.setBaseUrl(BASE_URL);
69
		iface.setParams(new HashMap<String, String>());
70
		iface.getParams().put("format", FORMAT);
71
		iface.getParams().put("set", Joiner.on(", ").join(SET_1, SET_2, SET_3));
72
		
73
		final Iterable<String> records = oai.collect(iface);
74
		
75
		assertNotNull(records);
76
		verify(oaiIteratorFactory, new Times(0)).newIterator(BASE_URL, FORMAT, SET_1);
77
		verify(oaiIteratorFactory, new Times(0)).newIterator(BASE_URL, FORMAT, SET_2);
78
		verify(oaiIteratorFactory, new Times(0)).newIterator(BASE_URL, FORMAT, SET_3);
79
		
80
		int count = 0; 
81
		for (String s : records) {
82
			System.out.println("RECORD: " + s);
83
			assertEquals("" + count, s);
84
			count++;
85
		}
86
		assertEquals(elements.size(), count);
87
		verify(oaiIteratorFactory).newIterator(BASE_URL, FORMAT, SET_1);
88
		verify(oaiIteratorFactory).newIterator(BASE_URL, FORMAT, SET_2);
89
		verify(oaiIteratorFactory).newIterator(BASE_URL, FORMAT, SET_3);
90
	}
91
}
modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/oai/OaiIterator.java
24 24
	private SAXReader reader = new SAXReader();
25 25
	private HttpClient client = new HttpClient();
26 26
	private String baseUrl;
27
	private String set;
28
	private String mdFormat;
27 29
	private String token;
30
	private boolean started;
28 31

  
29 32
	/**
	 * Stores the harvesting parameters. Nothing is downloaded here: the first page is requested
	 * by {@code verifyStarted()}, which {@code hasNext()} and {@code next()} call.
	 *
	 * @param baseUrl  base URL of the OAI endpoint
	 * @param mdFormat value sent as {@code metadataPrefix}
	 * @param set      set to harvest; when null or empty no {@code set} argument is sent
	 */
	public OaiIterator(final String baseUrl, final String mdFormat, final String set) {
		this.started = false;
		this.set = set;
		this.mdFormat = mdFormat;
		this.baseUrl = baseUrl;
	}
38
	
39
	private void verifyStarted() {
40
		if (!this.started) {
41
			this.token = firstPage();
42
			this.started = true;
35 43
		}
36 44
	}
37 45

  
38 46
	@Override
39 47
	public boolean hasNext() {
40 48
		synchronized (queue) {
49
			verifyStarted();
41 50
			return !queue.isEmpty();
42 51
		}
43 52
	}
......
45 54
	@Override
46 55
	public String next() {
47 56
		synchronized (queue) {
57
			verifyStarted();
48 58
			final String res = queue.poll();
49 59
			while (queue.isEmpty() && (token != null) && !token.isEmpty()) {
50 60
				token = otherPages(token);
......
56 66
	/** Does nothing: the body is empty, so a call leaves this iterator unchanged. */
	@Override
57 67
	public void remove() {}
58 68

  
59
	private String firstPage(final String mdFormat, final String set) {
69
	private String firstPage() {
70
		
60 71
		String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + mdFormat;
61 72
		if ((set != null) && !set.isEmpty()) {
62 73
			url += "&set=" + set;
63 74
		}
75
		
76
		log.info("Downloading first page using url: " + url);
77

  
64 78
		return downloadPage(url);
65 79
	}
66 80

  
......
69 83
	}
70 84

  
71 85
	private String downloadPage(final String url) {
86
		
87
		System.out.println("URL: " + url);
72 88
		try {
73 89
			log.info("HTTP GET: " + url);
74 90
			final HttpMethod method = new GetMethod(url);
......
80 96
			for (Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
81 97
				queue.add(((Node) o).asXML());
82 98
			}
83

  
99
			
100
			System.out.println("Done");
84 101
			return doc.valueOf("//*[local-name()='resumptionToken']");
85 102
		} catch (Exception e) {
86 103
			throw new RuntimeException("Error obtaining records from: " + url, e);
modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/oai/OaiIteratorFactory.java
1
package eu.dnetlib.data.collector.plugins.oai;
2

  
3
import java.util.Iterator;
4

  
5
public class OaiIteratorFactory {
6
	public Iterator<String> newIterator(final String baseUrl, final String mdFormat, final String set) {
7
		return new OaiIterator(baseUrl, mdFormat, set);
8
	}
9
}
modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/oai/OaiCollectorPlugin.java
2 2

  
3 3
import java.util.Iterator;
4 4

  
5
import org.springframework.beans.factory.annotation.Required;
6

  
7
import com.google.common.base.Function;
8
import com.google.common.base.Splitter;
9
import com.google.common.collect.Iterables;
10
import com.google.common.collect.Iterators;
11

  
5 12
import eu.dnetlib.data.collector.plugin.CollectorPlugin;
6 13
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
7 14
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
......
10 17

  
11 18
	private static final String FORMAT_PARAM = "format";
12 19
	private static final String OAI_SET_PARAM = "set";
20
	
21
	private OaiIteratorFactory oaiIteratorFactory;
13 22

  
14 23
	@Override
15 24
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor) throws CollectorServiceException {
16 25
		final String baseUrl = interfaceDescriptor.getBaseUrl();
17 26
		final String mdFormat = interfaceDescriptor.getParams().get(FORMAT_PARAM);
18
		final String set = interfaceDescriptor.getParams().get(OAI_SET_PARAM);
27
		final String setParam = interfaceDescriptor.getParams().get(OAI_SET_PARAM);
19 28

  
29
		final Iterable<String> sets = Splitter.on(",").omitEmptyStrings().trimResults().split(setParam);
30
		
20 31
		if ((baseUrl == null) || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
21 32

  
22 33
		if ((mdFormat == null) || mdFormat.isEmpty()) { throw new CollectorServiceException("Param 'mdFormat' is null or empty"); }
23 34

  
24 35
		return new Iterable<String>() {
25

  
36
			@SuppressWarnings("unchecked")
26 37
			@Override
27 38
			public Iterator<String> iterator() {
28
				return new OaiIterator(baseUrl, mdFormat, set);
39
				final Iterable<Iterator<String>> iter = Iterables.transform(sets, new Function<String, Iterator<String>>() {
40
					@Override
41
					public Iterator<String> apply(String set) {
42
						return oaiIteratorFactory.newIterator(baseUrl, mdFormat, set);
43
					}
44
				});
45
				return Iterators.concat(Iterables.toArray(iter, Iterator.class));
29 46
			}
30 47
		};
31 48
	}
......
35 52
		return "OAI";
36 53
	}
37 54

  
55
	public OaiIteratorFactory getOaiIteratorFactory() {
56
		return oaiIteratorFactory;
57
	}
58

  
59
	@Required
60
	public void setOaiIteratorFactory(OaiIteratorFactory oaiIteratorFactory) {
61
		this.oaiIteratorFactory = oaiIteratorFactory;
62
	}
63

  
38 64
}
modules/dnet-modular-collector-service/trunk/src/main/resources/eu/dnetlib/data/collector/plugins/applicationContext-dnet-modular-collector-plugins.xml
13 13
                            http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-2.0.xsd
14 14
                            http://dnetlib.eu/springbeans/template http://dnetlib.eu/springbeans/template.xsd">
15 15

  
16
	<bean id="oaiCollectorPlugin" class="eu.dnetlib.data.collector.plugins.oai.OaiCollectorPlugin">
17
		<property name="oaiIteratorFactory">
18
			<bean class="eu.dnetlib.data.collector.plugins.oai.OaiIteratorFactory" />
19
		</property>
20
	</bean>
21

  
16 22
	<bean id="httpCollectorPlugin" class="eu.dnetlib.data.collector.plugins.HttpCollectorPlugin" />
17 23
	<bean id="fileCollectorPlugin" class="eu.dnetlib.data.collector.plugins.FileCollectorPlugin" />
18 24
	<bean id="classpathCollectorPlugin" class="eu.dnetlib.data.collector.plugins.ClasspathCollectorPlugin" />
19
	<bean id="oaiCollectorPlugin" class="eu.dnetlib.data.collector.plugins.oai.OaiCollectorPlugin" />
20 25
	<bean id="csvFileCollectorPlugin" class="eu.dnetlib.data.collector.plugins.FileCSVCollectorPlugin" />
21 26
	<bean id="ftpCollectorPlugin" class="eu.dnetlib.data.collector.plugins.ftp.FtpCollectorPlugin" />
22 27
	<bean id="filesystemCollectorPlugin" class="eu.dnetlib.data.collector.plugins.filesystem.FilesystemCollectorPlugin" />
modules/dnet-modular-collector-service/trunk/pom.xml
65 65
		<!-- <artifactId>je</artifactId> -->
66 66
		<!-- <version>5.0.73</version> -->
67 67
		<!-- </dependency> -->
68
		<dependency>
69
			<groupId>org.mockito</groupId>
70
			<artifactId>mockito-core</artifactId>
71
			<version>1.6</version>
72
			<scope>test</scope>
73
		</dependency>
68 74

  
69

  
70 75
		<dependency>
71 76
			<groupId>commons-httpclient</groupId>
72 77
			<artifactId>commons-httpclient</artifactId>

Also available in: Unified diff