Project

General

Profile

« Previous | Next » 

Revision 48833

New simple http collector plugin that doesn't care about splitting the file it downloads.

View differences:

modules/dnet-data-services/branches/saxonHE/src/test/java/eu/dnetlib/data/collector/plugins/HttpSimpleCollectorPluginTest.java
1
package eu.dnetlib.data.collector.plugins;
2

  
3
import com.google.common.collect.Lists;
4
import eu.dnetlib.rmi.data.CollectorServiceException;
5
import eu.dnetlib.rmi.data.InterfaceDescriptor;
6
import org.junit.Assert;
7
import org.junit.Before;
8
import org.junit.Ignore;
9
import org.junit.Test;
10

  
11
/**
12
 * Created by Alessia Bardi on 10/08/2017.
13
 *
14
 * @author Alessia Bardi
15
 */
16
@Ignore
17
public class HttpSimpleCollectorPluginTest {
18

  
19
	private InterfaceDescriptor apiDescriptor;
20
	private HttpSimpleCollectorPlugin plugin;
21

  
22
	@Before
23
	public void prepare(){
24
		plugin = new HttpSimpleCollectorPlugin();
25
		apiDescriptor = new InterfaceDescriptor();
26
		apiDescriptor.setBaseUrl("http://data.d4science.org/em1EemhBdUZ0bjNGTWJNNjlxVDltcm9acDFmMHlBSVVHbWJQNStIS0N6Yz0");
27
	}
28

  
29
	@Test
30
	public void test() throws CollectorServiceException {
31
	Iterable<String> res = plugin.collect(apiDescriptor, null, null);
32
	Assert.assertTrue(Lists.newArrayList(res).size() == 1);
33
	for(String r : res){
34
		System.out.println(r);
35
		Assert.assertNotNull(r);
36
	}
37

  
38
	}
39

  
40
}
modules/dnet-data-services/branches/saxonHE/src/main/java/eu/dnetlib/data/collector/plugins/HttpSimpleCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins;
2

  
3
import java.io.IOException;
4
import java.net.URL;
5
import java.nio.charset.Charset;
6

  
7
import com.google.common.collect.Lists;
8
import eu.dnetlib.rmi.data.CollectorServiceException;
9
import eu.dnetlib.rmi.data.InterfaceDescriptor;
10
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin;
11
import org.apache.commons.io.IOUtils;
12

  
13
/**
14
 * Collects whatever is in the given baseUrl. If you have multiple records to be splitted, then use eu.dnetlib.data.collector.plugins.HttpCollectorPlugin.
15
 * If you have a CSV file, then prefer eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin .
16
 */
17
public class HttpSimpleCollectorPlugin extends AbstractCollectorPlugin {
18

  
19
	@Override
20
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
21
			throws CollectorServiceException {
22

  
23
		URL url = null;
24
		try {
25
			url = new URL(interfaceDescriptor.getBaseUrl());
26
			return Lists.newArrayList(IOUtils.toString(url, Charset.forName("UTF-8")));
27

  
28
		} catch (IOException e) {
29
			throw new CollectorServiceException("Cannot download from URL: " + url, e);
30
		}
31
	}
32
}
modules/dnet-data-services/branches/saxonHE/src/main/resources/eu/dnetlib/data/collector/plugins/applicationContext-dnet-modular-collector-plugins.xml
44 44
		</property>
45 45
	</bean>
46 46

  
47
	<bean id="httpSimpleCollectorPlugin" class="eu.dnetlib.data.collector.plugins.HttpSimpleCollectorPlugin">
48
		<property name="protocolDescriptor">
49
			<bean class="eu.dnetlib.rmi.data.ProtocolDescriptor" p:name="httpSimple"/>
50
		</property>
51
	</bean>
52

  
47 53
	<bean id="fileCollectorPlugin" class="eu.dnetlib.data.collector.plugins.FileCollectorPlugin">
48 54
		<property name="protocolDescriptor">
49 55
			<bean class="eu.dnetlib.rmi.data.ProtocolDescriptor" p:name="file">
modules/dnet-parthenos/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/VocabularyDSResourceType/api_protocols.xml
41 41
                <TERM code="httpCSV" encoding="DNET" english_name="httpCSV" native_name="httpCSV">
42 42
                    <SYNONYMS/>
43 43
                </TERM>
44
                <TERM code="httpSimple" encoding="DNET" english_name="httpSimple" native_name="httpSimple">
45
                    <SYNONYMS/>
46
                </TERM>
44 47
                <TERM code="files_from_mdstore" encoding="DNET" english_name="files_from_mdstore" native_name="files_from_mdstore">
45 48
                    <SYNONYMS/>
46 49
                </TERM>

Also available in: Unified diff