Revision 48833
Added by Alessia Bardi about 7 years ago
modules/dnet-data-services/branches/saxonHE/src/test/java/eu/dnetlib/data/collector/plugins/HttpSimpleCollectorPluginTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import eu.dnetlib.rmi.data.CollectorServiceException; |
|
5 |
import eu.dnetlib.rmi.data.InterfaceDescriptor; |
|
6 |
import org.junit.Assert; |
|
7 |
import org.junit.Before; |
|
8 |
import org.junit.Ignore; |
|
9 |
import org.junit.Test; |
|
10 |
|
|
11 |
/** |
|
12 |
* Created by Alessia Bardi on 10/08/2017. |
|
13 |
* |
|
14 |
* @author Alessia Bardi |
|
15 |
*/ |
|
16 |
@Ignore |
|
17 |
public class HttpSimpleCollectorPluginTest { |
|
18 |
|
|
19 |
private InterfaceDescriptor apiDescriptor; |
|
20 |
private HttpSimpleCollectorPlugin plugin; |
|
21 |
|
|
22 |
@Before |
|
23 |
public void prepare(){ |
|
24 |
plugin = new HttpSimpleCollectorPlugin(); |
|
25 |
apiDescriptor = new InterfaceDescriptor(); |
|
26 |
apiDescriptor.setBaseUrl("http://data.d4science.org/em1EemhBdUZ0bjNGTWJNNjlxVDltcm9acDFmMHlBSVVHbWJQNStIS0N6Yz0"); |
|
27 |
} |
|
28 |
|
|
29 |
@Test |
|
30 |
public void test() throws CollectorServiceException { |
|
31 |
Iterable<String> res = plugin.collect(apiDescriptor, null, null); |
|
32 |
Assert.assertTrue(Lists.newArrayList(res).size() == 1); |
|
33 |
for(String r : res){ |
|
34 |
System.out.println(r); |
|
35 |
Assert.assertNotNull(r); |
|
36 |
} |
|
37 |
|
|
38 |
} |
|
39 |
|
|
40 |
} |
modules/dnet-data-services/branches/saxonHE/src/main/java/eu/dnetlib/data/collector/plugins/HttpSimpleCollectorPlugin.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.net.URL; |
|
5 |
import java.nio.charset.Charset; |
|
6 |
|
|
7 |
import com.google.common.collect.Lists; |
|
8 |
import eu.dnetlib.rmi.data.CollectorServiceException; |
|
9 |
import eu.dnetlib.rmi.data.InterfaceDescriptor; |
|
10 |
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin; |
|
11 |
import org.apache.commons.io.IOUtils; |
|
12 |
|
|
13 |
/** |
|
14 |
* Collects whatever is in the given baseUrl. If you have multiple records to be splitted, then use eu.dnetlib.data.collector.plugins.HttpCollectorPlugin. |
|
15 |
* If you have a CSV file, then prefer eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin . |
|
16 |
*/ |
|
17 |
public class HttpSimpleCollectorPlugin extends AbstractCollectorPlugin { |
|
18 |
|
|
19 |
@Override |
|
20 |
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) |
|
21 |
throws CollectorServiceException { |
|
22 |
|
|
23 |
URL url = null; |
|
24 |
try { |
|
25 |
url = new URL(interfaceDescriptor.getBaseUrl()); |
|
26 |
return Lists.newArrayList(IOUtils.toString(url, Charset.forName("UTF-8"))); |
|
27 |
|
|
28 |
} catch (IOException e) { |
|
29 |
throw new CollectorServiceException("Cannot download from URL: " + url, e); |
|
30 |
} |
|
31 |
} |
|
32 |
} |
modules/dnet-data-services/branches/saxonHE/src/main/resources/eu/dnetlib/data/collector/plugins/applicationContext-dnet-modular-collector-plugins.xml | ||
---|---|---|
44 | 44 |
</property> |
45 | 45 |
</bean> |
46 | 46 |
|
47 |
<!-- Registers HttpSimpleCollectorPlugin as a bean whose protocol descriptor is named "httpSimple". -->
<bean id="httpSimpleCollectorPlugin" class="eu.dnetlib.data.collector.plugins.HttpSimpleCollectorPlugin"> |

48 |
<property name="protocolDescriptor"> |

49 |
<bean class="eu.dnetlib.rmi.data.ProtocolDescriptor" p:name="httpSimple"/> |

50 |
</property> |

51 |
</bean> |
|
52 |
|
|
47 | 53 |
<bean id="fileCollectorPlugin" class="eu.dnetlib.data.collector.plugins.FileCollectorPlugin"> |
48 | 54 |
<property name="protocolDescriptor"> |
49 | 55 |
<bean class="eu.dnetlib.rmi.data.ProtocolDescriptor" p:name="file"> |
modules/dnet-parthenos/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/VocabularyDSResourceType/api_protocols.xml | ||
---|---|---|
41 | 41 |
<TERM code="httpCSV" encoding="DNET" english_name="httpCSV" native_name="httpCSV"> |
42 | 42 |
<SYNONYMS/> |
43 | 43 |
</TERM> |
44 |
<!-- Vocabulary term for the "httpSimple" API protocol; it declares no synonyms. -->
<TERM code="httpSimple" encoding="DNET" english_name="httpSimple" native_name="httpSimple"> |

45 |
<SYNONYMS/> |

46 |
</TERM> |
|
44 | 47 |
<TERM code="files_from_mdstore" encoding="DNET" english_name="files_from_mdstore" native_name="files_from_mdstore"> |
45 | 48 |
<SYNONYMS/> |
46 | 49 |
</TERM> |
Also available in: Unified diff
New simple http collector plugin that doesn't care about splitting the file it downloads.