Project

General

Profile

1
package eu.dnetlib.data.collector.plugins.schemaorg;
2

    
3
import eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex.SitemapFileIterator;
4
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
5
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
6
import org.junit.Assert;
7
import org.junit.Before;
8
import org.junit.Ignore;
9
import org.junit.Test;
10

    
11
import java.net.URL;
12
import java.nio.charset.StandardCharsets;
13
import java.util.ArrayList;
14
import java.util.HashMap;
15
import java.util.List;
16
import java.util.concurrent.TimeUnit;
17

    
18
@Ignore
19
public class SchemaOrgSitemapIteratorTest {
20
	@Before
21
	public void setUp() throws Exception {
22
	}
23

    
24
	@Test
25
	public void test() throws CollectorServiceException {
26
		URL resource = SchemaOrgSitemapIteratorTest.class.getResource("sitemap.xml");
27

    
28
		HashMap<String,String> params = new HashMap<>();
29
		params.put("repositoryAccessType", "sitemapindex");
30
		params.put("consumerBlockPolling", Boolean.toString(true));
31
		params.put("consumerBlockPollingTimeout", "2");
32
		params.put("consumerBlockPollingTimeoutUnit", TimeUnit.MINUTES.toString());
33
		params.put("endpointCharset", StandardCharsets.UTF_8.name());
34
		params.put("updatedDateFormat", "YYYY-MM-DD");
35
		params.put("createdDateFormat", "YYYY-MM-DD");
36
		params.put("publicationDateFormat", "YYYY-MM-DD");
37
		params.put("contributorFallbackType", DatasetDocument.Contributor.ContributorType.Other.toString());
38
		params.put("identifierFallbackType", null);
39
		params.put("identifierFallbackURL", Boolean.toString(true));
40
		params.put("identifierMappingARK", "ark, ARK");
41
		params.put("identifierMappingDOI", "doi, DOI");
42
		params.put("identifierMappingHandle", "Handle, HANDLE");
43
		params.put("identifierMappingPURL", "purl, PURL");
44
		params.put("identifierMappingURN", "urn, URN");
45
		params.put("identifierMappingURL", "url, URL");
46

    
47
		params.put("repositoryAccessType", "sitemapindex");
48
		params.put("sitemap_queueSize", "100");
49
		params.put("sitemap_IndexCharset", StandardCharsets.UTF_8.name());
50
		params.put("sitemap_FileCharset", StandardCharsets.UTF_8.name());
51
		params.put("sitemap_FileSchema", SitemapFileIterator.Options.SitemapSchemaType.Text.toString());
52
		params.put("sitemap_FileType", SitemapFileIterator.Options.SitemapFileType.Text.toString());
53

    
54
		InterfaceDescriptor descriptor = new InterfaceDescriptor();
55
		descriptor.setId("schema.org - reactome");
56
		descriptor.setBaseUrl(resource.toString());
57
		descriptor.setParams(params);
58

    
59
		SchemaOrgPlugin schemaOrgPlugin = new SchemaOrgPlugin();
60

    
61
		Iterable<String> iterable = schemaOrgPlugin.collect(descriptor, null, null);
62

    
63
		List<Integer> lengths =new ArrayList<>();
64
		int count =0;
65
		for(String item : iterable) {
66
			count += 1;
67
			lengths.add(item.length());
68
		}
69
		Assert.assertEquals(2, count);
70
		Assert.assertEquals(1626, (int)lengths.get(0));
71
		Assert.assertEquals(48, (int)lengths.get(1));
72

    
73
	}
74
}
    (1-1/1)