Project

General

Profile

1
package eu.dnetlib.data.collector.plugins.schemaorg;
2

    
3
import eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex.SitemapFileIterator;
4
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
5
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
6
import org.junit.Assert;
7
import org.junit.Before;
8
import org.junit.Ignore;
9
import org.junit.Test;
10

    
11
import java.net.URL;
12
import java.nio.charset.StandardCharsets;
13
import java.util.HashMap;
14
import java.util.concurrent.TimeUnit;
15

    
16
@Ignore
17
public class SchemaOrgSitemapIteratorTest {
18
	@Before
19
	public void setUp() throws Exception {
20
	}
21

    
22
	@Test
23
	public void test() throws CollectorServiceException {
24
		URL resource = SchemaOrgSitemapIteratorTest.class.getResource("sitemap.xml");
25

    
26
		HashMap<String,String> params = new HashMap<>();
27
		params.put("repositoryAccessType", "sitemapindex");
28
		params.put("consumerBlockPolling", Boolean.toString(true));
29
		params.put("consumerBlockPollingTimeout", "2");
30
		params.put("consumerBlockPollingTimeoutUnit", TimeUnit.MINUTES.toString());
31
		params.put("endpointCharset", StandardCharsets.UTF_8.name());
32
		params.put("updatedDateFormat", "YYYY-MM-DD");
33
		params.put("createdDateFormat", "YYYY-MM-DD");
34
		params.put("publicationDateFormat", "YYYY-MM-DD");
35
		params.put("contributorFallbackType", DatasetDocument.Contributor.ContributorType.Other.toString());
36
		params.put("identifierFallbackType", null);
37
		params.put("identifierFallbackURL", Boolean.toString(true));
38
		params.put("identifierMappingARK", "ark, ARK");
39
		params.put("identifierMappingDOI", "doi, DOI");
40
		params.put("identifierMappingHandle", "Handle, HANDLE");
41
		params.put("identifierMappingPURL", "purl, PURL");
42
		params.put("identifierMappingURN", "urn, URN");
43
		params.put("identifierMappingURL", "url, URL");
44

    
45
		params.put("repositoryAccessType", "sitemapindex");
46
		params.put("sitemap_queueSize", "100");
47
		params.put("sitemap_IndexCharset", StandardCharsets.UTF_8.name());
48
		params.put("sitemap_FileCharset", StandardCharsets.UTF_8.name());
49
		params.put("sitemap_FileSchema", SitemapFileIterator.Options.SitemapSchemaType.Text.toString());
50
		params.put("sitemap_FileType", SitemapFileIterator.Options.SitemapFileType.Text.toString());
51

    
52
		InterfaceDescriptor descriptor = new InterfaceDescriptor();
53
		descriptor.setId("schema.org - reactome");
54
		descriptor.setBaseUrl(resource.toString());
55
		descriptor.setParams(params);
56

    
57
		SchemaOrgPlugin schemaOrgPlugin = new SchemaOrgPlugin();
58

    
59
		Iterable<String> iterable = schemaOrgPlugin.collect(descriptor, null, null);
60

    
61
		int length =0;
62
		int count =0;
63
		int nullcount =0;
64
		for(String item : iterable) {
65
			count += 1;
66
			if(item == null) {
67
				nullcount+=1;
68
				continue;
69
			}
70
			length = item.length();
71
		}
72
		Assert.assertEquals(1, nullcount);
73
		Assert.assertEquals(2, count);
74
		Assert.assertEquals(1626, length);
75

    
76
	}
77
}
    (1-1/1)