Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.oai;
2

    
3
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.Collection;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.junit.Before;
import org.junit.Test;

import com.google.common.collect.Lists;

import eu.dnetlib.data.mapreduce.hbase.oai.utils.RecordFieldsExtractor;
20

    
21
public class RecordFieldsExtractorTest {
22

    
23
	private String record = "representativeRecord.xml";
24
	private String record2 = "dedupedRecord.xml";
25
	private String record3 = "originalRecord.xml";
26
	private RecordFieldsExtractor fieldExtractor;
27
	private List<String> enrichmentXPaths = Lists.newArrayList("//subject[./@inferred='true']", "//result/datainfo[./inferenceprovenance='dedup']",
28
			"//rel[./@inferred='true' and ./@inferenceprovenance != 'dedup']");
29

    
30
	@Before
31
	public void setUp() throws Exception {
32
		fieldExtractor = new RecordFieldsExtractor();
33
	}
34

    
35
	@Test
36
	public void testEnhanced() throws IOException, DocumentException {
37

    
38
		String recordString = IOUtils.toString(this.getClass().getResourceAsStream(record));
39
		Document doc = new SAXReader().read(new StringReader(recordString));
40
		Collection<String> sets = fieldExtractor.getEnrichedSets(doc, enrichmentXPaths, Lists.newArrayList("set1", "set2"));
41
		System.out.println(sets);
42
		assertTrue(sets.contains("set1_enriched") && sets.contains("set2_enriched"));
43
	}
44

    
45
	@Test
46
	public void testEnhancedDeduped() throws IOException, DocumentException {
47

    
48
		String recordString = IOUtils.toString(this.getClass().getResourceAsStream(record2));
49
		Document doc = new SAXReader().read(new StringReader(recordString));
50
		Collection<String> sets = fieldExtractor.getEnrichedSets(doc, enrichmentXPaths, Lists.newArrayList("set1", "set2"));
51
		System.out.println(sets);
52
		assertTrue(sets.contains("set1_enriched") && sets.contains("set2_enriched"));
53
	}
54

    
55
	@Test
56
	public void testNotEnhanced() throws IOException, DocumentException {
57

    
58
		String recordString = IOUtils.toString(this.getClass().getResourceAsStream(record3));
59
		Document doc = new SAXReader().read(new StringReader(recordString));
60
		Collection<String> sets = fieldExtractor.getEnrichedSets(doc, enrichmentXPaths, Lists.newArrayList("set1", "set2"));
61
		System.out.println(sets);
62
		assertTrue(sets.isEmpty());
63
	}
64
}
(2-2/2)