1
|
package eu.dnetlib.data.mapreduce.hbase.oai;
|
2
|
|
3
|
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.Collection;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.junit.Before;
import org.junit.Test;

import com.google.common.collect.Lists;

import eu.dnetlib.data.mapreduce.hbase.oai.utils.RecordFieldsExtractor;
|
20
|
|
21
|
public class RecordFieldsExtractorTest {
|
22
|
|
23
|
private String record = "representativeRecord.xml";
|
24
|
private String record2 = "dedupedRecord.xml";
|
25
|
private String record3 = "originalRecord.xml";
|
26
|
private RecordFieldsExtractor fieldExtractor;
|
27
|
private List<String> enrichmentXPaths = Lists.newArrayList("//subject[./@inferred='true']", "//result/datainfo[./inferenceprovenance='dedup']",
|
28
|
"//rel[./@inferred='true' and ./@inferenceprovenance != 'dedup']");
|
29
|
|
30
|
@Before
|
31
|
public void setUp() throws Exception {
|
32
|
fieldExtractor = new RecordFieldsExtractor();
|
33
|
}
|
34
|
|
35
|
@Test
|
36
|
public void testEnhanced() throws IOException, DocumentException {
|
37
|
|
38
|
String recordString = IOUtils.toString(this.getClass().getResourceAsStream(record));
|
39
|
Document doc = new SAXReader().read(new StringReader(recordString));
|
40
|
Collection<String> sets = fieldExtractor.getEnrichedSets(doc, enrichmentXPaths, Lists.newArrayList("set1", "set2"));
|
41
|
System.out.println(sets);
|
42
|
assertTrue(sets.contains("set1_enriched") && sets.contains("set2_enriched"));
|
43
|
}
|
44
|
|
45
|
@Test
|
46
|
public void testEnhancedDeduped() throws IOException, DocumentException {
|
47
|
|
48
|
String recordString = IOUtils.toString(this.getClass().getResourceAsStream(record2));
|
49
|
Document doc = new SAXReader().read(new StringReader(recordString));
|
50
|
Collection<String> sets = fieldExtractor.getEnrichedSets(doc, enrichmentXPaths, Lists.newArrayList("set1", "set2"));
|
51
|
System.out.println(sets);
|
52
|
assertTrue(sets.contains("set1_enriched") && sets.contains("set2_enriched"));
|
53
|
}
|
54
|
|
55
|
@Test
|
56
|
public void testNotEnhanced() throws IOException, DocumentException {
|
57
|
|
58
|
String recordString = IOUtils.toString(this.getClass().getResourceAsStream(record3));
|
59
|
Document doc = new SAXReader().read(new StringReader(recordString));
|
60
|
Collection<String> sets = fieldExtractor.getEnrichedSets(doc, enrichmentXPaths, Lists.newArrayList("set1", "set2"));
|
61
|
System.out.println(sets);
|
62
|
assertTrue(sets.isEmpty());
|
63
|
}
|
64
|
}
|