1
|
import eu.dnetlib.data.mapreduce.hbase.lodExport.linkage.LimesReducer;
|
2
|
import eu.dnetlib.data.mapreduce.hbase.lodExport.linkage.LinkCustomReducer;
|
3
|
import eu.dnetlib.data.mapreduce.hbase.lodExport.linkage.LinkMapper;
|
4
|
import org.apache.hadoop.conf.Configuration;
|
5
|
import org.apache.hadoop.io.Text;
|
6
|
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
|
7
|
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
|
8
|
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
|
9
|
import org.junit.Before;
|
10
|
import org.junit.Test;
|
11
|
|
12
|
import java.io.IOException;
|
13
|
import java.util.ArrayList;
|
14
|
import java.util.List;
|
15
|
|
16
|
|
17
|
public class LinkageTest {
|
18
|
|
19
|
MapDriver<Text, Text, Text, Text> mapDriver;
|
20
|
ReduceDriver<Text, Text, Text, Text> limesReducerDriver;
|
21
|
ReduceDriver<Text, Text, Text, Text> linkCustomReducerDriver;
|
22
|
Configuration configuration;
|
23
|
|
24
|
@Before
|
25
|
public void setUp() {
|
26
|
configuration = new Configuration();
|
27
|
configuration.set("lod.sourceMappings", "{\"type\":\"result\",\"fields\":[\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\",\"<http://purl.org/dc/terms/identifier>\"]}");
|
28
|
configuration.set("lod.targetMappings", "{\"type\":\"publications\",\"fields\":[\"<http://purl.org/dc/terms/issued>\",\"<http://www.w3.org/2000/01/rdf-schema#label>\",\"<http://purl.org/dc/terms/identifier>\"]}");
|
29
|
|
30
|
configuration.set("lod.redisHost", "83.212.96.39");
|
31
|
configuration.set("lod.redisPort", "6379");
|
32
|
configuration.set("mapred.reduce.tasks", "1");
|
33
|
configuration.set("lod.configXML",
|
34
|
"<?xml version=\"1.0\" encoding=\"UTF-8\"?> <!DOCTYPE LIMES SYSTEM \"limes.dtd\"> <LIMES> <PREFIX> <NAMESPACE>http://www.w3.org/1999/02/22-rdf-syntax-ns#</NAMESPACE> <LABEL>rdf</LABEL> </PREFIX> <PREFIX> <NAMESPACE>http://www.w3.org/2000/01/rdf-schema#</NAMESPACE> <LABEL>rdfs</LABEL> </PREFIX> <SOURCE> <ID>source1</ID> <ENDPOINT>/user/kanakakis/openaireNT</ENDPOINT> <VAR>?x</VAR> <PAGESIZE>100</PAGESIZE> <RESTRICTION>?x rdf:type http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity</RESTRICTION> <PROPERTY>http://lod.openaire.eu/vocab/year RENAME Year</PROPERTY> <PROPERTY>http://www.eurocris.org/ontologies/cerif/1.3#name AS lowercase->regexreplace(\"[^A-Za-z0-9]\",\" \") RENAME publicationName</PROPERTY> </SOURCE> <TARGET> <ID>source2</ID> <ENDPOINT>/user/kanakakis/dblpNT</ENDPOINT> <VAR>?y</VAR> <PAGESIZE>100</PAGESIZE> <RESTRICTION>?y rdf:type http://swrc.ontoware.org/ontology#Article</RESTRICTION> <PROPERTY>http://purl.org/dc/terms/issued RENAME Year</PROPERTY> <PROPERTY>http://www.w3.org/2000/01/rdf-schema#label AS lowercase->regexreplace(\"[^A-Za-z0-9]\",\" \") RENAME articleName</PROPERTY> </TARGET> <METRIC>AND(jaro(x.publicationName,y.articleName)|0.95,jaro(x.Year,y.Year)|1.0)</METRIC> <ACCEPTANCE> <THRESHOLD>0.95</THRESHOLD> <FILE>/user/kanakakis/accepted_links_0.95</FILE> <RELATION>owl:sameAs</RELATION> </ACCEPTANCE> <REVIEW> <THRESHOLD>0.95</THRESHOLD> <FILE>/user/kanakakis/verified_links_0.95</FILE> <RELATION>owl:sameAs</RELATION> </REVIEW> <EXECUTION>Default</EXECUTION> <OUTPUT>TTL</OUTPUT> </LIMES>");
|
35
|
configuration.set("lod.limesDTD", "<?xml version=\"1.0\" encoding=\"utf-8\"?> <!ELEMENT LIMES (PREFIX*, SOURCE, TARGET, METRIC, ACCEPTANCE, REVIEW, EXECUTION*, GRANULARITY*, OUTPUT*)> <!ELEMENT PREFIX (NAMESPACE, LABEL)> <!ELEMENT NAMESPACE (#PCDATA)> <!ELEMENT LABEL (#PCDATA)> <!ELEMENT SOURCE (ID, ENDPOINT, GRAPH*, VAR, PAGESIZE, RESTRICTION+, PROPERTY+, TYPE*)> <!ELEMENT TARGET (ID, ENDPOINT, GRAPH*, VAR, PAGESIZE, RESTRICTION+, PROPERTY+, TYPE*)> <!ELEMENT ID (#PCDATA)> <!ELEMENT RESTRICTION (#PCDATA)> <!ELEMENT METRIC (#PCDATA)> <!ELEMENT ACCEPTANCE (THRESHOLD, FILE, RELATION)> <!ELEMENT REVIEW (THRESHOLD, FILE, RELATION)> <!ELEMENT RELATION (#PCDATA)> <!ELEMENT ENDPOINT (#PCDATA)> <!ELEMENT GRAPH (#PCDATA)> <!ELEMENT VAR (#PCDATA)> <!ELEMENT CLASS (#PCDATA)> <!ELEMENT PROPERTY (#PCDATA)> <!ELEMENT TYPE (#PCDATA)> <!ELEMENT THRESHOLD (#PCDATA)> <!ELEMENT FILE (#PCDATA)> <!ELEMENT PAGESIZE (#PCDATA)> <!ELEMENT EXECUTION (#PCDATA)> <!ELEMENT GRANULARITY (#PCDATA)> <!ELEMENT OUTPUT (#PCDATA)>");
|
36
|
|
37
|
mapDriver = MapDriver.newMapDriver(new LinkMapper());
|
38
|
limesReducerDriver = ReduceDriver.newReduceDriver(new LimesReducer());
|
39
|
linkCustomReducerDriver = ReduceDriver.newReduceDriver(new LinkCustomReducer());
|
40
|
}
|
41
|
|
42
|
@Test
|
43
|
public void testMapper() {
|
44
|
mapDriver.withConfiguration(configuration)
|
45
|
.withInput(new Text("0 equals"),
|
46
|
new Text("source_<http://lod.openaire.eu/data/result/od________18::7d2f9a16dec5143412d4bddf796d3c35>,target_<http://dblp.l3s.de/d2r/resource/publications/journals/corr/WangYP15>,"));
|
47
|
mapDriver
|
48
|
.withOutput(new Text("(0 equals"), new Text("source_<http://lod.openaire.eu/data/result/od________18::7d2f9a16dec5143412d4bddf796d3c35>,target_<http://dblp.l3s.de/d2r/resource/publications/journals/corr/WangYP15>,"));
|
49
|
|
50
|
mapDriver.runTest();
|
51
|
}
|
52
|
|
53
|
@Test
|
54
|
public void testReducer() throws IOException {
|
55
|
List<Text> values = new ArrayList<>();
|
56
|
values.add(new Text("source_<http://lod.openaire.eu/data/result/webcrawl____::478b3b9a8279a5ec96356c2c68bfab60>"));
|
57
|
values.add(new Text("target_<http://dblp.l3s.de/d2r/resource/publications/journals/corr/WangYP15>"));
|
58
|
|
59
|
limesReducerDriver.withConfiguration(configuration)
|
60
|
.withInput(new Text("OA"), values)
|
61
|
.run();
|
62
|
|
63
|
}
|
64
|
|
65
|
@Test
|
66
|
public void Custom() throws IOException {
|
67
|
List<Text> values = new ArrayList<>();
|
68
|
values.add(new Text("source_<http://lod.openaire.eu/data/result/webcrawl____::478b3b9a8279a5ec96356c2c68bfab60>,target_<http://dblp.l3s.de/d2r/resource/publications/journals/corr/WangYP15>"));
|
69
|
// values.add(new Text("target_<http://dblp.l3s.de/d2r/resource/publications/journals/corr/WangYP15>"));
|
70
|
|
71
|
linkCustomReducerDriver.withConfiguration(configuration)
|
72
|
.withInput(new Text("OA"), values)
|
73
|
.run();
|
74
|
|
75
|
}
|
76
|
|
77
|
|
78
|
}
|
79
|
|