1 |
43504
|
eri.katsar
|
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.DatasetReducer;
|
2 |
|
|
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.SourceMapper;
|
3 |
45621
|
eri.katsar
|
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.TargetMapper;
|
4 |
43504
|
eri.katsar
|
import org.apache.hadoop.conf.Configuration;
|
5 |
|
|
import org.apache.hadoop.io.LongWritable;
|
6 |
|
|
import org.apache.hadoop.io.Text;
|
7 |
|
|
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
|
8 |
|
|
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
|
9 |
|
|
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
|
10 |
47680
|
eri.katsar
|
import org.junit.Before;
|
11 |
43504
|
eri.katsar
|
import org.junit.Test;
|
12 |
|
|
|
13 |
47680
|
eri.katsar
|
import java.io.IOException;
|
14 |
43504
|
eri.katsar
|
import java.util.ArrayList;
|
15 |
|
|
import java.util.List;
|
16 |
|
|
|
17 |
44307
|
eri.katsar
|
|
18 |
43504
|
eri.katsar
|
public class PreprocessingTest {
|
19 |
|
|
|
20 |
|
|
MapDriver<LongWritable, Text, Text, Text> mapDriver;
|
21 |
|
|
ReduceDriver<Text, Text, Text, Text> reduceDriver;
|
22 |
|
|
MapReduceDriver<LongWritable, Text, Text, Text, Text, Text> mapReduceDriver;
|
23 |
|
|
|
24 |
|
|
@Before
|
25 |
|
|
public void setUp() {
|
26 |
|
|
DatasetReducer datasetReducer = new DatasetReducer();
|
27 |
|
|
reduceDriver = ReduceDriver.newReduceDriver(datasetReducer);
|
28 |
45621
|
eri.katsar
|
}
|
29 |
43504
|
eri.katsar
|
|
30 |
|
|
@Test
|
31 |
44137
|
eri.katsar
|
public void testMapper() throws IOException {
|
32 |
43504
|
eri.katsar
|
Configuration configuration = new Configuration();
|
33 |
45528
|
eri.katsar
|
String sourceMappings = "{\"type\":\"result\", \"fields\":[\"<http://purl.org/dc/terms/identifier>\",\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\"]}";
|
34 |
44137
|
eri.katsar
|
configuration.set("lod.sourceMappings", sourceMappings);
|
35 |
45621
|
eri.katsar
|
mapDriver = MapDriver.newMapDriver(new SourceMapper());
|
36 |
44137
|
eri.katsar
|
mapDriver.withConfiguration(configuration);
|
37 |
45528
|
eri.katsar
|
mapDriver.withInput(new LongWritable(1), new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity> .\n"))
|
38 |
|
|
.withInput(new LongWritable(2),
|
39 |
|
|
new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.eurocris.org/ontologies/cerif/1.3#name> \"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\" .\n"));
|
40 |
|
|
mapDriver.withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>,"), new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>" +
|
41 |
|
|
" <http://www.eurocris.org/ontologies/cerif/1.3#name> \"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\""));
|
42 |
|
|
mapDriver.runTest(true);
|
43 |
43504
|
eri.katsar
|
|
44 |
45528
|
eri.katsar
|
|
45 |
45517
|
eri.katsar
|
}
|
46 |
45528
|
eri.katsar
|
|
47 |
|
|
|
48 |
45621
|
eri.katsar
|
|
49 |
|
|
@Test
|
50 |
|
|
public void testTargetMapper() throws IOException {
|
51 |
|
|
Configuration configuration = new Configuration();
|
52 |
|
|
String targetMappings = "{\"type\":\"publications\",\"fields\":[\"<http://purl.org/dc/terms/issued>\",\"<http://www.w3.org/2000/01/rdf-schema#label>\",\"<http://purl.org/dc/terms/identifier>\"]}";
|
53 |
|
|
configuration.set("lod.targetMappings", targetMappings);
|
54 |
|
|
|
55 |
|
|
mapDriver = MapDriver.newMapDriver(new TargetMapper());
|
56 |
|
|
mapDriver.withConfiguration(configuration);
|
57 |
|
|
mapDriver.withInput(new LongWritable(1), new Text("<http://xmlns.com/foaf/0.1/Document> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class> .\n"));
|
58 |
|
|
// mapDriver.withOutput(new Text("TARGET,publications,<http://dblp.l3s.de/d2r/resource/journals/publications>"),new Text("\"4OR\""));
|
59 |
|
|
mapDriver.runTest(true);
|
60 |
|
|
|
61 |
|
|
}
|
62 |
|
|
|
63 |
|
|
|
64 |
|
|
|
65 |
|
|
|
66 |
|
|
@Test
|
67 |
45528
|
eri.katsar
|
public void testReducer ()throws Exception {
|
68 |
|
|
Configuration configuration = new Configuration();
|
69 |
|
|
String sourceMappings = "{\"type\":\"result\", \"fields\":[\"<http://purl.org/dc/terms/identifier>\",\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\"]}";
|
70 |
|
|
configuration.set("lod.sourceMappings", sourceMappings);
|
71 |
|
|
|
72 |
|
|
List inputList = new ArrayList();
|
73 |
|
|
inputList.add(new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>" +
|
74 |
|
|
" <http://www.eurocris.org/ontologies/cerif/1.3#name> \"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\""));
|
75 |
|
|
|
76 |
|
|
reduceDriver.withConfiguration(configuration);
|
77 |
|
|
reduceDriver.withInput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>,"), inputList);
|
78 |
|
|
|
79 |
|
|
reduceDriver.run();
|
80 |
45517
|
eri.katsar
|
}
|
81 |
45621
|
eri.katsar
|
@Test
|
82 |
|
|
public void genericTest(){
|
83 |
|
|
String fieldValue="lalalal^^Sssss";
|
84 |
|
|
fieldValue=fieldValue.substring(0,fieldValue.indexOf("^"));
|
85 |
|
|
System.out.println(fieldValue);
|
86 |
|
|
}
|
87 |
|
|
|
88 |
45528
|
eri.katsar
|
}
|
89 |
45621
|
eri.katsar
|
|
90 |
45517
|
eri.katsar
|
/* mapDriver.withConfiguration(configuration)
|
91 |
43504
|
eri.katsar
|
.withInput(new LongWritable(1),
|
92 |
|
|
new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity> .\n"))
|
93 |
45528
|
eri.katsar
|
|
94 |
44307
|
eri.katsar
|
*//*
|
95 |
43504
|
eri.katsar
|
|
96 |
44307
|
eri.katsar
|
|
97 |
|
|
*/
|
98 |
|
|
/* mapDriver .withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"),
|
99 |
44137
|
eri.katsar
|
new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\","));
|
100 |
44307
|
eri.katsar
|
*//*
|
101 |
|
|
|
102 |
43504
|
eri.katsar
|
mapDriver.runTest();
|
103 |
|
|
}
|
104 |
|
|
|
105 |
|
|
@Test
|
106 |
|
|
public void testReducer() throws IOException {
|
107 |
|
|
List<Text> values = new ArrayList<>();
|
108 |
|
|
values.add(new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\""));
|
109 |
45809
|
eri.katsar
|
limesReducerDriver
|
110 |
43504
|
eri.katsar
|
.withInput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"), values)
|
111 |
|
|
.withOutput(new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"), new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\""));
|
112 |
45809
|
eri.katsar
|
limesReducerDriver.run();
|
113 |
43504
|
eri.katsar
|
|
114 |
|
|
}
|
115 |
|
|
|
116 |
44137
|
eri.katsar
|
private static List<Pair<LongWritable, Text>> getInput() throws FileNotFoundException {
|
117 |
|
|
File file = new File("C:\\Users\\eri_k\\workspace\\lodinterlinking\\branches\\cacheOptimized\\src\\main\\resources\\eu\\dnetlib\\data\\mapreduce\\hbase\\lodExport\\source.nt");
|
118 |
|
|
Scanner scanner = new Scanner(new FileInputStream(file));
|
119 |
|
|
List<Pair<LongWritable, Text>> input = new ArrayList<Pair<LongWritable, Text>>();
|
120 |
|
|
while (scanner.hasNext()) {
|
121 |
|
|
String line = scanner.nextLine();
|
122 |
|
|
input.add(new Pair<LongWritable, Text>(new LongWritable(0), new Text(line)));
|
123 |
|
|
}
|
124 |
|
|
return input;
|
125 |
|
|
}
|
126 |
|
|
|
127 |
43504
|
eri.katsar
|
@Test
|
128 |
|
|
public void testMapReduce() {
|
129 |
44307
|
eri.katsar
|
*/
|
130 |
|
|
/*mapReduceDriver.withInput(new LongWritable(), new Text("655209;1;796764372490213;804422938115889;6"));
|
131 |
43504
|
eri.katsar
|
mapReduceDriver.withOutput(new Text("6"), new IntWritable(2));
|
132 |
45517
|
eri.katsar
|
mapReduceDriver.runTest();
|
133 |
43504
|
eri.katsar
|
|
134 |
|
|
}
|
135 |
|
|
}
|
136 |
45517
|
eri.katsar
|
*/
|