1
|
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.DatasetReducer;
|
2
|
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.SourceMapper;
|
3
|
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.TargetMapper;
|
4
|
import org.apache.hadoop.conf.Configuration;
|
5
|
import org.apache.hadoop.io.LongWritable;
|
6
|
import org.apache.hadoop.io.Text;
|
7
|
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
|
8
|
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
|
9
|
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
|
10
|
import org.junit.Before;
|
11
|
import org.junit.Test;
|
12
|
|
13
|
import java.io.IOException;
|
14
|
import java.util.ArrayList;
|
15
|
import java.util.List;
|
16
|
|
17
|
|
18
|
public class PreprocessingTest {
|
19
|
|
20
|
MapDriver<LongWritable, Text, Text, Text> mapDriver;
|
21
|
ReduceDriver<Text, Text, Text, Text> reduceDriver;
|
22
|
MapReduceDriver<LongWritable, Text, Text, Text, Text, Text> mapReduceDriver;
|
23
|
|
24
|
@Before
|
25
|
public void setUp() {
|
26
|
DatasetReducer datasetReducer = new DatasetReducer();
|
27
|
reduceDriver = ReduceDriver.newReduceDriver(datasetReducer);
|
28
|
}
|
29
|
|
30
|
@Test
|
31
|
public void testMapper() throws IOException {
|
32
|
Configuration configuration = new Configuration();
|
33
|
String sourceMappings = "{\"type\":\"result\", \"fields\":[\"<http://purl.org/dc/terms/identifier>\",\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\"]}";
|
34
|
configuration.set("lod.sourceMappings", sourceMappings);
|
35
|
mapDriver = MapDriver.newMapDriver(new SourceMapper());
|
36
|
mapDriver.withConfiguration(configuration);
|
37
|
mapDriver.withInput(new LongWritable(1), new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity> .\n"))
|
38
|
.withInput(new LongWritable(2),
|
39
|
new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.eurocris.org/ontologies/cerif/1.3#name> \"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\" .\n"));
|
40
|
mapDriver.withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>,"), new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>" +
|
41
|
" <http://www.eurocris.org/ontologies/cerif/1.3#name> \"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\""));
|
42
|
mapDriver.runTest(true);
|
43
|
|
44
|
|
45
|
}
|
46
|
|
47
|
|
48
|
|
49
|
@Test
|
50
|
public void testTargetMapper() throws IOException {
|
51
|
Configuration configuration = new Configuration();
|
52
|
String targetMappings = "{\"type\":\"publications\",\"fields\":[\"<http://purl.org/dc/terms/issued>\",\"<http://www.w3.org/2000/01/rdf-schema#label>\",\"<http://purl.org/dc/terms/identifier>\"]}";
|
53
|
configuration.set("lod.targetMappings", targetMappings);
|
54
|
|
55
|
mapDriver = MapDriver.newMapDriver(new TargetMapper());
|
56
|
mapDriver.withConfiguration(configuration);
|
57
|
mapDriver.withInput(new LongWritable(1), new Text("<http://xmlns.com/foaf/0.1/Document> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class> .\n"));
|
58
|
// mapDriver.withOutput(new Text("TARGET,publications,<http://dblp.l3s.de/d2r/resource/journals/publications>"),new Text("\"4OR\""));
|
59
|
mapDriver.runTest(true);
|
60
|
|
61
|
}
|
62
|
|
63
|
|
64
|
|
65
|
|
66
|
@Test
|
67
|
public void testReducer ()throws Exception {
|
68
|
Configuration configuration = new Configuration();
|
69
|
String sourceMappings = "{\"type\":\"result\", \"fields\":[\"<http://purl.org/dc/terms/identifier>\",\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\"]}";
|
70
|
configuration.set("lod.sourceMappings", sourceMappings);
|
71
|
|
72
|
List inputList = new ArrayList();
|
73
|
inputList.add(new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>" +
|
74
|
" <http://www.eurocris.org/ontologies/cerif/1.3#name> \"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\""));
|
75
|
|
76
|
reduceDriver.withConfiguration(configuration);
|
77
|
reduceDriver.withInput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>,"), inputList);
|
78
|
|
79
|
reduceDriver.run();
|
80
|
}
|
81
|
@Test
|
82
|
public void genericTest(){
|
83
|
String fieldValue="lalalal^^Sssss";
|
84
|
fieldValue=fieldValue.substring(0,fieldValue.indexOf("^"));
|
85
|
System.out.println(fieldValue);
|
86
|
}
|
87
|
|
88
|
}
|
89
|
|
90
|
/* mapDriver.withConfiguration(configuration)
|
91
|
.withInput(new LongWritable(1),
|
92
|
new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity> .\n"))
|
93
|
|
94
|
*//*
|
95
|
|
96
|
|
97
|
*/
|
98
|
/* mapDriver .withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"),
|
99
|
new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\","));
|
100
|
*//*
|
101
|
|
102
|
mapDriver.runTest();
|
103
|
}
|
104
|
|
105
|
@Test
|
106
|
public void testReducer() throws IOException {
|
107
|
List<Text> values = new ArrayList<>();
|
108
|
values.add(new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\""));
|
109
|
limesReducerDriver
|
110
|
.withInput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"), values)
|
111
|
.withOutput(new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"), new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\""));
|
112
|
limesReducerDriver.run();
|
113
|
|
114
|
}
|
115
|
|
116
|
private static List<Pair<LongWritable, Text>> getInput() throws FileNotFoundException {
|
117
|
File file = new File("C:\\Users\\eri_k\\workspace\\lodinterlinking\\branches\\cacheOptimized\\src\\main\\resources\\eu\\dnetlib\\data\\mapreduce\\hbase\\lodExport\\source.nt");
|
118
|
Scanner scanner = new Scanner(new FileInputStream(file));
|
119
|
List<Pair<LongWritable, Text>> input = new ArrayList<Pair<LongWritable, Text>>();
|
120
|
while (scanner.hasNext()) {
|
121
|
String line = scanner.nextLine();
|
122
|
input.add(new Pair<LongWritable, Text>(new LongWritable(0), new Text(line)));
|
123
|
}
|
124
|
return input;
|
125
|
}
|
126
|
|
127
|
@Test
|
128
|
public void testMapReduce() {
|
129
|
*/
|
130
|
/*mapReduceDriver.withInput(new LongWritable(), new Text("655209;1;796764372490213;804422938115889;6"));
|
131
|
mapReduceDriver.withOutput(new Text("6"), new IntWritable(2));
|
132
|
mapReduceDriver.runTest();
|
133
|
|
134
|
}
|
135
|
}
|
136
|
*/
|