Project

General

Profile

1 43504 eri.katsar
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.DatasetReducer;
2
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.SourceMapper;
3 45621 eri.katsar
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.TargetMapper;
4 43504 eri.katsar
import org.apache.hadoop.conf.Configuration;
5
import org.apache.hadoop.io.LongWritable;
6
import org.apache.hadoop.io.Text;
7
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
8
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
9
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
10 47680 eri.katsar
import org.junit.Before;
11 43504 eri.katsar
import org.junit.Test;
12
13 47680 eri.katsar
import java.io.IOException;
14 43504 eri.katsar
import java.util.ArrayList;
15
import java.util.List;
16
17 44307 eri.katsar
18 43504 eri.katsar
public class PreprocessingTest {
19
20
    MapDriver<LongWritable, Text, Text, Text> mapDriver;
21
    ReduceDriver<Text, Text, Text, Text> reduceDriver;
22
    MapReduceDriver<LongWritable, Text, Text, Text, Text, Text> mapReduceDriver;
23
24
    @Before
25
    public void setUp() {
26
        DatasetReducer datasetReducer = new DatasetReducer();
27
        reduceDriver = ReduceDriver.newReduceDriver(datasetReducer);
28 45621 eri.katsar
       }
29 43504 eri.katsar
30
    @Test
31 44137 eri.katsar
    public void testMapper() throws IOException {
32 43504 eri.katsar
        Configuration configuration = new Configuration();
33 45528 eri.katsar
        String sourceMappings = "{\"type\":\"result\", \"fields\":[\"<http://purl.org/dc/terms/identifier>\",\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\"]}";
34 44137 eri.katsar
        configuration.set("lod.sourceMappings", sourceMappings);
35 45621 eri.katsar
        mapDriver = MapDriver.newMapDriver(new SourceMapper());
36 44137 eri.katsar
        mapDriver.withConfiguration(configuration);
37 45528 eri.katsar
        mapDriver.withInput(new LongWritable(1),  new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity> .\n"))
38
                .withInput(new LongWritable(2),
39
                        new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.eurocris.org/ontologies/cerif/1.3#name> \"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\" .\n"));
40
        mapDriver.withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>,"), new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>" +
41
                "	<http://www.eurocris.org/ontologies/cerif/1.3#name>	\"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\""));
42
        mapDriver.runTest(true);
43 43504 eri.katsar
44 45528 eri.katsar
45 45517 eri.katsar
    }
46 45528 eri.katsar
47
48 45621 eri.katsar
49
    @Test
50
    public void testTargetMapper() throws IOException {
51
        Configuration configuration = new Configuration();
52
        String targetMappings = "{\"type\":\"publications\",\"fields\":[\"<http://purl.org/dc/terms/issued>\",\"<http://www.w3.org/2000/01/rdf-schema#label>\",\"<http://purl.org/dc/terms/identifier>\"]}";
53
        configuration.set("lod.targetMappings", targetMappings);
54
55
        mapDriver = MapDriver.newMapDriver(new TargetMapper());
56
        mapDriver.withConfiguration(configuration);
57
        mapDriver.withInput(new LongWritable(1),  new Text("<http://xmlns.com/foaf/0.1/Document> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class> .\n"));
58
     //   mapDriver.withOutput(new Text("TARGET,publications,<http://dblp.l3s.de/d2r/resource/journals/publications>"),new Text("\"4OR\""));
59
        mapDriver.runTest(true);
60
61
    }
62
63
64
65
66
    @Test
67 45528 eri.katsar
public void testReducer ()throws Exception {
68
    Configuration configuration = new Configuration();
69
    String sourceMappings = "{\"type\":\"result\", \"fields\":[\"<http://purl.org/dc/terms/identifier>\",\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\"]}";
70
    configuration.set("lod.sourceMappings", sourceMappings);
71
72
    List inputList = new ArrayList();
73
    inputList.add(new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>" +
74
            "	<http://www.eurocris.org/ontologies/cerif/1.3#name>	\"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\""));
75
76
    reduceDriver.withConfiguration(configuration);
77
    reduceDriver.withInput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>,"), inputList);
78
79
    reduceDriver.run();
80 45517 eri.katsar
}
81 45621 eri.katsar
@Test
82
    public void genericTest(){
83
    String fieldValue="lalalal^^Sssss";
84
    fieldValue=fieldValue.substring(0,fieldValue.indexOf("^"));
85
    System.out.println(fieldValue);
86
    }
87
88 45528 eri.katsar
}
89 45621 eri.katsar
90 45517 eri.katsar
        /*     mapDriver.withConfiguration(configuration)
91 43504 eri.katsar
                .withInput(new LongWritable(1),
92
                        new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity> .\n"))
93 45528 eri.katsar
94 44307 eri.katsar
*//*
95 43504 eri.katsar
96 44307 eri.katsar
97
     */
98
/*   mapDriver .withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"),
99 44137 eri.katsar
                new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\","));
100 44307 eri.katsar
*//*
101
102 43504 eri.katsar
        mapDriver.runTest();
103
    }
104
105
    @Test
106
    public void testReducer() throws IOException {
107
        List<Text> values = new ArrayList<>();
108
        values.add(new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\""));
109 45809 eri.katsar
        limesReducerDriver
110 43504 eri.katsar
                .withInput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"), values)
111
                .withOutput(new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"), new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\""));
112 45809 eri.katsar
        limesReducerDriver.run();
113 43504 eri.katsar
114
    }
115
116 44137 eri.katsar
    private static List<Pair<LongWritable, Text>> getInput() throws FileNotFoundException {
117
        File file = new File("C:\\Users\\eri_k\\workspace\\lodinterlinking\\branches\\cacheOptimized\\src\\main\\resources\\eu\\dnetlib\\data\\mapreduce\\hbase\\lodExport\\source.nt");
118
        Scanner scanner = new Scanner(new FileInputStream(file));
119
        List<Pair<LongWritable, Text>> input = new ArrayList<Pair<LongWritable, Text>>();
120
        while (scanner.hasNext()) {
121
            String line = scanner.nextLine();
122
            input.add(new Pair<LongWritable, Text>(new LongWritable(0), new Text(line)));
123
        }
124
        return input;
125
    }
126
127 43504 eri.katsar
    @Test
128
    public void testMapReduce() {
129 44307 eri.katsar
        */
130
/*mapReduceDriver.withInput(new LongWritable(), new Text("655209;1;796764372490213;804422938115889;6"));
131 43504 eri.katsar
        mapReduceDriver.withOutput(new Text("6"), new IntWritable(2));
132 45517 eri.katsar
        mapReduceDriver.runTest();
133 43504 eri.katsar
134
    }
135
}
136 45517 eri.katsar
*/