Project

General

Profile

1
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.DatasetReducer;
2
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.SourceMapper;
3
import eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.TargetMapper;
4
import org.apache.hadoop.conf.Configuration;
5
import org.apache.hadoop.io.LongWritable;
6
import org.apache.hadoop.io.Text;
7
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
8
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
9
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
10
import org.junit.Before;
11
import org.junit.Test;
12

    
13
import java.io.IOException;
14
import java.util.ArrayList;
15
import java.util.List;
16

    
17

    
18
public class PreprocessingTest {
19

    
20
    MapDriver<LongWritable, Text, Text, Text> mapDriver;
21
    ReduceDriver<Text, Text, Text, Text> reduceDriver;
22
    MapReduceDriver<LongWritable, Text, Text, Text, Text, Text> mapReduceDriver;
23

    
24
    @Before
25
    public void setUp() {
26
        DatasetReducer datasetReducer = new DatasetReducer();
27
        reduceDriver = ReduceDriver.newReduceDriver(datasetReducer);
28
       }
29

    
30
    @Test
31
    public void testMapper() throws IOException {
32
        Configuration configuration = new Configuration();
33
        String sourceMappings = "{\"type\":\"result\", \"fields\":[\"<http://purl.org/dc/terms/identifier>\",\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\"]}";
34
        configuration.set("lod.sourceMappings", sourceMappings);
35
        mapDriver = MapDriver.newMapDriver(new SourceMapper());
36
        mapDriver.withConfiguration(configuration);
37
        mapDriver.withInput(new LongWritable(1),  new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity> .\n"))
38
                .withInput(new LongWritable(2),
39
                        new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.eurocris.org/ontologies/cerif/1.3#name> \"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\" .\n"));
40
        mapDriver.withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>,"), new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>" +
41
                "	<http://www.eurocris.org/ontologies/cerif/1.3#name>	\"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\""));
42
        mapDriver.runTest(true);
43

    
44

    
45
    }
46

    
47

    
48

    
49
    @Test
50
    public void testTargetMapper() throws IOException {
51
        Configuration configuration = new Configuration();
52
        String targetMappings = "{\"type\":\"publications\",\"fields\":[\"<http://purl.org/dc/terms/issued>\",\"<http://www.w3.org/2000/01/rdf-schema#label>\",\"<http://purl.org/dc/terms/identifier>\"]}";
53
        configuration.set("lod.targetMappings", targetMappings);
54

    
55
        mapDriver = MapDriver.newMapDriver(new TargetMapper());
56
        mapDriver.withConfiguration(configuration);
57
        mapDriver.withInput(new LongWritable(1),  new Text("<http://xmlns.com/foaf/0.1/Document> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class> .\n"));
58
     //   mapDriver.withOutput(new Text("TARGET,publications,<http://dblp.l3s.de/d2r/resource/journals/publications>"),new Text("\"4OR\""));
59
        mapDriver.runTest(true);
60

    
61
    }
62

    
63

    
64

    
65

    
66
    @Test
67
public void testReducer ()throws Exception {
68
    Configuration configuration = new Configuration();
69
    String sourceMappings = "{\"type\":\"result\", \"fields\":[\"<http://purl.org/dc/terms/identifier>\",\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\"]}";
70
    configuration.set("lod.sourceMappings", sourceMappings);
71

    
72
    List inputList = new ArrayList();
73
    inputList.add(new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>" +
74
            "	<http://www.eurocris.org/ontologies/cerif/1.3#name>	\"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\""));
75

    
76
    reduceDriver.withConfiguration(configuration);
77
    reduceDriver.withInput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>,"), inputList);
78

    
79
    reduceDriver.run();
80
}
81
@Test
82
    public void genericTest(){
83
    String fieldValue="lalalal^^Sssss";
84
    fieldValue=fieldValue.substring(0,fieldValue.indexOf("^"));
85
    System.out.println(fieldValue);
86
    }
87

    
88
}
89

    
90
        /*     mapDriver.withConfiguration(configuration)
91
                .withInput(new LongWritable(1),
92
                        new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity> .\n"))
93

    
94
*//*
95

    
96

    
97
     */
98
/*   mapDriver .withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"),
99
                new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\","));
100
*//*
101

    
102
        mapDriver.runTest();
103
    }
104

    
105
    @Test
106
    public void testReducer() throws IOException {
107
        List<Text> values = new ArrayList<>();
108
        values.add(new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\""));
109
        limesReducerDriver
110
                .withInput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"), values)
111
                .withOutput(new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"), new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\""));
112
        limesReducerDriver.run();
113

    
114
    }
115

    
116
    private static List<Pair<LongWritable, Text>> getInput() throws FileNotFoundException {
117
        File file = new File("C:\\Users\\eri_k\\workspace\\lodinterlinking\\branches\\cacheOptimized\\src\\main\\resources\\eu\\dnetlib\\data\\mapreduce\\hbase\\lodExport\\source.nt");
118
        Scanner scanner = new Scanner(new FileInputStream(file));
119
        List<Pair<LongWritable, Text>> input = new ArrayList<Pair<LongWritable, Text>>();
120
        while (scanner.hasNext()) {
121
            String line = scanner.nextLine();
122
            input.add(new Pair<LongWritable, Text>(new LongWritable(0), new Text(line)));
123
        }
124
        return input;
125
    }
126

    
127
    @Test
128
    public void testMapReduce() {
129
        */
130
/*mapReduceDriver.withInput(new LongWritable(), new Text("655209;1;796764372490213;804422938115889;6"));
131
        mapReduceDriver.withOutput(new Text("6"), new IntWritable(2));
132
        mapReduceDriver.runTest();
133

    
134
    }
135
}
136
*/
(3-3/4)