Project

General

Profile

« Previous | Next » 

Revision 52917

Tagger for the bulk tagging

View differences:

modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/bulktag/ResultTaggerTest.java
1
package eu.dnetlib.data.mapreduce.hbase.bulktag;
2

  
3
import com.googlecode.protobuf.format.JsonFormat;
import eu.dnetlib.data.bulktag.CommunityConfiguration;
import eu.dnetlib.data.bulktag.CommunityConfigurationFactory;
import eu.dnetlib.data.proto.FieldTypeProtos;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.data.proto.ResultProtos;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import org.dom4j.DocumentException;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
29

  
30
/**
31
 * Created by miriam on 02/08/2018.
32
 */
33
@RunWith(MockitoJUnitRunner.class)
34
public class ResultTaggerTest {
35

  
36
    private String xml;
37

  
38
    private CommunityConfiguration cc;
39

  
40
    @Mock
41
    private Mapper.Context context;
42

  
43
    @Mock
44
    private Counter counter;
45

  
46
    private ResultTagger resultTagger = new ResultTagger();
47

  
48
    @Before
49
    public void setUp() throws IOException, DocumentException {
50
        xml = IOUtils.toString(getClass().getResourceAsStream("community_configuration.xml"));
51

  
52
        when(context.getCounter(anyString(), anyString())).thenReturn(counter);
53

  
54
        cc = CommunityConfigurationFactory.newInstance(xml);
55
        assertEquals(cc.size(),4);
56
        cc.getCommunityList().forEach(c -> assertTrue(StringUtils.isNoneBlank(c.getId())));
57
    }
58

  
59
    @Test
60
    public void testResultTaggerSubject() throws IOException {
61
        OafProtos.Oaf oaf = getOaf("oaf_subject.json");
62

  
63
        assertTrue(oaf.getEntity().getResult().getMetadata().getContextList().isEmpty());
64

  
65
        oaf = resultTagger.enrichContext(oaf, cc, context);
66
        List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList();
67
        assertNotNull(contextList);
68
        assertEquals(contextList.size(),1);
69
        assertEquals(contextList.get(0).getId(),"ni");
70

  
71
    }
72

  
73
    @Test
74
    public void testResultTaggerSubject2() throws IOException {
75
        OafProtos.Oaf oaf = getOaf("oaf_subject_2.jason");
76

  
77
        assertTrue(oaf.getEntity().getResult().getMetadata().getContextList().isEmpty());
78

  
79
        oaf = resultTagger.enrichContext(oaf, cc, context);
80
        List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList();
81
        assertNotNull(contextList);
82
        assertEquals(contextList.size(),2);
83
        Set<String> ids = new HashSet<>();
84
        ids.addAll(contextList.stream().map(c->c.getId()).collect(Collectors.toList()));
85
        assertTrue(ids.contains("mes"));
86
        assertTrue(ids.contains("ni"));
87
        System.out.println(oaf.toString());
88
    }
89

  
90
    @Test
91
    public void testResultTaggerExistingContext() throws IOException {
92
        OafProtos.Oaf oaf = getOaf("oaf_existingcontext.json");
93

  
94
        assertFalse(oaf.getEntity().getResult().getMetadata().getContextList().isEmpty());
95

  
96
        oaf = resultTagger.enrichContext(oaf, cc, context);
97
        List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList();
98
        assertNotNull(contextList);
99
        assertEquals(contextList.size(),1);
100
        assertEquals(contextList.get(0).getId(),"ni");
101
        assertEquals(contextList.get(0).getDataInfoCount(),1);
102
        assertEquals(contextList.get(0).getDataInfoList().get(0).getInferenceprovenance(),"bulktagging::community");
103

  
104
    }
105

  
106
    @Test
107
    public void testReadOaf() throws IOException {
108

  
109
        assertNotNull(getOaf("oaf_existingcontext_withprovenance.json"));
110
    }
111

  
112
    @Test
113
    public void testResultTaggerExistingContextWithProvenance() throws IOException{
114
        OafProtos.Oaf oaf = getOaf("oaf_existingcontext_withprovenance.json");
115

  
116
        List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList();
117
        assertFalse(contextList.isEmpty());
118
        assertEquals(contextList.size(),1);
119
        assertEquals(contextList.get(0).getId(),"ni");
120
        assertEquals(contextList.get(0).getDataInfoCount(),1);
121
        assertEquals(contextList.get(0).getDataInfoList().get(0).getInferenceprovenance(),"bulktagging::community");
122
        oaf = resultTagger.enrichContext(oaf, cc, context);
123
        contextList = oaf.getEntity().getResult().getMetadata().getContextList();
124
        assertEquals(contextList.size(),1);
125
        assertEquals(contextList.get(0).getId(),"ni");
126
        assertEquals(contextList.get(0).getDataInfoCount(),1);
127
        assertEquals(contextList.get(0).getDataInfoList().get(0).getInferenceprovenance(),"bulktagging::community");
128
    }
129

  
130
    @Test
131
    public void testResultTaggerDatasource() throws IOException{
132
        OafProtos.Oaf oaf = getOaf("oaf_datasource.json");
133
        List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList();
134
        assertTrue(contextList.isEmpty());
135
        oaf = resultTagger.enrichContext(oaf,cc,context);
136
        contextList = oaf.getEntity().getResult().getMetadata().getContextList();
137
        assertFalse(contextList.isEmpty());
138
        assertEquals(contextList.size(),2);
139
        Set<String> ids = new HashSet<>();
140
        ids.addAll(contextList.stream().map(c->{
141
            assertTrue(c.getDataInfoCount()== 1);
142
            return c.getId();
143
        }).collect(Collectors.toList()));
144
        assertTrue(ids.contains("aginfra"));
145
        assertTrue(ids.contains("ni"));
146
    }
147

  
148
    @Test
149
    public void testResultTaggerDatasource2() throws IOException{
150
        OafProtos.Oaf oaf = getOaf("oaf_datasource2.json");
151
        List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList();
152
        assertTrue(contextList.isEmpty());
153
        oaf = resultTagger.enrichContext(oaf,cc,context);
154
        contextList = oaf.getEntity().getResult().getMetadata().getContextList();
155
        assertFalse(contextList.isEmpty());
156
        assertEquals(contextList.size(),3);
157
        Set<String> ids = new HashSet<>();
158
        ids.addAll(contextList.stream().map(c->{
159
            assertTrue(c.getDataInfoCount()== 1);
160
            return c.getId();
161
        }).collect(Collectors.toList()));
162
        assertTrue(ids.contains("aginfra"));
163
        assertTrue(ids.contains("ni"));
164
        assertTrue(ids.contains("mes"));
165
    }
166

  
167
    @Test
168
    public void testResultTaggerDatasource3() throws IOException{
169
        OafProtos.Oaf oaf = getOaf("oaf_existingcontext3.json");
170
        List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList();
171
        assertFalse(contextList.isEmpty());
172
        oaf = resultTagger.enrichContext(oaf,cc,context);
173
        contextList = oaf.getEntity().getResult().getMetadata().getContextList();
174
        assertFalse(contextList.isEmpty());
175
        assertEquals(contextList.size(),1);
176
        assertTrue(contextList.get(0).getId().equals("ni"));
177
        assertTrue(contextList.get(0).getDataInfoCount() == 2);
178
        Set<String> provenance = new HashSet<>();
179

  
180
        final List<FieldTypeProtos.DataInfo> dataInfoList = contextList.get(0).getDataInfoList();
181
        provenance.addAll(dataInfoList.stream().map(d->d.getInferenceprovenance()).collect(Collectors.toList()));
182
        assertTrue(provenance.size()==2);
183
        assertTrue(provenance.contains("bulktagging::community"));
184

  
185
    }
186

  
187
    private OafProtos.Oaf getOaf(String oafjson) throws IOException {
188
        final String json = IOUtils.toString(getClass().getResourceAsStream(oafjson));
189

  
190
        final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
191
        JsonFormat.merge(json, oaf);
192

  
193
        return oaf.build();
194
    }
195
}

Also available in: Unified diff