Revision 52917
Added by Miriam Baglioni over 6 years ago
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/bulktag/ResultTaggerTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.bulktag; |
|
2 |
|
|
3 |
import com.googlecode.protobuf.format.JsonFormat; |
|
4 |
import eu.dnetlib.data.bulktag.CommunityConfiguration; |
|
5 |
import eu.dnetlib.data.bulktag.CommunityConfigurationFactory; |
|
6 |
import eu.dnetlib.data.proto.FieldTypeProtos; |
|
7 |
import eu.dnetlib.data.proto.OafProtos; |
|
8 |
import eu.dnetlib.data.proto.ResultProtos; |
|
9 |
import org.apache.commons.io.IOUtils; |
|
10 |
import org.apache.commons.lang3.StringUtils; |
|
11 |
import org.apache.hadoop.mapreduce.Counter; |
|
12 |
import org.apache.hadoop.mapreduce.Mapper; |
|
13 |
import org.dom4j.DocumentException; |
|
14 |
import org.junit.Before; |
|
15 |
import org.junit.Test; |
|
16 |
import org.junit.runner.RunWith; |
|
17 |
import org.mockito.Mock; |
|
18 |
import org.mockito.junit.MockitoJUnitRunner; |
|
19 |
|
|
20 |
import java.io.IOException; |
|
21 |
import java.util.HashSet; |
|
22 |
import java.util.List; |
|
23 |
import java.util.Set; |
|
24 |
import java.util.stream.Collectors; |
|
25 |
|
|
26 |
import static org.junit.Assert.*; |
|
27 |
|
|
28 |
import static org.mockito.Mockito.*; |
|
29 |
|
|
30 |
/** |
|
31 |
* Created by miriam on 02/08/2018. |
|
32 |
*/ |
|
33 |
@RunWith(MockitoJUnitRunner.class) |
|
34 |
public class ResultTaggerTest { |
|
35 |
|
|
36 |
private String xml; |
|
37 |
|
|
38 |
private CommunityConfiguration cc; |
|
39 |
|
|
40 |
@Mock |
|
41 |
private Mapper.Context context; |
|
42 |
|
|
43 |
@Mock |
|
44 |
private Counter counter; |
|
45 |
|
|
46 |
private ResultTagger resultTagger = new ResultTagger(); |
|
47 |
|
|
48 |
@Before |
|
49 |
public void setUp() throws IOException, DocumentException { |
|
50 |
xml = IOUtils.toString(getClass().getResourceAsStream("community_configuration.xml")); |
|
51 |
|
|
52 |
when(context.getCounter(anyString(), anyString())).thenReturn(counter); |
|
53 |
|
|
54 |
cc = CommunityConfigurationFactory.newInstance(xml); |
|
55 |
assertEquals(cc.size(),4); |
|
56 |
cc.getCommunityList().forEach(c -> assertTrue(StringUtils.isNoneBlank(c.getId()))); |
|
57 |
} |
|
58 |
|
|
59 |
@Test |
|
60 |
public void testResultTaggerSubject() throws IOException { |
|
61 |
OafProtos.Oaf oaf = getOaf("oaf_subject.json"); |
|
62 |
|
|
63 |
assertTrue(oaf.getEntity().getResult().getMetadata().getContextList().isEmpty()); |
|
64 |
|
|
65 |
oaf = resultTagger.enrichContext(oaf, cc, context); |
|
66 |
List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
67 |
assertNotNull(contextList); |
|
68 |
assertEquals(contextList.size(),1); |
|
69 |
assertEquals(contextList.get(0).getId(),"ni"); |
|
70 |
|
|
71 |
} |
|
72 |
|
|
73 |
@Test |
|
74 |
public void testResultTaggerSubject2() throws IOException { |
|
75 |
OafProtos.Oaf oaf = getOaf("oaf_subject_2.jason"); |
|
76 |
|
|
77 |
assertTrue(oaf.getEntity().getResult().getMetadata().getContextList().isEmpty()); |
|
78 |
|
|
79 |
oaf = resultTagger.enrichContext(oaf, cc, context); |
|
80 |
List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
81 |
assertNotNull(contextList); |
|
82 |
assertEquals(contextList.size(),2); |
|
83 |
Set<String> ids = new HashSet<>(); |
|
84 |
ids.addAll(contextList.stream().map(c->c.getId()).collect(Collectors.toList())); |
|
85 |
assertTrue(ids.contains("mes")); |
|
86 |
assertTrue(ids.contains("ni")); |
|
87 |
System.out.println(oaf.toString()); |
|
88 |
} |
|
89 |
|
|
90 |
@Test |
|
91 |
public void testResultTaggerExistingContext() throws IOException { |
|
92 |
OafProtos.Oaf oaf = getOaf("oaf_existingcontext.json"); |
|
93 |
|
|
94 |
assertFalse(oaf.getEntity().getResult().getMetadata().getContextList().isEmpty()); |
|
95 |
|
|
96 |
oaf = resultTagger.enrichContext(oaf, cc, context); |
|
97 |
List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
98 |
assertNotNull(contextList); |
|
99 |
assertEquals(contextList.size(),1); |
|
100 |
assertEquals(contextList.get(0).getId(),"ni"); |
|
101 |
assertEquals(contextList.get(0).getDataInfoCount(),1); |
|
102 |
assertEquals(contextList.get(0).getDataInfoList().get(0).getInferenceprovenance(),"bulktagging::community"); |
|
103 |
|
|
104 |
} |
|
105 |
|
|
106 |
@Test |
|
107 |
public void testReadOaf() throws IOException { |
|
108 |
|
|
109 |
assertNotNull(getOaf("oaf_existingcontext_withprovenance.json")); |
|
110 |
} |
|
111 |
|
|
112 |
@Test |
|
113 |
public void testResultTaggerExistingContextWithProvenance() throws IOException{ |
|
114 |
OafProtos.Oaf oaf = getOaf("oaf_existingcontext_withprovenance.json"); |
|
115 |
|
|
116 |
List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
117 |
assertFalse(contextList.isEmpty()); |
|
118 |
assertEquals(contextList.size(),1); |
|
119 |
assertEquals(contextList.get(0).getId(),"ni"); |
|
120 |
assertEquals(contextList.get(0).getDataInfoCount(),1); |
|
121 |
assertEquals(contextList.get(0).getDataInfoList().get(0).getInferenceprovenance(),"bulktagging::community"); |
|
122 |
oaf = resultTagger.enrichContext(oaf, cc, context); |
|
123 |
contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
124 |
assertEquals(contextList.size(),1); |
|
125 |
assertEquals(contextList.get(0).getId(),"ni"); |
|
126 |
assertEquals(contextList.get(0).getDataInfoCount(),1); |
|
127 |
assertEquals(contextList.get(0).getDataInfoList().get(0).getInferenceprovenance(),"bulktagging::community"); |
|
128 |
} |
|
129 |
|
|
130 |
@Test |
|
131 |
public void testResultTaggerDatasource() throws IOException{ |
|
132 |
OafProtos.Oaf oaf = getOaf("oaf_datasource.json"); |
|
133 |
List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
134 |
assertTrue(contextList.isEmpty()); |
|
135 |
oaf = resultTagger.enrichContext(oaf,cc,context); |
|
136 |
contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
137 |
assertFalse(contextList.isEmpty()); |
|
138 |
assertEquals(contextList.size(),2); |
|
139 |
Set<String> ids = new HashSet<>(); |
|
140 |
ids.addAll(contextList.stream().map(c->{ |
|
141 |
assertTrue(c.getDataInfoCount()== 1); |
|
142 |
return c.getId(); |
|
143 |
}).collect(Collectors.toList())); |
|
144 |
assertTrue(ids.contains("aginfra")); |
|
145 |
assertTrue(ids.contains("ni")); |
|
146 |
} |
|
147 |
|
|
148 |
@Test |
|
149 |
public void testResultTaggerDatasource2() throws IOException{ |
|
150 |
OafProtos.Oaf oaf = getOaf("oaf_datasource2.json"); |
|
151 |
List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
152 |
assertTrue(contextList.isEmpty()); |
|
153 |
oaf = resultTagger.enrichContext(oaf,cc,context); |
|
154 |
contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
155 |
assertFalse(contextList.isEmpty()); |
|
156 |
assertEquals(contextList.size(),3); |
|
157 |
Set<String> ids = new HashSet<>(); |
|
158 |
ids.addAll(contextList.stream().map(c->{ |
|
159 |
assertTrue(c.getDataInfoCount()== 1); |
|
160 |
return c.getId(); |
|
161 |
}).collect(Collectors.toList())); |
|
162 |
assertTrue(ids.contains("aginfra")); |
|
163 |
assertTrue(ids.contains("ni")); |
|
164 |
assertTrue(ids.contains("mes")); |
|
165 |
} |
|
166 |
|
|
167 |
@Test |
|
168 |
public void testResultTaggerDatasource3() throws IOException{ |
|
169 |
OafProtos.Oaf oaf = getOaf("oaf_existingcontext3.json"); |
|
170 |
List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
171 |
assertFalse(contextList.isEmpty()); |
|
172 |
oaf = resultTagger.enrichContext(oaf,cc,context); |
|
173 |
contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
174 |
assertFalse(contextList.isEmpty()); |
|
175 |
assertEquals(contextList.size(),1); |
|
176 |
assertTrue(contextList.get(0).getId().equals("ni")); |
|
177 |
assertTrue(contextList.get(0).getDataInfoCount() == 2); |
|
178 |
Set<String> provenance = new HashSet<>(); |
|
179 |
|
|
180 |
final List<FieldTypeProtos.DataInfo> dataInfoList = contextList.get(0).getDataInfoList(); |
|
181 |
provenance.addAll(dataInfoList.stream().map(d->d.getInferenceprovenance()).collect(Collectors.toList())); |
|
182 |
assertTrue(provenance.size()==2); |
|
183 |
assertTrue(provenance.contains("bulktagging::community")); |
|
184 |
|
|
185 |
} |
|
186 |
|
|
187 |
private OafProtos.Oaf getOaf(String oafjson) throws IOException { |
|
188 |
final String json = IOUtils.toString(getClass().getResourceAsStream(oafjson)); |
|
189 |
|
|
190 |
final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder(); |
|
191 |
JsonFormat.merge(json, oaf); |
|
192 |
|
|
193 |
return oaf.build(); |
|
194 |
} |
|
195 |
} |
Also available in: Unified diff
Tagger for the bulk tagging