Revision 51226
Added by Claudio Atzori over 6 years ago
modules/dnet-graph-domain/trunk/deploy.info | ||
---|---|---|
1 |
{ |
|
2 |
"type_source": "SVN", |
|
3 |
"goal": "package -U source:jar", |
|
4 |
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet50/modules/dnet-graph-domain/trunk/", |
|
5 |
"deploy_repository": "dnet5-snapshots", |
|
6 |
"version": "5", |
|
7 |
"mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", |
|
8 |
"deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet5-snapshots", |
|
9 |
"name": "dnet-graph-domain" |
|
10 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/distance/DetectorTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.distance; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertTrue; |
|
4 |
|
|
5 |
import java.util.ArrayList; |
|
6 |
import java.util.List; |
|
7 |
|
|
8 |
import eu.dnetlib.pace.distance.eval.ScoreResult; |
|
9 |
import org.junit.Test; |
|
10 |
|
|
11 |
import com.google.common.collect.Lists; |
|
12 |
|
|
13 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
|
14 |
import eu.dnetlib.pace.config.Config; |
|
15 |
import eu.dnetlib.pace.model.MapDocument; |
|
16 |
|
|
17 |
public class DetectorTest extends AbstractProtoPaceTest { |
|
18 |
|
|
19 |
@Test |
|
20 |
public void testScoreResult() { |
|
21 |
final Config config = getResultProdConf(); |
|
22 |
|
|
23 |
final MapDocument resA = result(config, "A", "Recent results from CDFsd"); |
|
24 |
final MapDocument resB = result(config, "B", "Recent results from CDF"); |
|
25 |
|
|
26 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
27 |
|
|
28 |
System.out.println(sr.toString()); |
|
29 |
} |
|
30 |
|
|
31 |
@Test |
|
32 |
public void testDistanceResultSimple() { |
|
33 |
final Config config = getResultSimpleConf(); |
|
34 |
|
|
35 |
final MapDocument resA = result(config, "A", "Recent results from CDF"); |
|
36 |
final MapDocument resB = result(config, "B", "Recent results from CDF"); |
|
37 |
|
|
38 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
39 |
final double d = sr.getScore(); |
|
40 |
System.out.println(String.format(" d ---> %s", d)); |
|
41 |
|
|
42 |
assertTrue(d == 1.0); |
|
43 |
} |
|
44 |
|
|
45 |
@Test |
|
46 |
public void testDistanceResultSimpleMissingDates() { |
|
47 |
final Config config = getResultSimpleConf(); |
|
48 |
|
|
49 |
final MapDocument resA = result(config, "A", "Recent results from BES"); |
|
50 |
final MapDocument resB = result(config, "A", "Recent results from CES"); |
|
51 |
|
|
52 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
53 |
final double d = sr.getScore(); |
|
54 |
System.out.println(String.format(" d ---> %s", d)); |
|
55 |
|
|
56 |
assertTrue(d > 0.97); |
|
57 |
} |
|
58 |
|
|
59 |
@Test |
|
60 |
public void testDistanceResultInvalidDate() { |
|
61 |
final Config config = getResultConf(); |
|
62 |
|
|
63 |
final MapDocument resA = result(config, "A", "title title title 6BESR", "2013-01-05"); |
|
64 |
final MapDocument resB = result(config, "B", "title title title 6BESR", "qwerty"); |
|
65 |
|
|
66 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
67 |
final double d = sr.getScore(); |
|
68 |
System.out.println(String.format(" d ---> %s", d)); |
|
69 |
|
|
70 |
assertTrue(d == 1.0); |
|
71 |
} |
|
72 |
|
|
73 |
@Test |
|
74 |
public void testDistanceResultMissingOneDate() { |
|
75 |
final Config config = getResultConf(); |
|
76 |
|
|
77 |
final MapDocument resA = result(config, "A", "title title title 6BESR", null); |
|
78 |
final MapDocument resB = result(config, "B", "title title title 6CLER", "2012-02"); |
|
79 |
|
|
80 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
81 |
double d = sr.getScore(); |
|
82 |
System.out.println(String.format(" d ---> %s", d)); |
|
83 |
|
|
84 |
assertTrue((d > 0.9) && (d < 1.0)); |
|
85 |
} |
|
86 |
|
|
87 |
@Test |
|
88 |
public void testDistanceResult() { |
|
89 |
final Config config = getResultConf(); |
|
90 |
|
|
91 |
final MapDocument resA = result(config, "A", "title title title BES", ""); |
|
92 |
final MapDocument resB = result(config, "B", "title title title CLEO"); |
|
93 |
|
|
94 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
95 |
double d = sr.getScore(); |
|
96 |
System.out.println(String.format(" d ---> %s", d)); |
|
97 |
|
|
98 |
assertTrue((d > 0.9) && (d < 1.0)); |
|
99 |
} |
|
100 |
|
|
101 |
@Test |
|
102 |
public void testDistanceResultMissingTwoDate() { |
|
103 |
final Config config = getResultConf(); |
|
104 |
|
|
105 |
final MapDocument resA = result(config, "A", "title title title 6BESR"); |
|
106 |
final MapDocument resB = result(config, "B", "title title title 6CLER"); |
|
107 |
|
|
108 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
109 |
double d = sr.getScore(); |
|
110 |
System.out.println(String.format(" d ---> %s", d)); |
|
111 |
|
|
112 |
assertTrue((d > 0.9) && (d < 1.0)); |
|
113 |
} |
|
114 |
|
|
115 |
@Test |
|
116 |
public void testDistanceOrganizationIgnoreMissing() { |
|
117 |
|
|
118 |
final Config config = getOrganizationSimpleConf(); |
|
119 |
|
|
120 |
final MapDocument orgA = organization(config, "A", "CONSIGLIO NAZIONALE DELLE RICERCHE"); |
|
121 |
final MapDocument orgB = organization(config, "B", "CONSIGLIO NAZIONALE DELLE RICERCHE", "CNR"); |
|
122 |
|
|
123 |
final ScoreResult sr = new PaceDocumentDistance().between(orgA, orgB, config); |
|
124 |
final double d = sr.getScore(); |
|
125 |
System.out.println(String.format(" d ---> %s", d)); |
|
126 |
|
|
127 |
assertTrue(d == 1.0); |
|
128 |
} |
|
129 |
|
|
130 |
@Test |
|
131 |
public void testDistanceResultCase1() { |
|
132 |
|
|
133 |
final Config config = getResultConf(); |
|
134 |
|
|
135 |
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003"); |
|
136 |
final MapDocument resB = result(config, "B", "Search for the Standard Model Higgs Boson", "2003"); |
|
137 |
|
|
138 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
139 |
double d = sr.getScore(); |
|
140 |
System.out.println(String.format(" d ---> %s", d)); |
|
141 |
|
|
142 |
assertTrue((d > 0.9) && (d < 1.0)); |
|
143 |
} |
|
144 |
|
|
145 |
@Test |
|
146 |
public void testDistanceResultCaseDoiMatch1() { |
|
147 |
final Config config = getResultConf(); |
|
148 |
|
|
149 |
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003", "10.1594/PANGAEA.726855"); |
|
150 |
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", "10.1594/PANGAEA.726855"); |
|
151 |
|
|
152 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
153 |
double d = sr.getScore(); |
|
154 |
System.out.println(String.format(" d ---> %s", d)); |
|
155 |
|
|
156 |
assertTrue("exact DOIs will produce an exact match", d == 1.0); |
|
157 |
} |
|
158 |
|
|
159 |
@Test |
|
160 |
public void testDistanceResultCaseDoiMatch2() { |
|
161 |
final Config config = getResultConf(); |
|
162 |
|
|
163 |
final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1594/PANGAEA.726855"); |
|
164 |
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2005", "10.1594/PANGAEA.726855"); |
|
165 |
|
|
166 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
167 |
double d = sr.getScore(); |
|
168 |
System.out.println(String.format(" d ---> %s", d)); |
|
169 |
|
|
170 |
assertTrue("exact DOIs will produce an exact match, regardless of different titles or publication years", d == 1.0); |
|
171 |
} |
|
172 |
|
|
173 |
@Test |
|
174 |
public void testDistanceResultCaseDoiMatch3() { |
|
175 |
final Config config = getResultConf(); |
|
176 |
|
|
177 |
final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024"); |
|
178 |
final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003"); |
|
179 |
|
|
180 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
181 |
double d = sr.getScore(); |
|
182 |
System.out.println(String.format(" d ---> %s", d)); |
|
183 |
|
|
184 |
assertTrue("a missing DOI will casue the comparsion to continue with the following conditions", d == 1.0); |
|
185 |
} |
|
186 |
|
|
187 |
@Test |
|
188 |
public void testDistanceResultCaseDoiMatch4() { |
|
189 |
final Config config = getResultConf(); |
|
190 |
|
|
191 |
final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024"); |
|
192 |
final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2005"); |
|
193 |
|
|
194 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
195 |
double d = sr.getScore(); |
|
196 |
System.out.println(String.format(" d ---> %s", d)); |
|
197 |
|
|
198 |
assertTrue("a missing DOI, comparsion continues with the following conditions, different publication years will drop the score to 0", d == 0.0); |
|
199 |
} |
|
200 |
|
|
201 |
@Test |
|
202 |
public void testDistanceResultCaseDoiMatch5() { |
|
203 |
|
|
204 |
final Config config = getResultConf(); |
|
205 |
|
|
206 |
final MapDocument resA = result(config, "A", "Search for the Standard Model Higgs Boson", "2003", "10.1016/j.jmb.2010.12.020"); |
|
207 |
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003"); |
|
208 |
|
|
209 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
210 |
double d = sr.getScore(); |
|
211 |
System.out.println(String.format(" d ---> %s", d)); |
|
212 |
|
|
213 |
assertTrue("a missing DOI, comparsion continues with the following conditions", (d > 0.9) && (d < 1.0)); |
|
214 |
} |
|
215 |
|
|
216 |
@Test |
|
217 |
public void testDistanceResultCaseDoiMatch6() { |
|
218 |
final Config config = getResultConf(); |
|
219 |
|
|
220 |
final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024"); |
|
221 |
final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003", "anotherDifferentDOI"); |
|
222 |
|
|
223 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
224 |
double d = sr.getScore(); |
|
225 |
System.out.println(String.format(" d ---> %s", d)); |
|
226 |
|
|
227 |
assertTrue("different DOIs will drop the score to 0, regardless of the other fields", d == 0.0); |
|
228 |
} |
|
229 |
|
|
230 |
@Test |
|
231 |
public void testDistanceResultCaseDoiMatch7() { |
|
232 |
final Config config = getResultConf(); |
|
233 |
|
|
234 |
final MapDocument resA = result(config, "A", "Adrenal Insufficiency asd asd", "1951", Lists.newArrayList("PMC2037944", "axdsds")); |
|
235 |
final MapDocument resB = result(config, "B", "Adrenal Insufficiency", "1951", "PMC2037944"); |
|
236 |
|
|
237 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
238 |
double d = sr.getScore(); |
|
239 |
System.out.println(String.format(" d ---> %s", d)); |
|
240 |
|
|
241 |
assertTrue("different DOIs will drop the score to 0, regardless of the other fields", d > 0.9 & d < 1); |
|
242 |
} |
|
243 |
|
|
244 |
// http://dx.doi.org/10.1594/PANGAEA.726855 doi:10.1594/PANGAEA.726855 |
|
245 |
|
|
246 |
@Test |
|
247 |
public void testDistanceResultCaseAuthor1() { |
|
248 |
|
|
249 |
final Config config = getResultAuthorsConf(); |
|
250 |
|
|
251 |
final List<String> authorsA = Lists.newArrayList("a", "b", "c", "d"); |
|
252 |
final List<String> authorsB = Lists.newArrayList("a", "b", "c"); |
|
253 |
final List<String> pid = Lists.newArrayList(); |
|
254 |
|
|
255 |
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA); |
|
256 |
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB); |
|
257 |
|
|
258 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
259 |
final double d = sr.getScore(); |
|
260 |
System.out.println(String.format(" d ---> %s", d)); |
|
261 |
|
|
262 |
assertTrue(d == 0.0); |
|
263 |
} |
|
264 |
|
|
265 |
@Test |
|
266 |
public void testDistanceResultCaseAuthor2() { |
|
267 |
|
|
268 |
final Config config = getResultAuthorsConf(); |
|
269 |
|
|
270 |
final List<String> authorsA = Lists.newArrayList("a", "b", "c"); |
|
271 |
final List<String> authorsB = Lists.newArrayList("a", "b", "c"); |
|
272 |
final List<String> pid = Lists.newArrayList(); |
|
273 |
|
|
274 |
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA); |
|
275 |
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB); |
|
276 |
|
|
277 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
278 |
final double d = sr.getScore(); |
|
279 |
System.out.println(String.format(" d ---> %s", d)); |
|
280 |
|
|
281 |
assertTrue(d == 1.0); |
|
282 |
} |
|
283 |
|
|
284 |
@Test |
|
285 |
public void testDistanceResultCaseAuthor3() { |
|
286 |
|
|
287 |
final Config config = getResultAuthorsConf(); |
|
288 |
|
|
289 |
final List<String> authorsA = Lists.newArrayList("Bardi, A.", "Manghi, P.", "Artini, M."); |
|
290 |
final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele"); |
|
291 |
final List<String> pid = Lists.newArrayList(); |
|
292 |
|
|
293 |
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA); |
|
294 |
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB); |
|
295 |
|
|
296 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
297 |
double d = sr.getScore(); |
|
298 |
System.out.println(String.format(" d ---> %s", d)); |
|
299 |
|
|
300 |
assertTrue((d > 0.9) && (d < 1.0)); |
|
301 |
} |
|
302 |
|
|
303 |
@Test |
|
304 |
public void testDistanceResultCaseAuthor4() { |
|
305 |
|
|
306 |
final Config config = getResultAuthorsConf(); |
|
307 |
|
|
308 |
final List<String> authorsA = Lists.newArrayList("Bardi, Alessia", "Manghi, Paolo", "Artini, Michele", "a"); |
|
309 |
final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele"); |
|
310 |
final List<String> pid = Lists.newArrayList(); |
|
311 |
|
|
312 |
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA); |
|
313 |
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB); |
|
314 |
|
|
315 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
316 |
final double d = sr.getScore(); |
|
317 |
System.out.println(String.format(" d ---> %s", d)); |
|
318 |
|
|
319 |
// assertTrue(d.getScore() == 0.0); |
|
320 |
} |
|
321 |
|
|
322 |
@Test |
|
323 |
public void testDistanceResultFullConf() { |
|
324 |
|
|
325 |
final Config config = getResultFullConf(); |
|
326 |
|
|
327 |
final List<String> authorsA = Lists.newArrayList("Nagarajan Pranesh", "Guy Vautier", "Punyanganie de Silva"); |
|
328 |
final List<String> authorsB = Lists.newArrayList("Pranesh Nagarajan", "Vautier Guy", "de Silva Punyanganie"); |
|
329 |
|
|
330 |
final MapDocument resA = |
|
331 |
result(config, "A", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010", |
|
332 |
"10.1186/1752-1947-4-299", authorsA); |
|
333 |
|
|
334 |
final MapDocument resB = |
|
335 |
result(config, "B", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010", |
|
336 |
"10.1186/1752-1947-4-299", authorsB); |
|
337 |
|
|
338 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
339 |
final double d = sr.getScore(); |
|
340 |
System.out.println(String.format(" d ---> %s", d)); |
|
341 |
|
|
342 |
// assertTrue(d.getScore() == 0.0); |
|
343 |
} |
|
344 |
|
|
345 |
@Test |
|
346 |
public void testDistanceProdConf1() { |
|
347 |
|
|
348 |
final Config config = getResultProdConf(); |
|
349 |
|
|
350 |
final MapDocument resA = |
|
351 |
result(config, |
|
352 |
"A", |
|
353 |
" Analysis of Transfer Embryo-Derived de-duplication"); |
|
354 |
final MapDocument resB = |
|
355 |
result(config, |
|
356 |
"B", |
|
357 |
" Analysis of Transfer Embryo Derived deduplication"); |
|
358 |
|
|
359 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
360 |
final double d = sr.getScore(); |
|
361 |
System.out.println(String.format(" d ---> %s", d)); |
|
362 |
|
|
363 |
// assertTrue(d.getScore() == 0.0); |
|
364 |
} |
|
365 |
|
|
366 |
@Test |
|
367 |
public void testDistanceProdConf2() { |
|
368 |
|
|
369 |
final Config config = getResultProdConf(); |
|
370 |
|
|
371 |
final MapDocument resA = |
|
372 |
result(config, |
|
373 |
"A", |
|
374 |
"qwerty aaabbbbbbbb bbb ccc ddddd"); |
|
375 |
final MapDocument resB = |
|
376 |
result(config, |
|
377 |
"B", |
|
378 |
"qwert aaabbbbbbbb bbb ccc ddddd"); |
|
379 |
|
|
380 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
381 |
final double d = sr.getScore(); |
|
382 |
System.out.println(String.format(" d ---> %s", d)); |
|
383 |
|
|
384 |
// assertTrue(d.getScore() == 0.0); |
|
385 |
} |
|
386 |
|
|
387 |
@Test |
|
388 |
public void testDistanceProdConf3() { |
|
389 |
|
|
390 |
final Config config = getResultProdConf(); |
|
391 |
|
|
392 |
final List<String> authorsA = Lists.newArrayList("Bardi, Alessia", "Manghi, Paolo", "Artini, Michele", "a"); |
|
393 |
final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele"); |
|
394 |
final List<String> pid = Lists.newArrayList(); |
|
395 |
|
|
396 |
final MapDocument resA = |
|
397 |
result(config, |
|
398 |
"A", |
|
399 |
"qwerty aaabbbbbbbb bbb ccc ddddd", "2003", pid, authorsA); |
|
400 |
final MapDocument resB = |
|
401 |
result(config, |
|
402 |
"B", |
|
403 |
"qwert aaabbbbbbbb bbb ccc ddddd", "2003", pid, authorsB); |
|
404 |
|
|
405 |
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config); |
|
406 |
final double d = sr.getScore(); |
|
407 |
System.out.println(String.format(" d ---> %s", d)); |
|
408 |
|
|
409 |
assertTrue(d == 0.0); |
|
410 |
} |
|
411 |
|
|
412 |
@Test |
|
413 |
public void testDistancePersonConf1() { |
|
414 |
|
|
415 |
final Config config = getPersonConf(); |
|
416 |
|
|
417 |
final MapDocument p1 = person(config, "p1_id", getPersonGT("/eu/dnetlib/pace/model/gt.author.manghi1.fo.json")); |
|
418 |
final MapDocument p2 = person(config, "p2_id", getPersonGT("/eu/dnetlib/pace/model/gt.author.manghi2.fo.json")); |
|
419 |
|
|
420 |
final ScoreResult sr = new PaceDocumentDistance().between(p1, p2, config); |
|
421 |
final double d = sr.getScore(); |
|
422 |
System.out.println(String.format(" d ---> %s", d)); |
|
423 |
|
|
424 |
// assertTrue(d.getScore() == 0.0); |
|
425 |
} |
|
426 |
|
|
427 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/clustering/ClusteringCombinerTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import org.junit.Before; |
|
4 |
import org.junit.Test; |
|
5 |
|
|
6 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
|
7 |
import eu.dnetlib.pace.config.Config; |
|
8 |
import eu.dnetlib.pace.config.Type; |
|
9 |
import eu.dnetlib.pace.model.FieldListImpl; |
|
10 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
11 |
import eu.dnetlib.pace.model.MapDocument; |
|
12 |
|
|
13 |
public class ClusteringCombinerTest extends AbstractProtoPaceTest { |
|
14 |
|
|
15 |
private Config config; |
|
16 |
|
|
17 |
@Before |
|
18 |
public void setUp() { |
|
19 |
config = getResultFullConf(); |
|
20 |
} |
|
21 |
|
|
22 |
@Test |
|
23 |
public void testCombine() { |
|
24 |
String title = "Dipping in Cygnus X-2 in a multi-wavelength campaign due to absorption of extended ADC emission"; |
|
25 |
MapDocument result = result(config, "A", title, "2013"); |
|
26 |
|
|
27 |
FieldListImpl fl = new FieldListImpl(); |
|
28 |
fl.add(new FieldValueImpl(Type.String, "desc", "lorem ipsum cabalie qwerty")); |
|
29 |
|
|
30 |
result.getFieldMap().put("desc", fl); |
|
31 |
System.out.println(title); |
|
32 |
System.out.println(ClusteringCombiner.combine(result, config)); |
|
33 |
} |
|
34 |
|
|
35 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/clustering/BlacklistAwareClusteringCombinerTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import org.junit.Before; |
|
4 |
import org.junit.Test; |
|
5 |
|
|
6 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
|
7 |
import eu.dnetlib.pace.config.Config; |
|
8 |
import eu.dnetlib.pace.config.Type; |
|
9 |
import eu.dnetlib.pace.model.FieldListImpl; |
|
10 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
11 |
import eu.dnetlib.pace.model.MapDocument; |
|
12 |
|
|
13 |
public class BlacklistAwareClusteringCombinerTest extends AbstractProtoPaceTest { |
|
14 |
|
|
15 |
private Config config; |
|
16 |
|
|
17 |
@Before |
|
18 |
public void setUp() { |
|
19 |
config = getResultFullConf(); |
|
20 |
} |
|
21 |
|
|
22 |
@Test |
|
23 |
public void testCombine() { |
|
24 |
final MapDocument result = |
|
25 |
result(config, "A", "Dipping in Cygnus X-2 in a multi-wavelength campaign due to absorption of extended ADC emission", "2013"); |
|
26 |
final FieldListImpl fl = new FieldListImpl(); |
|
27 |
fl.add(new FieldValueImpl(Type.String, "desc", "hello world description pipeline")); |
|
28 |
|
|
29 |
result.getFieldMap().put("desc", fl); |
|
30 |
|
|
31 |
fl.clear(); |
|
32 |
fl.add(new FieldValueImpl(Type.String, "title", "lorem ipsum cabalie qwerty")); |
|
33 |
final FieldListImpl field = (FieldListImpl) result.getFieldMap().get("title"); |
|
34 |
field.add(fl); |
|
35 |
|
|
36 |
System.out.println(BlacklistAwareClusteringCombiner.filterAndCombine(result, config, config.blacklists())); |
|
37 |
} |
|
38 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/AbstractProtoPaceTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.StringWriter; |
|
5 |
import java.util.ArrayList; |
|
6 |
import java.util.List; |
|
7 |
|
|
8 |
import com.google.common.collect.Lists; |
|
9 |
import com.google.gson.Gson; |
|
10 |
import eu.dnetlib.data.mapreduce.util.DNGFTest; |
|
11 |
import eu.dnetlib.data.proto.DNGFProtos.DNGF; |
|
12 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity; |
|
13 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
14 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
15 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder; |
|
16 |
import eu.dnetlib.data.proto.OrganizationProtos.Organization; |
|
17 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
18 |
import eu.dnetlib.data.proto.PublicationProtos.Publication; |
|
19 |
import eu.dnetlib.data.proto.TypeProtos; |
|
20 |
import eu.dnetlib.pace.config.Config; |
|
21 |
import eu.dnetlib.pace.config.DedupConfig; |
|
22 |
import eu.dnetlib.pace.config.Type; |
|
23 |
import eu.dnetlib.pace.model.Field; |
|
24 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
25 |
import eu.dnetlib.pace.model.MapDocument; |
|
26 |
import eu.dnetlib.pace.model.ProtoDocumentBuilder; |
|
27 |
import eu.dnetlib.pace.model.gt.GTAuthor; |
|
28 |
import eu.dnetlib.pace.model.gt.GTAuthorMapper; |
|
29 |
import org.apache.commons.io.IOUtils; |
|
30 |
import org.apache.commons.lang3.RandomStringUtils; |
|
31 |
import org.apache.commons.lang3.StringUtils; |
|
32 |
|
|
33 |
public abstract class AbstractProtoPaceTest extends DNGFTest { |
|
34 |
|
|
35 |
protected DedupConfig getResultFullConf() { |
|
36 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.full.pace.conf")); |
|
37 |
} |
|
38 |
|
|
39 |
protected DedupConfig getResultSimpleConf() { |
|
40 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.simple.pace.conf")); |
|
41 |
} |
|
42 |
|
|
43 |
protected DedupConfig getResultConf() { |
|
44 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.pace.conf")); |
|
45 |
} |
|
46 |
|
|
47 |
protected DedupConfig getOrganizationSimpleConf() { |
|
48 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/organization.pace.conf")); |
|
49 |
} |
|
50 |
|
|
51 |
protected DedupConfig getResultAuthorsConf() { |
|
52 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.authors.pace.conf")); |
|
53 |
} |
|
54 |
|
|
55 |
protected DedupConfig getPersonConf() { |
|
56 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/person.pace.conf")); |
|
57 |
} |
|
58 |
|
|
59 |
protected DedupConfig getResultProdConf() { |
|
60 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.prod.pace.conf")); |
|
61 |
} |
|
62 |
|
|
63 |
protected MapDocument person(final Config conf, final String id, final DNGF oaf) { |
|
64 |
return ProtoDocumentBuilder.newInstance(id, oaf.getEntity(), conf.model()); |
|
65 |
} |
|
66 |
|
|
67 |
protected DNGF getPersonGT(final String path) { |
|
68 |
return new GTAuthorMapper().map(getGTAuthor(path)); |
|
69 |
} |
|
70 |
|
|
71 |
protected GTAuthor getGTAuthor(final String path) { |
|
72 |
|
|
73 |
final Gson gson = new Gson(); |
|
74 |
|
|
75 |
final String json = readFromClasspath(path); |
|
76 |
|
|
77 |
final GTAuthor gta = gson.fromJson(json, GTAuthor.class); |
|
78 |
|
|
79 |
return gta; |
|
80 |
} |
|
81 |
|
|
82 |
private String readFromClasspath(final String filename) { |
|
83 |
final StringWriter sw = new StringWriter(); |
|
84 |
try { |
|
85 |
IOUtils.copy(getClass().getResourceAsStream(filename), sw); |
|
86 |
return sw.toString(); |
|
87 |
} catch (final IOException e) { |
|
88 |
throw new RuntimeException("cannot load resource from classpath: " + filename); |
|
89 |
} |
|
90 |
} |
|
91 |
|
|
92 |
protected MapDocument result(final Config config, final String id, final String title) { |
|
93 |
return result(config, id, title, null, new ArrayList<String>(), null); |
|
94 |
} |
|
95 |
|
|
96 |
protected MapDocument result(final Config config, final String id, final String title, final String date) { |
|
97 |
return result(config, id, title, date, new ArrayList<String>(), null); |
|
98 |
} |
|
99 |
|
|
100 |
protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid) { |
|
101 |
return result(config, id, title, date, pid, null); |
|
102 |
} |
|
103 |
|
|
104 |
protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid) { |
|
105 |
return result(config, id, title, date, pid, null); |
|
106 |
} |
|
107 |
|
|
108 |
protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid, final List<String> authors) { |
|
109 |
return result(config, id, title, date, Lists.newArrayList(pid), authors); |
|
110 |
} |
|
111 |
|
|
112 |
protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid, final List<String> authors) { |
|
113 |
final Publication.Metadata.Builder metadata = Publication.Metadata.newBuilder(); |
|
114 |
if (!StringUtils.isBlank(title)) { |
|
115 |
metadata.addTitle(getStruct(title, getQualifier("main title", "dnet:titles"))); |
|
116 |
metadata.addTitle(getStruct(RandomStringUtils.randomAlphabetic(10), getQualifier("alternative title", "dnet:titles"))); |
|
117 |
} |
|
118 |
if (!StringUtils.isBlank(date)) { |
|
119 |
metadata.setDateofacceptance(sf(date)); |
|
120 |
} |
|
121 |
|
|
122 |
final DNGFEntity.Builder entity = dngfEntity(id, TypeProtos.Type.publication); |
|
123 |
final Publication.Builder result = Publication.newBuilder().setMetadata(metadata); |
|
124 |
|
|
125 |
if (authors != null) { |
|
126 |
for (final String author : authors) { |
|
127 |
result.addAuthor(person(author)); |
|
128 |
} |
|
129 |
} |
|
130 |
|
|
131 |
entity.setPublication(result); |
|
132 |
|
|
133 |
if (pid != null) { |
|
134 |
for(String p : pid) { |
|
135 |
if (!StringUtils.isBlank(p)) { |
|
136 |
entity.addPid(sp(p, "doi")); |
|
137 |
//entity.addPid(sp(RandomStringUtils.randomAlphabetic(10), "oai")); |
|
138 |
} |
|
139 |
} |
|
140 |
} |
|
141 |
|
|
142 |
final DNGFEntity build = entity.build(); |
|
143 |
return ProtoDocumentBuilder.newInstance(id, build, config.model()); |
|
144 |
} |
|
145 |
|
|
146 |
private Person.Builder person(final String author) { |
|
147 |
final Person.Builder person = Person.newBuilder(); |
|
148 |
|
|
149 |
final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(author, false); |
|
150 |
final Person.Metadata.Builder metadata = Person.Metadata.newBuilder(); |
|
151 |
if (p.isAccurate()) { |
|
152 |
metadata.setFirstname(sf(p.getNormalisedFirstName())); |
|
153 |
metadata.addSecondnames(sf(p.getNormalisedSurname())); |
|
154 |
metadata.setFullname(sf(p.getNormalisedFullname())); |
|
155 |
} else { |
|
156 |
metadata.setFullname(sf(p.getOriginal())); |
|
157 |
} |
|
158 |
|
|
159 |
return person.setMetadata(metadata); |
|
160 |
} |
|
161 |
|
|
162 |
private DNGFEntity.Builder dngfEntity(final String id, final eu.dnetlib.data.proto.TypeProtos.Type type) { |
|
163 |
final DNGFEntity.Builder entity = DNGFEntity.newBuilder().setId(id).setType(type); |
|
164 |
return entity; |
|
165 |
} |
|
166 |
|
|
167 |
protected MapDocument organization(final Config config, final String id, final String legalName) { |
|
168 |
return organization(config, id, legalName, null); |
|
169 |
} |
|
170 |
|
|
171 |
protected MapDocument organization(final Config config, final String id, final String legalName, final String legalShortName) { |
|
172 |
final Organization.Metadata.Builder metadata = Organization.Metadata.newBuilder(); |
|
173 |
if (legalName != null) { |
|
174 |
metadata.setLegalname(sf(legalName)); |
|
175 |
} |
|
176 |
if (legalShortName != null) { |
|
177 |
metadata.setLegalshortname(sf(legalShortName)); |
|
178 |
} |
|
179 |
|
|
180 |
final DNGFEntity.Builder entity = dngfEntity(id, TypeProtos.Type.publication); |
|
181 |
entity.setOrganization(Organization.newBuilder().setMetadata(metadata)); |
|
182 |
|
|
183 |
return ProtoDocumentBuilder.newInstance(id, entity.build(), config.model()); |
|
184 |
} |
|
185 |
|
|
186 |
private StructuredProperty sp(final String pid, final String type) { |
|
187 |
final Builder pidSp = |
|
188 |
StructuredProperty.newBuilder().setValue(pid) |
|
189 |
.setQualifier(Qualifier.newBuilder().setClassid(type).setClassname(type).setSchemeid("dnet:pid_types").setSchemename("dnet:pid_types")); |
|
190 |
return pidSp.build(); |
|
191 |
} |
|
192 |
|
|
193 |
protected Field title(final String s) { |
|
194 |
return new FieldValueImpl(Type.String, "title", s); |
|
195 |
} |
|
196 |
|
|
197 |
protected static StructuredProperty.Builder getStruct(final String value, final Qualifier.Builder qualifier) { |
|
198 |
return StructuredProperty.newBuilder().setValue(value).setQualifier(qualifier); |
|
199 |
} |
|
200 |
|
|
201 |
/* |
|
202 |
* protected static StringField.Builder sf(final String s) { return StringField.newBuilder().setValue(s); } |
|
203 |
* |
|
204 |
* protected static Qualifier.Builder getQualifier(final String classname, final String schemename) { return |
|
205 |
* Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename); } |
|
206 |
*/ |
|
207 |
|
|
208 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/model/ProtoDocumentBuilderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.model; |
|
2 |
|
|
3 |
import com.google.common.collect.Iterables; |
|
4 |
import com.google.common.collect.Sets; |
|
5 |
import com.google.common.collect.Sets.SetView; |
|
6 |
import com.googlecode.protobuf.format.JsonFormat; |
|
7 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
8 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
9 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
|
10 |
import eu.dnetlib.pace.config.Config; |
|
11 |
import eu.dnetlib.pace.model.adaptor.Pid; |
|
12 |
import org.junit.Test; |
|
13 |
|
|
14 |
import static org.junit.Assert.assertFalse; |
|
15 |
import static org.junit.Assert.assertTrue; |
|
16 |
|
|
17 |
public class ProtoDocumentBuilderTest extends AbstractProtoPaceTest { |
|
18 |
|
|
19 |
@Test |
|
20 |
public void test_serialise1() { |
|
21 |
|
|
22 |
final String id = "12345"; |
|
23 |
|
|
24 |
final Config config = getResultFullConf(); |
|
25 |
|
|
26 |
final MapDocument document = ProtoDocumentBuilder.newInstance(id, getResult(id), config.model()); |
|
27 |
|
|
28 |
assertFalse(document.fieldNames().isEmpty()); |
|
29 |
assertFalse(Iterables.isEmpty(document.fields())); |
|
30 |
|
|
31 |
System.out.println("original:\n" + document); |
|
32 |
|
|
33 |
final String stringDoc = MapDocumentSerializer.toString(document); |
|
34 |
|
|
35 |
System.out.println("srialization:\n" + stringDoc); |
|
36 |
|
|
37 |
final MapDocument decoded = MapDocumentSerializer.decode(stringDoc.getBytes()); |
|
38 |
|
|
39 |
final SetView<String> diff = Sets.difference(document.fieldNames(), decoded.fieldNames()); |
|
40 |
|
|
41 |
assertTrue(diff.isEmpty()); |
|
42 |
|
|
43 |
System.out.println("decoded:\n" + decoded); |
|
44 |
} |
|
45 |
|
|
46 |
@Test |
|
47 |
public void test_serialise2() { |
|
48 |
|
|
49 |
final String id = "12345"; |
|
50 |
final String path = "/eu/dnetlib/pace/model/gt.author.manghi1.json"; |
|
51 |
|
|
52 |
final Config config = getPersonConf(); |
|
53 |
|
|
54 |
final MapDocument document = ProtoDocumentBuilder.newInstance(id, getPersonGT(path).getEntity(), config.model()); |
|
55 |
|
|
56 |
assertFalse(document.fieldNames().isEmpty()); |
|
57 |
assertFalse(Iterables.isEmpty(document.fields())); |
|
58 |
|
|
59 |
System.out.println("original:\n" + document); |
|
60 |
|
|
61 |
final String stringDoc = MapDocumentSerializer.toString(document); |
|
62 |
|
|
63 |
System.out.println("srialization:\n" + stringDoc); |
|
64 |
|
|
65 |
final MapDocument decoded = MapDocumentSerializer.decode(stringDoc.getBytes()); |
|
66 |
|
|
67 |
final SetView<String> diff = Sets.difference(document.fieldNames(), decoded.fieldNames()); |
|
68 |
|
|
69 |
assertTrue(diff.isEmpty()); |
|
70 |
|
|
71 |
System.out.println("decoded:\n" + decoded); |
|
72 |
} |
|
73 |
|
|
74 |
|
|
75 |
@Test |
|
76 |
public void testPidSerialization() { |
|
77 |
|
|
78 |
final StructuredProperty sp = StructuredProperty.newBuilder().setValue("1234").setQualifier( |
|
79 |
Qualifier.newBuilder().setClassid("doi").setClassname("doi").setSchemeid("dnet:pid_types").setSchemename("dnet:pid_types")).build(); |
|
80 |
|
|
81 |
final String json = JsonFormat.printToString(sp); |
|
82 |
|
|
83 |
final Pid pid = Pid.fromOafJson(json); |
|
84 |
|
|
85 |
|
|
86 |
|
|
87 |
|
|
88 |
} |
|
89 |
|
|
90 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/model/gt/AuthorTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.model.gt; |
|
2 |
|
|
3 |
import com.google.common.collect.Sets; |
|
4 |
import org.junit.Test; |
|
5 |
|
|
6 |
import java.util.Set; |
|
7 |
|
|
8 |
import static org.junit.Assert.assertTrue; |
|
9 |
|
|
10 |
public class AuthorTest { |
|
11 |
|
|
12 |
@Test |
|
13 |
public void test() { |
|
14 |
final Set<Author> s1 = getAuthors(3); |
|
15 |
final Set<Author> s2 = getAuthors(3); |
|
16 |
|
|
17 |
final Set<Author> i = Sets.intersection(s1, s2); |
|
18 |
|
|
19 |
System.out.println(i); |
|
20 |
|
|
21 |
assertTrue(i.size() == 3); |
|
22 |
|
|
23 |
} |
|
24 |
|
|
25 |
@Test |
|
26 |
public void test1() { |
|
27 |
final Authors a1 = new Authors(a("1", "Wang, M.")); |
|
28 |
final Authors a2 = new Authors(a("1", "Wang, M.")); |
|
29 |
|
|
30 |
final Set<Author> i = Sets.intersection(a1, a2); |
|
31 |
|
|
32 |
assertTrue(i.size() == 1); |
|
33 |
|
|
34 |
} |
|
35 |
|
|
36 |
private Set<Author> getAuthors(final int n) { |
|
37 |
final Set<Author> s = Sets.newHashSet(); |
|
38 |
|
|
39 |
for (int i = 0; i < n; i++) { |
|
40 |
s.add(a(i + "", "name" + i)); |
|
41 |
} |
|
42 |
return s; |
|
43 |
} |
|
44 |
|
|
45 |
private Author a(final String id, final String fullname) { |
|
46 |
final Author a = new Author(); |
|
47 |
a.setId(id); |
|
48 |
a.setFullname(fullname); |
|
49 |
return a; |
|
50 |
} |
|
51 |
|
|
52 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/transform/xml/OpenTrialsXsltFunctionsTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
|
|
5 |
import eu.dnetlib.data.transform.xml.OpenTrialsXsltFunctions.JsonProv; |
|
6 |
import org.junit.After; |
|
7 |
import org.junit.Before; |
|
8 |
import org.junit.Test; |
|
9 |
|
|
10 |
import static org.junit.Assert.assertEquals; |
|
11 |
|
|
12 |
/** |
|
13 |
* OpenTrialsXsltFunctions Tester. |
|
14 |
* |
|
15 |
*/ |
|
16 |
public class OpenTrialsXsltFunctionsTest { |
|
17 |
|
|
18 |
private String jsonProv = "[{\"url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]"; |
|
19 |
private String jsonProvWithNull = "[{\"url\" : \"\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]"; |
|
20 |
private String jidentifiers = "{112683,NCT00920439}"; |
|
21 |
|
|
22 |
@Before |
|
23 |
public void before() throws Exception { |
|
24 |
} |
|
25 |
|
|
26 |
@After |
|
27 |
public void after() throws Exception { |
|
28 |
} |
|
29 |
|
|
30 |
/** |
|
31 |
* Method: getProvs(String jsonProvList) |
|
32 |
*/ |
|
33 |
@Test |
|
34 |
public void testGetProvs() throws Exception { |
|
35 |
List<JsonProv> list = OpenTrialsXsltFunctions.getProvs(jsonProv); |
|
36 |
assertEquals(2, list.size()); |
|
37 |
} |
|
38 |
|
|
39 |
/** |
|
40 |
* Method: getMainIdentifierURL(String jsonProvList) |
|
41 |
*/ |
|
42 |
@Test |
|
43 |
public void testGetMainIdentifierURL() throws Exception { |
|
44 |
String url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProv); |
|
45 |
assertEquals( "http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508", url ); |
|
46 |
url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProvWithNull); |
|
47 |
assertEquals("https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true", url); |
|
48 |
} |
|
49 |
|
|
50 |
|
|
51 |
|
|
52 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/transform/OntologyLoaderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.InputStream; |
|
5 |
|
|
6 |
import org.apache.commons.lang3.StringUtils; |
|
7 |
import org.junit.Assert; |
|
8 |
import org.junit.Test; |
|
9 |
|
|
10 |
import static org.junit.Assert.assertEquals; |
|
11 |
import static org.junit.Assert.assertNotNull; |
|
12 |
import static org.junit.Assert.assertTrue; |
|
13 |
|
|
14 |
/** |
|
15 |
* Created by claudio on 12/12/2016. |
|
16 |
*/ |
|
17 |
public class OntologyLoaderTest { |
|
18 |
|
|
19 |
private String basePath = "/eu/dnetlib/bootstrap/profiles/OntologyDSResources/OntologyDSResourceType/"; |
|
20 |
|
|
21 |
@Test |
|
22 |
public void testLoadOntologyFromCp() { |
|
23 |
|
|
24 |
final InputStream i = getClass().getResourceAsStream(basePath + "publication_publication_relations.xml"); |
|
25 |
|
|
26 |
Ontology o = OntologyLoader.loadOntologyFromCp(i); |
|
27 |
checkOntology(o); |
|
28 |
|
|
29 |
String providedBy = o.inverseOf("isSupplementedBy"); |
|
30 |
assertEquals(providedBy, "isSupplementTo"); |
|
31 |
|
|
32 |
String provides = o.inverseOf("isPartOf"); |
|
33 |
assertEquals(provides, "hasPart"); |
|
34 |
} |
|
35 |
|
|
36 |
@Test |
|
37 |
public void testLoadOntologiesFromCp() throws IOException { |
|
38 |
|
|
39 |
OntologyLoader.loadOntologiesFromCp().values().forEach(o -> checkOntology(o)); |
|
40 |
} |
|
41 |
|
|
42 |
@Test |
|
43 |
public void testLoadOntologiesSerialization() throws IOException { |
|
44 |
|
|
45 |
final Ontologies o = OntologyLoader.loadOntologiesFromCp(); |
|
46 |
assertNotNull(o); |
|
47 |
final String json = o.toJson(true); |
|
48 |
|
|
49 |
System.out.println(json); |
|
50 |
|
|
51 |
assertTrue(StringUtils.isNoneBlank(json)); |
|
52 |
|
|
53 |
final Ontologies o1 = OntologyLoader.loadOntologies(json); |
|
54 |
|
|
55 |
assertNotNull(o1); |
|
56 |
|
|
57 |
o1.entrySet().forEach(e -> checkOntology(e.getValue())); |
|
58 |
} |
|
59 |
|
|
60 |
private void checkOntology(Ontology o) { |
|
61 |
Assert.assertNotNull(o); |
|
62 |
Assert.assertTrue(StringUtils.isNotBlank(o.getCode())); |
|
63 |
Assert.assertTrue(StringUtils.isNotBlank(o.getDescription())); |
|
64 |
Assert.assertNotNull(o.getTerms().values()); |
|
65 |
|
|
66 |
o.getTerms().values().forEach(it -> { |
|
67 |
Assert.assertTrue(StringUtils.isNotBlank(it.getCode())); |
|
68 |
Assert.assertTrue(StringUtils.isNotBlank(it.getEncoding())); |
|
69 |
Assert.assertTrue(StringUtils.isNotBlank(it.getEnglishName())); |
|
70 |
Assert.assertTrue(StringUtils.isNotBlank(it.getNativeName())); |
|
71 |
Assert.assertTrue(StringUtils.isNotBlank(it.getInverseCode())); |
|
72 |
Assert.assertNotNull(o.getTerms().get(it.getInverseCode())); |
|
73 |
}); |
|
74 |
} |
|
75 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/transform/SolrProtoMapperTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.StringWriter; |
|
5 |
import java.text.ParseException; |
|
6 |
import java.text.SimpleDateFormat; |
|
7 |
import java.util.Arrays; |
|
8 |
import java.util.List; |
|
9 |
|
|
10 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
11 |
import com.googlecode.protobuf.format.JsonFormat; |
|
12 |
import eu.dnetlib.data.mapreduce.util.DNGFTest; |
|
13 |
import eu.dnetlib.data.proto.DNGFProtos.DNGF; |
|
14 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity; |
|
15 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
16 |
import org.apache.commons.codec.binary.Base64; |
|
17 |
import org.apache.commons.io.IOUtils; |
|
18 |
import org.apache.commons.logging.Log; |
|
19 |
import org.apache.commons.logging.LogFactory; |
|
20 |
import org.apache.solr.common.SolrInputDocument; |
|
21 |
import org.apache.solr.common.SolrInputField; |
|
22 |
import org.dom4j.DocumentException; |
|
23 |
import org.junit.Before; |
|
24 |
import org.junit.Test; |
|
25 |
|
|
26 |
import static org.junit.Assert.assertFalse; |
|
27 |
import static org.junit.Assert.assertNotNull; |
|
28 |
|
|
29 |
public class SolrProtoMapperTest { |
|
30 |
|
|
31 |
private static final Log log = LogFactory.getLog(SolrProtoMapper.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
32 |
|
|
33 |
private static final String outFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'"); |
|
34 |
|
|
35 |
private final static List<String> dateFormats = Arrays.asList("yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy"); |
|
36 |
|
|
37 |
private final static String dataset = "{\"kind\": \"entity\",\"entity\": {\"type\": \"dataset\",\"originalId\": [\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"C1293870510-PODAAC\",\"C1293870494-PODAAC\",\"C1293870514-PODAAC\",\"C1293870523-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870510-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}},{\"value\": \"C1293870494-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}},{\"value\": \"C1293870514-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}},{\"value\": \"C1293870523-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|dedup_wf_001::002a1f239e51f7b92df75b7f8902c117\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ANILT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"},{\"value\": \"ECHO10\"},{\"value\": \"ECHO10\"},{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T03:07:08.129Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Images of Antarctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]},{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]},{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]},{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"children\": [{\"type\": \"dataset\",\"originalId\": [\"C1293870514-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870514-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::ed18768335fcac40ae2cc062abc9e442\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ARBLT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T01:56:49.933Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Maps of Arctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},{\"type\": \"dataset\",\"originalId\": [\"C1293870494-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870494-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::ac6f36f8c9ef39f815045eaa1182e745\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ARILT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T02:51:51.788Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Images of Arctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},{\"type\": \"dataset\",\"originalId\": [\"C1293870523-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870523-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::a26b222d236fd523df9711c21a879911\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ANBLT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T03:01:46.198Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Maps of Antarctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},{\"type\": \"dataset\",\"originalId\": [\"C1293870510-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870510-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::002a1f239e51f7b92df75b7f8902c117\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ANILT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T03:07:08.129Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Images of Antarctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}}],\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},\"dataInfo\": {\"inferred\": true,\"deletedbyinference\": false,\"trust\": \"0.9\",\"inferenceprovenance\": \"dedup-similarity-dataset\",\"provenanceaction\": {\"classid\": \"sysimport:dedup\",\"classname\": \"sysimport:dedup\",\"schemeid\": \"dnet:provenanceActions\",\"schemename\": \"dnet:provenanceActions\"}}}"; |
|
38 |
|
|
39 |
private String fields; |
|
40 |
|
|
41 |
@Before |
|
42 |
public void setUp() throws IOException { |
|
43 |
final StringWriter sw = new StringWriter(); |
|
44 |
IOUtils.copy(getClass().getResourceAsStream("fields.xml"), sw); |
|
45 |
fields = sw.toString(); |
|
46 |
assertNotNull(fields); |
|
47 |
assertFalse(fields.isEmpty()); |
|
48 |
|
|
49 |
log.info(fields); |
|
50 |
} |
|
51 |
|
|
52 |
@Test |
|
53 |
public void testProto2SolrDocument() throws DocumentException, InvalidProtocolBufferException, JsonFormat.ParseException { |
|
54 |
final SolrProtoMapper mapper = new SolrProtoMapper(fields); |
|
55 |
|
|
56 |
assertNotNull(mapper); |
|
57 |
|
|
58 |
DNGF.Builder builder = DNGF.newBuilder(); |
|
59 |
JsonFormat.merge(dataset, builder); |
|
60 |
|
|
61 |
final DNGF oaf = builder.build(); |
|
62 |
|
|
63 |
assertNotNull(oaf.getEntity().getChildrenList()); |
|
64 |
assertFalse(oaf.getEntity().getChildrenList().isEmpty()); |
|
65 |
|
|
66 |
log.info("byte[] size: " + oaf.toByteArray().length); |
|
67 |
|
|
68 |
log.info("json size: " + JsonFormat.printToString(oaf).length()); |
|
69 |
|
|
70 |
log.info("base64 size: " + Base64.encodeBase64String(oaf.toByteArray()).length()); |
|
71 |
|
|
72 |
final byte[] decodeBase64 = Base64.decodeBase64(Base64.encodeBase64String(oaf.toByteArray())); |
|
73 |
|
|
74 |
log.info("decoded: " + JsonFormat.printToString(DNGF.parseFrom(decodeBase64))); |
|
75 |
|
|
76 |
final SolrInputDocument doc = mapper.map(oaf, getParsedDateField("2015-02-15"), "asd", "action-set"); |
|
77 |
|
|
78 |
assertNotNull(doc); |
|
79 |
|
|
80 |
for (final SolrInputField f : doc.values()) { |
|
81 |
log.info(f); |
|
82 |
} |
|
83 |
} |
|
84 |
|
|
85 |
|
|
86 |
/** |
|
87 |
* method return a solr-compatible string representation of a date |
|
88 |
* |
|
89 |
* @param date |
|
90 |
* @return |
|
91 |
* @throws DocumentException |
|
92 |
* @throws ParseException |
|
93 |
*/ |
|
94 |
public String getParsedDateField(final String date) { |
|
95 |
for (String formatString : dateFormats) { |
|
96 |
try { |
|
97 |
return new SimpleDateFormat(outFormat).format(new SimpleDateFormat(formatString).parse(date)); |
|
98 |
} catch (ParseException e) {} |
|
99 |
} |
|
100 |
throw new IllegalStateException("unable to parse date: " + date); |
|
101 |
} |
|
102 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/graph/utils/RelDescriptorTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.graph.utils; |
|
2 |
|
|
3 |
import org.junit.Test; |
|
4 |
import static org.junit.Assert.assertEquals; |
|
5 |
|
|
6 |
/** |
|
7 |
* Created by claudio on 02/02/2017. |
|
8 |
*/ |
|
9 |
public class RelDescriptorTest { |
|
10 |
|
|
11 |
@Test |
|
12 |
public void testRelDesriptor() { |
|
13 |
|
|
14 |
final RelDescriptor rd = new RelDescriptor("publication_publication_isMergedIn"); |
|
15 |
|
|
16 |
assertEquals("publication_publication", rd.getOntologyCode()); |
|
17 |
assertEquals("publication_publication", rd.asQualifier().getSchemeid()); |
|
18 |
assertEquals("isMergedIn", rd.getTermCode()); |
|
19 |
assertEquals("isMergedIn", rd.asQualifier().getClassid()); |
|
20 |
|
|
21 |
//assertEquals(rd.getTargetId(), "50|dedup_wf_001::01d734887f7e33fc754b22de0940c4ab"); |
|
22 |
} |
|
23 |
|
|
24 |
@Test |
|
25 |
public void testRelDesriptorFull() { |
|
26 |
|
|
27 |
final RelDescriptor rd = new RelDescriptor("publication_publication_isMergedIn:50|dedup_wf_001::01d734887f7e33fc754b22de0940c4ab"); |
|
28 |
|
|
29 |
assertEquals("publication_publication", rd.getOntologyCode()); |
|
30 |
assertEquals("publication_publication", rd.asQualifier().getSchemeid()); |
|
31 |
assertEquals("isMergedIn", rd.getTermCode()); |
|
32 |
assertEquals("isMergedIn", rd.asQualifier().getClassid()); |
|
33 |
assertEquals("50|dedup_wf_001::01d734887f7e33fc754b22de0940c4ab", rd.getTargetId()); |
|
34 |
} |
|
35 |
|
|
36 |
@Test(expected = IllegalArgumentException.class) |
|
37 |
public void testRelDesriptorMalformed() { |
|
38 |
|
|
39 |
new RelDescriptor("asd"); |
|
40 |
} |
|
41 |
|
|
42 |
@Test(expected = IllegalArgumentException.class) |
|
43 |
public void testRelDesriptorEmpty() { |
|
44 |
|
|
45 |
new RelDescriptor(""); |
|
46 |
} |
|
47 |
|
|
48 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/mapreduce/util/DNGFTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import com.google.protobuf.GeneratedMessage; |
|
4 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
5 |
import eu.dnetlib.data.graph.model.DNGFDecoder; |
|
6 |
import eu.dnetlib.data.proto.DNGFProtos.DNGF; |
|
7 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity; |
|
8 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel; |
|
9 |
import eu.dnetlib.data.proto.DatasourceProtos.Datasource; |
|
10 |
import eu.dnetlib.data.proto.FieldTypeProtos.*; |
|
11 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder; |
|
12 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
13 |
import eu.dnetlib.data.proto.OrganizationProtos.Organization; |
|
14 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
15 |
import eu.dnetlib.data.proto.ProjectProtos.Project; |
|
16 |
import eu.dnetlib.data.proto.PublicationProtos.Publication; |
|
17 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
18 |
import org.apache.commons.logging.Log; |
|
19 |
import org.apache.commons.logging.LogFactory; |
|
20 |
|
|
21 |
public class DNGFTest { |
|
22 |
|
|
23 |
public static final String CITATION_JSON = |
|
24 |
"<citations>\n <citation>\n <rawText>[10] M. Foret et al., Phys. Rev. B 66, 024204 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[11] B. Ru\175404\264e et al., Phys. Rev. Lett. 90, 095502 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[12] U. Buchenau et al., Phys. Rev. B 34, 5665 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[13] S.N. Taraskin and S.R. Elliott, J. Phys.: Condens. Mat- ter 11, A219 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[14] B. Hehlen et al., Phys. Rev. Lett. 84, 5355 (2000).</rawText>\n </citation>\n <citation>\n <rawText>[15] N.V. Surotsev et al., J. Phys.: Condens. Matter 10, L113 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[16] D.A. Parshin and C. Laermans, Phys. Rev. B 63, 132203 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[17] V.L. Gurevich et al., Phys. Rev. B 67, 094203 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[18] A. Matic et al., Phys. Rev. Lett. 86, 3803 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[19] E. Rat et al., arXiv:cond-mat/0505558, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[1] R.C. Zeller and R.O. Pohl, Phys. Rev. B 4, 2029 (1971).</rawText>\n </citation>\n <citation>\n <rawText>[20] C.A. Angell, J. Non-Cryst. Solids 131\20023133, 13 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[21] A.P. Sokolov et al., Phys. Rev. Lett. 71, 2062 (1993).</rawText>\n </citation>\n <citation>\n <rawText>[22] T. Matsuo et al., Solid State Ionics 154-155, 759 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[23] V.K. Malinovsky et al., Europhys. Lett. 11, 43 (1990).</rawText>\n </citation>\n <citation>\n <rawText>[24] J. Lor\250osch et al., J. Non-Cryst. Solids 69, 1 (1984).</rawText>\n </citation>\n <citation>\n <rawText>[25] U. Buchenau, Z. Phys. B 58, 181 (1985).</rawText>\n </citation>\n <citation>\n <rawText>[26] A.F. Io\175400e and A.R. Regel, Prog. Semicond. 4, 237 (1960).</rawText>\n </citation>\n <citation>\n <rawText>[27] R. Dell\20031Anna et al., Phys. Rev. Lett. 80, 1236 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[28] D. Fioretto et al., Phys. Rev. E 59, 4470 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[29] U. Buchenau et al., Phys. Rev. Lett. 77, 4035 (1996).</rawText>\n </citation>\n <citation>\n <rawText>[2] M. Rothenfusser et al., Phys. Rev. B 27, 5196 (1983).</rawText>\n </citation>\n <citation>\n <rawText>[30] J. Mattsson et al., J. Phys.: Condens. Matter 15, S1259 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[31] T. Scopigno et al., Phys. Rev. Lett. 92, 025503 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[32] M. Foret et al., Phys. Rev. Lett. 81, 2100 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[33] F. Sette et al., Science 280, 1550 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[34] J. Wuttke et al., Phys. Rev. E 52, 4026 (1995).</rawText>\n </citation>\n <citation>\n <rawText>[35] M.A. Ramos et al., Phys. Rev. Lett. 78, 82 (1997).</rawText>\n </citation>\n <citation>\n <rawText>[36] G. Monaco et al., Phys. Rev. Lett. 80, 2161 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[37] A. T\250olle, Rep. Prog. Phys. 64, 1473 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[38] As the straight lines do not cross the origin, this does not 2 imply \1623 \21035 \1651 .</rawText>\n </citation>\n <citation>\n <rawText>[39] A. Matic et al., Europhys. Lett. 54, 77 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[3] S. Hunklinger and W. Arnold, in Physical Acoustics, Vol. XII, W.P. Mason and R.N. Thurston Eds. (Academic Press, N.Y. 1976), p. 155.</rawText>\n </citation>\n <citation>\n <rawText>[40] IXS data are usually not available below \1651co, mostly for experimental reasons. E.g., that the rapid onset was not evidenced in vitreous silica [27], is not indicative of its absence but rather of a low qco \21074 1 nm\210221.</rawText>\n </citation>\n <citation>\n <rawText>[41] G. Ruocco et al., Phys. Rev. Lett. 83, 5583 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[42] D. C\1307 iplys et al., J. Physique (Paris) 42, C6-184 (1981).</rawText>\n </citation>\n <citation>\n <rawText>[43] R. Vacher et al., Rev. Sci. Instrum. 51, 288 (1980).</rawText>\n </citation>\n <citation>\n <rawText>[44] R. Vacher et al., arXiv:cond-mat/0505560, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[45] T.N. Claytor et al., Phys. Rev. B 18, 5842 (1978).</rawText>\n </citation>\n <citation>\n <rawText>[46] M. Arai et al., Physica B 263-264, 268 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[4] R. Vacher et al., J. Non-Cryst. Solids 45, 397 (1981); T.C. Zhu et al., Phys. Rev. B 44, 4281 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[5] J.E. Graebner et al., Phys. Rev. B 34, 5696 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[6] E. Duval and A. Mermet, Phys. Rev. B 58, 8159 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[7] A. Matic et al., Phys. Rev. Lett. 93, 145502 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[8] Often alluded to, e.g. in the Encyclopedia of Materials: Science and Technology, K.H.J. Buschow et al., Eds., Vol. 1 (Elsevier, Oxford, 2001), articles by S.R. Elliott on pp. 171-174 and U. Buchenau on pp. 212-215.</rawText>\n </citation>\n <citation>\n <rawText>[9] E. Rat et al., Phys. Rev. Lett. 83, 1355 (1999).</rawText>\n </citation>\n</citations>"; |
|
25 |
public static final String STATISTICS_JSON = |
|
26 |
"[{ \"citationsPerYear\": \"many\", \"anotherCoolStatistic\": \"WoW\", \"nestedStat\": { \"firstNestedStat\" : \"value 1\", \"secondNestedStat\" : \"value 2\"}, \"listingStat\" : [ \"one\", \"two\" ] }]"; |
|
27 |
private static final Log log = LogFactory.getLog(DNGFTest.class); |
|
28 |
private static String basePathProfiles = "/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType"; |
|
29 |
|
|
30 |
public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename) { |
|
31 |
return getStructuredproperty(value, classname, schemename, null); |
|
32 |
} |
|
33 |
|
|
34 |
public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename, final DataInfo dataInfo) { |
|
35 |
final Builder sp = StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classname, schemename)); |
|
36 |
if (dataInfo != null) { |
|
37 |
sp.setDataInfo(dataInfo); |
|
38 |
} |
|
39 |
return sp; |
|
40 |
} |
|
41 |
|
|
42 |
public static Qualifier.Builder getQualifier(final String classname, final String schemename) { |
|
43 |
return Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename); |
|
44 |
} |
|
45 |
|
|
46 |
public static KeyValue getKV(final String id, final String name) { |
|
47 |
return KeyValue.newBuilder().setKey(id).setValue(name).build(); |
|
48 |
} |
|
49 |
|
|
50 |
public static DNGFEntity getDatasource(final String datasourceId) { |
|
51 |
return DNGFEntity |
|
52 |
.newBuilder() |
|
53 |
.setType(Type.datasource) |
|
54 |
.setId(datasourceId) |
|
55 |
.setDatasource( |
|
56 |
Datasource.newBuilder().setMetadata( |
|
57 |
Datasource.Metadata.newBuilder().setOfficialname(sf("officialname")).setEnglishname(sf("englishname")) |
|
58 |
.setWebsiteurl(sf("websiteurl")).setContactemail(sf("contactemail")).addAccessinfopackage(sf("accessinforpackage")) |
|
59 |
.setNamespaceprefix(sf("namespaceprofix")).setDescription(sf("description")).setOdnumberofitems(sf("numberofitems")) |
|
60 |
.setOdnumberofitemsdate(sf("numberofitems date")) |
|
61 |
// .addOdsubjects("subjects") |
|
62 |
.setOdpolicies(sf("policies")).addOdlanguages(sf("languages")).addOdcontenttypes(sf("contenttypes")) |
|
63 |
.setDatasourcetype(getQualifier("type class", "type scheme")))).build(); |
|
64 |
} |
|
65 |
|
|
66 |
public static DNGFEntity getResult(final String id) { |
|
67 |
return getResultBuilder(id).build(); |
|
68 |
} |
|
69 |
|
|
70 |
public static DNGFEntity.Builder getResultBuilder(final String id) { |
|
71 |
return DNGFEntity |
|
72 |
.newBuilder() |
|
73 |
.setType(Type.publication) |
|
74 |
.setId(id) |
|
75 |
.setPublication( |
|
76 |
Publication.newBuilder() |
|
77 |
.setMetadata( |
|
78 |
Publication.Metadata |
|
79 |
.newBuilder() |
|
80 |
.addTitle( |
|
81 |
getStructuredproperty( |
|
82 |
"Analysis of cell viability in intervertebral disc: Effect of endplate permeability on cell population", |
|
83 |
"main title", "dnet:result_titles", getDataInfo())) |
|
84 |
.addTitle(getStructuredproperty("Another title", "alternative title", "dnet:result_titles", getDataInfo())) |
|
85 |
.addSubject(getStructuredproperty("Biophysics", "subject", "dnet:result_sujects")) |
|
86 |
.setDateofacceptance(sf("2010-01-01")).addSource(sf("sourceA")).addSource(sf("sourceB")) |
|
87 |
.addContext(Context.newBuilder().setId("egi::virtual::970")) |
|
88 |
.addContext(Context.newBuilder().setId("egi::classification::natsc::math::applied")) |
|
89 |
.addContext(Context.newBuilder().setId("egi::classification::natsc::math")) |
|
90 |
.addContext(Context.newBuilder().setId("egi::classification::natsc")) |
|
91 |
.addContext(Context.newBuilder().setId("egi::classification")).addContext(Context.newBuilder().setId("egi")) |
|
92 |
.addDescription(sf("Responsible for making and maintaining the extracellular matrix ...")) |
|
93 |
.addDescription(sf("Another description ...")).setPublisher(sf("ELSEVIER SCI LTD")) |
|
94 |
.setResulttype(getQualifier("publication", "dnet:result_types")) |
|
95 |
.setLanguage(getQualifier("eng", "dnet:languages"))).addInstance(getInstance("10|od__10", "Uk pubmed")) |
|
96 |
.addInstance(getInstance("10|od__10", "arxiv"))) |
|
97 |
.addCollectedfrom(getKV("opendoar____::1064", "Oxford University Research Archive")) |
|
98 |
.addPid(getStructuredproperty("doi:74293", "doi", "dnet:pids")).addPid(getStructuredproperty("oai:74295", "oai", "dnet:pids")) |
|
99 |
.setDateofcollection(""); |
|
100 |
} |
|
101 |
|
|
102 |
public static DataInfo getDataInfo() { |
|
103 |
return getDataInfo("0.4"); |
|
104 |
} |
|
105 |
|
|
106 |
public static DataInfo getDataInfo(final String trust) { |
|
107 |
return DataInfo.newBuilder().setDeletedbyinference(false).setTrust("0.4").setInferenceprovenance("algo").setProvenanceaction(getQualifier("xx", "yy")) |
|
108 |
.build(); |
|
109 |
} |
|
110 |
|
|
111 |
public static Instance.Builder getInstance(final String hostedbyId, final String hostedbyName) { |
|
112 |
return Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName)).setLicence(getQualifier("OpenAccess", "dnet:access_modes")) |
|
113 |
.setInstancetype(getQualifier("publication", "dnet:result_typologies")).addUrl("webresource url"); |
|
114 |
|
|
115 |
} |
|
116 |
|
|
117 |
public static DNGFRel getDedupRel(final String source, final String target) { |
|
118 |
return DNGFRel.newBuilder().setSource(source).setTarget(target).setRelType(getQualifier("dedup", "dedupScheme")) |
|
119 |
.setChild(false).setCachedTarget(getResult(target)).build(); |
|
120 |
} |
|
121 |
|
|
122 |
|
|
123 |
public static DNGFEntity getPerson() { |
|
124 |
return DNGFEntity |
|
125 |
.newBuilder() |
|
126 |
.setType(Type.person) |
|
127 |
.setId("WOS:000277866500014_A._Shirazi-Adl") |
|
128 |
.setPerson( |
|
129 |
Person.newBuilder().setMetadata( |
|
130 |
Person.Metadata.newBuilder().addSecondnames(sf("Shirazi-Adl")).setFullname(sf("A. Shirazi-Adl")) |
|
131 |
.setEmail(sf("name.surname@gmail.com")).setPhone(sf("12345")).setNationality(getQualifier("EN", "dnet:countries")))) |
|
132 |
.build(); |
|
133 |
} |
|
134 |
|
|
135 |
|
|
136 |
|
|
137 |
public static DNGFEntity getOrganization(final String orgId) { |
|
138 |
return DNGFEntity |
|
139 |
.newBuilder() |
|
140 |
.setType(Type.organization) |
|
141 |
.setId(orgId) |
|
142 |
.addCollectedfrom(getKV("opendoar_1234", "UK pubmed")) |
|
143 |
.setOrganization( |
|
144 |
Organization.newBuilder().setMetadata( |
|
145 |
Organization.Metadata.newBuilder().setLegalname(sf("CENTRE D'APPUI A LA RECHERCHE ET A LA FORMATION GIE")) |
|
146 |
.setLegalshortname(sf("CAREF")).setWebsiteurl(sf("www.caref-mali.org")) |
|
147 |
.setCountry(getQualifier("ML", "dnet:countries")))).build(); |
|
148 |
} |
|
149 |
|
|
150 |
public static DNGFEntity getProjectFP7(final String projectId, final String fundingProgram) throws InvalidProtocolBufferException { |
|
151 |
return DNGFEntity |
|
152 |
.newBuilder() |
|
153 |
.setType(Type.project) |
|
154 |
.setId(projectId) |
|
155 |
.addCollectedfrom(getKV("opendoar_1234", "UK pubmed")) |
|
156 |
.setProject( |
|
157 |
Project.newBuilder() |
|
158 |
.setMetadata( |
|
159 |
Project.Metadata |
|
160 |
.newBuilder() |
|
161 |
.setAcronym(sf("5CYRQOL")) |
|
162 |
.setTitle(sf("Cypriot Researchers Contribute to our Quality of Life")) |
|
163 |
.setStartdate(sf("2007-05-01")) |
|
164 |
.setEnddate(sf("2007-10-31")) |
|
165 |
.setEcsc39(sf("false")) |
|
166 |
.setContracttype(getQualifier("CSA", "ec:FP7contractTypes")) |
|
167 |
.addFundingtree( |
|
168 |
sf("<fundingtree><funder><id>ec__________::EC</id><shortname>EC</shortname><name>European Commission</name></funder><funding_level_2><id>ec__________::EC::FP7::" |
|
169 |
+ fundingProgram |
|
170 |
+ "::PEOPLE</id><description>Marie-Curie Actions</description><name>PEOPLE</name><class>ec:program</class><parent><funding_level_1><id>ec__________::EC::FP7::" |
|
171 |
+ fundingProgram |
|
172 |
+ "</id><description>" |
|
173 |
+ fundingProgram |
|
174 |
+ "-People</description><name>" |
|
175 |
+ fundingProgram |
|
176 |
+ "</name><class>ec:specificprogram</class><parent><funding_level_0><id>ec__________::EC::FP7</id><description>SEVENTH FRAMEWORK PROGRAMME</description><name>FP7</name><parent/><class>ec:frameworkprogram</class></funding_level_0></parent></funding_level_1></parent></funding_level_2></fundingtree>")))) |
|
177 |
.build(); |
|
178 |
} |
|
179 |
|
|
180 |
public static DNGFEntity getProjectWT() throws InvalidProtocolBufferException { |
|
181 |
return DNGFEntity |
|
182 |
.newBuilder() |
|
183 |
.setType(Type.project) |
|
184 |
.setId("project|wt::087536") |
|
185 |
.addCollectedfrom(getKV("wellcomeTrust", "wellcome trust")) |
|
186 |
.setProject( |
|
187 |
Project.newBuilder() |
|
188 |
.setMetadata( |
|
189 |
Project.Metadata |
|
190 |
.newBuilder() |
|
191 |
.setAcronym(sf("UNKNOWN")) |
|
192 |
.setTitle(sf("Research Institute for Infectious Diseases of Poverty (IIDP).")) |
|
193 |
.setStartdate(sf("2007-05-01")) |
|
194 |
.setEnddate(sf("2007-10-31")) |
|
195 |
.setEcsc39(sf("false")) |
|
196 |
.setContracttype(getQualifier("UNKNOWN", "wt:contractTypes")) |
|
197 |
.addFundingtree( |
|
198 |
sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::UNKNOWN</id><description>UNKNOWN</description><name>UNKNOWN</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>")) |
|
199 |
.addFundingtree( |
|
200 |
sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::Technology Transfer</id><description>Technology Transfer</description><name>Technology Transfer</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>")))) |
|
201 |
.build(); |
|
202 |
} |
|
203 |
|
|
204 |
public static ExtraInfo extraInfo(final String name, final String provenance, final String trust, final String typology, final String value) { |
|
205 |
final ExtraInfo.Builder e = ExtraInfo.newBuilder().setName(name).setProvenance(provenance).setTrust(trust).setTypology(typology).setValue(value); |
|
206 |
return e.build(); |
|
207 |
} |
|
208 |
|
|
209 |
public static StringField sf(final String s) { |
|
210 |
return sf(s, null); |
|
211 |
} |
|
212 |
|
|
213 |
public static StringField sf(final String s, final DataInfo dataInfo) { |
|
214 |
final StringField.Builder sf = StringField.newBuilder().setValue(s); |
|
215 |
if (dataInfo != null) { |
|
216 |
sf.setDataInfo(dataInfo); |
|
217 |
} |
|
218 |
return sf.build(); |
|
219 |
} |
|
220 |
|
|
221 |
public static DNGFDecoder embed(final GeneratedMessage msg, |
|
222 |
final Kind kind, |
|
223 |
final boolean deletedByInference, |
|
224 |
final boolean inferred, |
|
225 |
final String provenance, |
|
226 |
final String action) { |
|
227 |
|
|
228 |
final DNGF.Builder dngf = DNGF.newBuilder() |
|
229 |
.setKind(kind) |
|
230 |
.setLastupdatetimestamp(System.currentTimeMillis()) |
|
231 |
.setDataInfo( |
|
232 |
DataInfo.newBuilder().setDeletedbyinference(deletedByInference).setInferred(inferred).setTrust("0.5") |
|
233 |
.setInferenceprovenance(provenance).setProvenanceaction(getQualifier(action, action))); |
|
234 |
switch (kind) { |
|
235 |
case entity: |
|
236 |
dngf.setEntity((DNGFEntity) msg); |
|
237 |
break; |
|
238 |
case relation: |
|
239 |
dngf.setRel((DNGFRel) msg); |
|
240 |
break; |
|
241 |
default: |
|
242 |
break; |
|
243 |
} |
|
244 |
|
|
245 |
return DNGFDecoder.decode(dngf.build()); |
|
246 |
} |
|
247 |
|
|
248 |
public static DNGFDecoder embed(final GeneratedMessage msg, final Kind kind) { |
|
249 |
return embed(msg, kind, false, false, "inference_provenance", "provenance_action"); |
|
250 |
} |
|
251 |
|
|
252 |
|
|
253 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/mapreduce/util/DNGFDecoderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
|
|
5 |
import eu.dnetlib.data.graph.model.DNGFDecoder; |
|
6 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
7 |
import org.junit.Test; |
|
8 |
|
|
9 |
import static org.junit.Assert.assertFalse; |
|
10 |
import static org.junit.Assert.assertNotNull; |
|
11 |
|
|
12 |
public class DNGFDecoderTest { |
|
13 |
|
|
14 |
@Test |
|
15 |
public void testAsXml() { |
|
16 |
|
|
17 |
final DNGFDecoder decoder = DNGFTest.embed(DNGFTest.getResult("50|id_1"), Kind.entity); |
|
18 |
|
|
19 |
assertNotNull(decoder); |
|
20 |
|
|
21 |
assertNotNull(decoder.asXml()); |
|
22 |
|
|
23 |
System.out.println(decoder.asXml()); |
|
24 |
|
|
25 |
} |
|
26 |
|
|
27 |
@Test |
|
28 |
public void testGetFieldValues() { |
|
29 |
final DNGFDecoder decoder = DNGFTest.embed(DNGFTest.getResult("50|id_1"), Kind.entity); |
|
30 |
|
|
31 |
final String path = "publication/metadata/title/value"; |
|
32 |
final List<String> titles = decoder.decodeEntity().getFieldValues(path); |
|
33 |
|
|
34 |
assertNotNull(titles); |
|
35 |
assertFalse(titles.isEmpty()); |
|
36 |
} |
|
37 |
} |
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/mapreduce/util/DNGFRowKeyDecoderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder; |
Also available in: Unified diff
renamed