1
|
package eu.dnetlib.pace.distance;
|
2
|
|
3
|
import java.util.List;
|
4
|
|
5
|
import com.google.common.collect.Lists;
|
6
|
import eu.dnetlib.pace.AbstractProtoPaceTest;
|
7
|
import eu.dnetlib.pace.config.Config;
|
8
|
import eu.dnetlib.pace.distance.eval.ScoreResult;
|
9
|
import eu.dnetlib.pace.model.MapDocument;
|
10
|
import org.junit.Test;
|
11
|
|
12
|
import static org.junit.Assert.assertTrue;
|
13
|
|
14
|
public class DetectorTest extends AbstractProtoPaceTest {
|
15
|
|
16
|
@Test
|
17
|
public void testScoreResult() {
|
18
|
final Config config = getResultProdConf();
|
19
|
|
20
|
final MapDocument resA = result(config, "A", "Recent results from CDFsd");
|
21
|
final MapDocument resB = result(config, "B", "Recent results from CDF");
|
22
|
|
23
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
24
|
|
25
|
System.out.println(sr.toString());
|
26
|
}
|
27
|
|
28
|
@Test
|
29
|
public void testDistanceResultSimple() {
|
30
|
final Config config = getResultSimpleConf();
|
31
|
|
32
|
final MapDocument resA = result(config, "A", "Recent results from CDF");
|
33
|
final MapDocument resB = result(config, "B", "Recent results from CDF");
|
34
|
|
35
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
36
|
final double d = sr.getScore();
|
37
|
System.out.println(String.format(" d ---> %s", d));
|
38
|
|
39
|
assertTrue(d == 1.0);
|
40
|
}
|
41
|
|
42
|
@Test
|
43
|
public void testDistanceResultSimpleMissingDates() {
|
44
|
final Config config = getResultSimpleConf();
|
45
|
|
46
|
final MapDocument resA = result(config, "A", "Recent results from BES");
|
47
|
final MapDocument resB = result(config, "A", "Recent results from CES");
|
48
|
|
49
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
50
|
final double d = sr.getScore();
|
51
|
System.out.println(String.format(" d ---> %s", d));
|
52
|
|
53
|
assertTrue(d > 0.97);
|
54
|
}
|
55
|
|
56
|
@Test
|
57
|
public void testDistanceResultInvalidDate() {
|
58
|
final Config config = getResultConf();
|
59
|
|
60
|
final MapDocument resA = result(config, "A", "title title title 6BESR", "2013-01-05");
|
61
|
final MapDocument resB = result(config, "B", "title title title 6BESR", "qwerty");
|
62
|
|
63
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
64
|
final double d = sr.getScore();
|
65
|
System.out.println(String.format(" d ---> %s", d));
|
66
|
|
67
|
assertTrue(d == 1.0);
|
68
|
}
|
69
|
|
70
|
@Test
|
71
|
public void testDistanceResultMissingOneDate() {
|
72
|
final Config config = getResultConf();
|
73
|
|
74
|
final MapDocument resA = result(config, "A", "title title title 6BESR", null);
|
75
|
final MapDocument resB = result(config, "B", "title title title 6CLER", "2012-02");
|
76
|
|
77
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
78
|
double d = sr.getScore();
|
79
|
System.out.println(String.format(" d ---> %s", d));
|
80
|
|
81
|
assertTrue((d > 0.9) && (d < 1.0));
|
82
|
}
|
83
|
|
84
|
@Test
|
85
|
public void testDistanceResult() {
|
86
|
final Config config = getResultConf();
|
87
|
|
88
|
final MapDocument resA = result(config, "A", "title title title BES", "");
|
89
|
final MapDocument resB = result(config, "B", "title title title CLEO");
|
90
|
|
91
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
92
|
double d = sr.getScore();
|
93
|
System.out.println(String.format(" d ---> %s", d));
|
94
|
|
95
|
assertTrue((d > 0.9) && (d < 1.0));
|
96
|
}
|
97
|
|
98
|
@Test
|
99
|
public void testDistanceResultMissingTwoDate() {
|
100
|
final Config config = getResultConf();
|
101
|
|
102
|
final MapDocument resA = result(config, "A", "title title title 6BESR");
|
103
|
final MapDocument resB = result(config, "B", "title title title 6CLER");
|
104
|
|
105
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
106
|
double d = sr.getScore();
|
107
|
System.out.println(String.format(" d ---> %s", d));
|
108
|
|
109
|
assertTrue((d > 0.9) && (d < 1.0));
|
110
|
}
|
111
|
|
112
|
@Test
|
113
|
public void testDistanceOrganizationIgnoreMissing() {
|
114
|
|
115
|
final Config config = getOrganizationSimpleConf();
|
116
|
|
117
|
final MapDocument orgA = organization(config, "A", "CONSIGLIO NAZIONALE DELLE RICERCHE");
|
118
|
final MapDocument orgB = organization(config, "B", "CONSIGLIO NAZIONALE DELLE RICERCHE", "CNR");
|
119
|
|
120
|
final ScoreResult sr = new PaceDocumentDistance().between(orgA, orgB, config);
|
121
|
final double d = sr.getScore();
|
122
|
System.out.println(String.format(" d ---> %s", d));
|
123
|
|
124
|
assertTrue(d == 1.0);
|
125
|
}
|
126
|
|
127
|
@Test
|
128
|
public void testDistanceResultCase1() {
|
129
|
|
130
|
final Config config = getResultConf();
|
131
|
|
132
|
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003");
|
133
|
final MapDocument resB = result(config, "B", "Search for the Standard Model Higgs Boson", "2003");
|
134
|
|
135
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
136
|
double d = sr.getScore();
|
137
|
System.out.println(String.format(" d ---> %s", d));
|
138
|
|
139
|
assertTrue((d > 0.9) && (d < 1.0));
|
140
|
}
|
141
|
|
142
|
@Test
|
143
|
public void testDistanceResultCaseDoiMatch1() {
|
144
|
final Config config = getResultConf();
|
145
|
|
146
|
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003", "10.1594/PANGAEA.726855");
|
147
|
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", "10.1594/PANGAEA.726855");
|
148
|
|
149
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
150
|
double d = sr.getScore();
|
151
|
System.out.println(String.format(" d ---> %s", d));
|
152
|
|
153
|
assertTrue("exact DOIs will produce an exact match", d == 1.0);
|
154
|
}
|
155
|
|
156
|
@Test
|
157
|
public void testDistanceResultCaseDoiMatch2() {
|
158
|
final Config config = getResultConf();
|
159
|
|
160
|
final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1594/PANGAEA.726855");
|
161
|
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2005", "10.1594/PANGAEA.726855");
|
162
|
|
163
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
164
|
double d = sr.getScore();
|
165
|
System.out.println(String.format(" d ---> %s", d));
|
166
|
|
167
|
assertTrue("exact DOIs will produce an exact match, regardless of different titles or publication years", d == 1.0);
|
168
|
}
|
169
|
|
170
|
@Test
|
171
|
public void testDistanceResultCaseDoiMatch3() {
|
172
|
final Config config = getResultConf();
|
173
|
|
174
|
final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
|
175
|
final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003");
|
176
|
|
177
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
178
|
double d = sr.getScore();
|
179
|
System.out.println(String.format(" d ---> %s", d));
|
180
|
|
181
|
assertTrue("a missing DOI will casue the comparsion to continue with the following conditions", d == 1.0);
|
182
|
}
|
183
|
|
184
|
@Test
|
185
|
public void testDistanceResultCaseDoiMatch4() {
|
186
|
final Config config = getResultConf();
|
187
|
|
188
|
final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
|
189
|
final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2005");
|
190
|
|
191
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
192
|
double d = sr.getScore();
|
193
|
System.out.println(String.format(" d ---> %s", d));
|
194
|
|
195
|
assertTrue("a missing DOI, comparsion continues with the following conditions, different publication years will drop the score to 0", d == 0.0);
|
196
|
}
|
197
|
|
198
|
@Test
|
199
|
public void testDistanceResultCaseDoiMatch5() {
|
200
|
|
201
|
final Config config = getResultConf();
|
202
|
|
203
|
final MapDocument resA = result(config, "A", "Search for the Standard Model Higgs Boson", "2003", "10.1016/j.jmb.2010.12.020");
|
204
|
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003");
|
205
|
|
206
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
207
|
double d = sr.getScore();
|
208
|
System.out.println(String.format(" d ---> %s", d));
|
209
|
|
210
|
assertTrue("a missing DOI, comparsion continues with the following conditions", (d > 0.9) && (d < 1.0));
|
211
|
}
|
212
|
|
213
|
@Test
|
214
|
public void testDistanceResultCaseDoiMatch6() {
|
215
|
final Config config = getResultConf();
|
216
|
|
217
|
final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
|
218
|
final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003", "anotherDifferentDOI");
|
219
|
|
220
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
221
|
double d = sr.getScore();
|
222
|
System.out.println(String.format(" d ---> %s", d));
|
223
|
|
224
|
assertTrue("different DOIs will drop the score to 0, regardless of the other fields", d == 0.0);
|
225
|
}
|
226
|
|
227
|
@Test
|
228
|
public void testDistanceResultCaseDoiMatch7() {
|
229
|
final Config config = getResultConf();
|
230
|
|
231
|
final MapDocument resA = result(config, "A", "Adrenal Insufficiency asd asd", "1951", Lists.newArrayList("PMC2037944", "axdsds"));
|
232
|
final MapDocument resB = result(config, "B", "Adrenal Insufficiency", "1951", "PMC2037944");
|
233
|
|
234
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
235
|
double d = sr.getScore();
|
236
|
System.out.println(String.format(" d ---> %s", d));
|
237
|
|
238
|
assertTrue("different DOIs will drop the score to 0, regardless of the other fields", d > 0.9 & d < 1);
|
239
|
}
|
240
|
|
241
|
// http://dx.doi.org/10.1594/PANGAEA.726855 doi:10.1594/PANGAEA.726855
|
242
|
|
243
|
@Test
|
244
|
public void testDistanceResultCaseAuthor1() {
|
245
|
|
246
|
final Config config = getResultAuthorsConf();
|
247
|
|
248
|
final List<String> authorsA = Lists.newArrayList("a", "b", "c", "d");
|
249
|
final List<String> authorsB = Lists.newArrayList("a", "b", "c");
|
250
|
final List<String> pid = Lists.newArrayList();
|
251
|
|
252
|
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
|
253
|
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
|
254
|
|
255
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
256
|
final double d = sr.getScore();
|
257
|
System.out.println(String.format(" d ---> %s", d));
|
258
|
|
259
|
assertTrue(d == 0.0);
|
260
|
}
|
261
|
|
262
|
@Test
|
263
|
public void testDistanceResultCaseAuthor2() {
|
264
|
|
265
|
final Config config = getResultAuthorsConf();
|
266
|
|
267
|
final List<String> authorsA = Lists.newArrayList("a", "b", "c");
|
268
|
final List<String> authorsB = Lists.newArrayList("a", "b", "c");
|
269
|
final List<String> pid = Lists.newArrayList();
|
270
|
|
271
|
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
|
272
|
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
|
273
|
|
274
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
275
|
final double d = sr.getScore();
|
276
|
System.out.println(String.format(" d ---> %s", d));
|
277
|
|
278
|
assertTrue(d == 1.0);
|
279
|
}
|
280
|
|
281
|
@Test
|
282
|
public void testDistanceResultCaseAuthor3() {
|
283
|
|
284
|
final Config config = getResultAuthorsConf();
|
285
|
|
286
|
final List<String> authorsA = Lists.newArrayList("Bardi, A.", "Manghi, P.", "Artini, M.");
|
287
|
final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele");
|
288
|
final List<String> pid = Lists.newArrayList();
|
289
|
|
290
|
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
|
291
|
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
|
292
|
|
293
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
294
|
double d = sr.getScore();
|
295
|
System.out.println(String.format(" d ---> %s", d));
|
296
|
|
297
|
assertTrue((d > 0.9) && (d < 1.0));
|
298
|
}
|
299
|
|
300
|
@Test
|
301
|
public void testDistanceResultCaseAuthor4() {
|
302
|
|
303
|
final Config config = getResultAuthorsConf();
|
304
|
|
305
|
final List<String> authorsA = Lists.newArrayList("Bardi, Alessia", "Manghi, Paolo", "Artini, Michele", "a");
|
306
|
final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele");
|
307
|
final List<String> pid = Lists.newArrayList();
|
308
|
|
309
|
final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
|
310
|
final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
|
311
|
|
312
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
313
|
final double d = sr.getScore();
|
314
|
System.out.println(String.format(" d ---> %s", d));
|
315
|
|
316
|
// assertTrue(d.getScore() == 0.0);
|
317
|
}
|
318
|
|
319
|
@Test
|
320
|
public void testDistanceResultFullConf() {
|
321
|
|
322
|
final Config config = getResultFullConf();
|
323
|
|
324
|
final List<String> authorsA = Lists.newArrayList("Nagarajan Pranesh", "Guy Vautier", "Punyanganie de Silva");
|
325
|
final List<String> authorsB = Lists.newArrayList("Pranesh Nagarajan", "Vautier Guy", "de Silva Punyanganie");
|
326
|
|
327
|
final MapDocument resA =
|
328
|
result(config, "A", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010",
|
329
|
"10.1186/1752-1947-4-299", authorsA);
|
330
|
|
331
|
final MapDocument resB =
|
332
|
result(config, "B", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010",
|
333
|
"10.1186/1752-1947-4-299", authorsB);
|
334
|
|
335
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
336
|
final double d = sr.getScore();
|
337
|
System.out.println(String.format(" d ---> %s", d));
|
338
|
|
339
|
// assertTrue(d.getScore() == 0.0);
|
340
|
}
|
341
|
|
342
|
@Test
|
343
|
public void testDistanceProdConf1() {
|
344
|
|
345
|
final Config config = getResultProdConf();
|
346
|
|
347
|
final MapDocument resA =
|
348
|
result(config,
|
349
|
"A",
|
350
|
" Analysis of Transfer Embryo-Derived de-duplication");
|
351
|
final MapDocument resB =
|
352
|
result(config,
|
353
|
"B",
|
354
|
" Analysis of Transfer Embryo Derived deduplication");
|
355
|
|
356
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
357
|
final double d = sr.getScore();
|
358
|
System.out.println(String.format(" d ---> %s", d));
|
359
|
|
360
|
// assertTrue(d.getScore() == 0.0);
|
361
|
}
|
362
|
|
363
|
@Test
|
364
|
public void testDistanceProdConf2() {
|
365
|
|
366
|
final Config config = getResultProdConf();
|
367
|
|
368
|
final MapDocument resA =
|
369
|
result(config,
|
370
|
"A",
|
371
|
"qwerty aaabbbbbbbb bbb ccc ddddd");
|
372
|
final MapDocument resB =
|
373
|
result(config,
|
374
|
"B",
|
375
|
"qwert aaabbbbbbbb bbb ccc ddddd");
|
376
|
|
377
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
378
|
final double d = sr.getScore();
|
379
|
System.out.println(String.format(" d ---> %s", d));
|
380
|
|
381
|
// assertTrue(d.getScore() == 0.0);
|
382
|
}
|
383
|
|
384
|
@Test
|
385
|
public void testDistanceProdConf3() {
|
386
|
|
387
|
final Config config = getResultProdConf();
|
388
|
|
389
|
final List<String> authorsA = Lists.newArrayList("Bardi, Alessia", "Manghi, Paolo", "Artini, Michele", "a");
|
390
|
final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele");
|
391
|
final List<String> pid = Lists.newArrayList();
|
392
|
|
393
|
final MapDocument resA =
|
394
|
result(config,
|
395
|
"A",
|
396
|
"qwerty aaabbbbbbbb bbb ccc ddddd", "2003", pid, authorsA);
|
397
|
final MapDocument resB =
|
398
|
result(config,
|
399
|
"B",
|
400
|
"qwert aaabbbbbbbb bbb ccc ddddd", "2003", pid, authorsB);
|
401
|
|
402
|
final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
|
403
|
final double d = sr.getScore();
|
404
|
System.out.println(String.format(" d ---> %s", d));
|
405
|
|
406
|
assertTrue(d == 0.0);
|
407
|
}
|
408
|
|
409
|
}
|