Project

General

Profile

1
package eu.dnetlib.pace.distance;
2

    
3
import java.util.List;

import com.google.common.collect.Lists;
import eu.dnetlib.pace.AbstractProtoPaceTest;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.distance.eval.ScoreResult;
import eu.dnetlib.pace.model.MapDocument;
import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
13

    
14
public class DetectorTest extends AbstractProtoPaceTest {
15

    
16
	@Test
17
	public void testScoreResult() {
18
		final Config config = getResultProdConf();
19

    
20
		final MapDocument resA = result(config, "A", "Recent results from CDFsd");
21
		final MapDocument resB = result(config, "B", "Recent results from CDF");
22

    
23
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
24

    
25
		System.out.println(sr.toString());
26
	}
27

    
28
	@Test
29
	public void testDistanceResultSimple() {
30
		final Config config = getResultSimpleConf();
31

    
32
		final MapDocument resA = result(config, "A", "Recent results from CDF");
33
		final MapDocument resB = result(config, "B", "Recent results from CDF");
34

    
35
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
36
		final double d = sr.getScore();
37
		System.out.println(String.format(" d ---> %s", d));
38

    
39
		assertTrue(d == 1.0);
40
	}
41

    
42
	@Test
43
	public void testDistanceResultSimpleMissingDates() {
44
		final Config config = getResultSimpleConf();
45

    
46
		final MapDocument resA = result(config, "A", "Recent results from BES");
47
		final MapDocument resB = result(config, "A", "Recent results from CES");
48

    
49
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
50
		final double d = sr.getScore();
51
		System.out.println(String.format(" d ---> %s", d));
52

    
53
		assertTrue(d > 0.97);
54
	}
55

    
56
	@Test
57
	public void testDistanceResultInvalidDate() {
58
		final Config config = getResultConf();
59

    
60
		final MapDocument resA = result(config, "A", "title title title 6BESR", "2013-01-05");
61
		final MapDocument resB = result(config, "B", "title title title 6BESR", "qwerty");
62

    
63
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
64
		final double d = sr.getScore();
65
		System.out.println(String.format(" d ---> %s", d));
66

    
67
		assertTrue(d == 1.0);
68
	}
69

    
70
	@Test
71
	public void testDistanceResultMissingOneDate() {
72
		final Config config = getResultConf();
73

    
74
		final MapDocument resA = result(config, "A", "title title title 6BESR", null);
75
		final MapDocument resB = result(config, "B", "title title title 6CLER", "2012-02");
76

    
77
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
78
		double d = sr.getScore();
79
		System.out.println(String.format(" d ---> %s", d));
80

    
81
		assertTrue((d > 0.9) && (d < 1.0));
82
	}
83

    
84
	@Test
85
	public void testDistanceResult() {
86
		final Config config = getResultConf();
87

    
88
		final MapDocument resA = result(config, "A", "title title title BES", "");
89
		final MapDocument resB = result(config, "B", "title title title CLEO");
90

    
91
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
92
		double d = sr.getScore();
93
		System.out.println(String.format(" d ---> %s", d));
94

    
95
		assertTrue((d > 0.9) && (d < 1.0));
96
	}
97

    
98
	@Test
99
	public void testDistanceResultMissingTwoDate() {
100
		final Config config = getResultConf();
101

    
102
		final MapDocument resA = result(config, "A", "title title title 6BESR");
103
		final MapDocument resB = result(config, "B", "title title title 6CLER");
104

    
105
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
106
		double d = sr.getScore();
107
		System.out.println(String.format(" d ---> %s", d));
108

    
109
		assertTrue((d > 0.9) && (d < 1.0));
110
	}
111

    
112
	@Test
113
	public void testDistanceOrganizationIgnoreMissing() {
114

    
115
		final Config config = getOrganizationSimpleConf();
116

    
117
		final MapDocument orgA = organization(config, "A", "CONSIGLIO NAZIONALE DELLE RICERCHE");
118
		final MapDocument orgB = organization(config, "B", "CONSIGLIO NAZIONALE DELLE RICERCHE", "CNR");
119

    
120
		final ScoreResult sr = new PaceDocumentDistance().between(orgA, orgB, config);
121
		final double d = sr.getScore();
122
		System.out.println(String.format(" d ---> %s", d));
123

    
124
		assertTrue(d == 1.0);
125
	}
126

    
127
	@Test
128
	public void testDistanceResultCase1() {
129

    
130
		final Config config = getResultConf();
131

    
132
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003");
133
		final MapDocument resB = result(config, "B", "Search for the Standard Model Higgs Boson", "2003");
134

    
135
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
136
		double d = sr.getScore();
137
		System.out.println(String.format(" d ---> %s", d));
138

    
139
		assertTrue((d > 0.9) && (d < 1.0));
140
	}
141

    
142
	@Test
143
	public void testDistanceResultCaseDoiMatch1() {
144
		final Config config = getResultConf();
145

    
146
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003", "10.1594/PANGAEA.726855");
147
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", "10.1594/PANGAEA.726855");
148

    
149
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
150
		double d = sr.getScore();
151
		System.out.println(String.format(" d ---> %s", d));
152

    
153
		assertTrue("exact DOIs will produce an exact match", d == 1.0);
154
	}
155

    
156
	@Test
157
	public void testDistanceResultCaseDoiMatch2() {
158
		final Config config = getResultConf();
159

    
160
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1594/PANGAEA.726855");
161
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2005", "10.1594/PANGAEA.726855");
162

    
163
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
164
		double d = sr.getScore();
165
		System.out.println(String.format(" d ---> %s", d));
166

    
167
		assertTrue("exact DOIs will produce an exact match, regardless of different titles or publication years", d == 1.0);
168
	}
169

    
170
	@Test
171
	public void testDistanceResultCaseDoiMatch3() {
172
		final Config config = getResultConf();
173

    
174
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
175
		final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003");
176

    
177
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
178
		double d = sr.getScore();
179
		System.out.println(String.format(" d ---> %s", d));
180

    
181
		assertTrue("a missing DOI will casue the comparsion to continue with the following conditions", d == 1.0);
182
	}
183

    
184
	@Test
185
	public void testDistanceResultCaseDoiMatch4() {
186
		final Config config = getResultConf();
187

    
188
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
189
		final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2005");
190

    
191
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
192
		double d = sr.getScore();
193
		System.out.println(String.format(" d ---> %s", d));
194

    
195
		assertTrue("a missing DOI, comparsion continues with the following conditions, different publication years will drop the score to 0", d == 0.0);
196
	}
197

    
198
	@Test
199
	public void testDistanceResultCaseDoiMatch5() {
200

    
201
		final Config config = getResultConf();
202

    
203
		final MapDocument resA = result(config, "A", "Search for the Standard Model Higgs Boson", "2003", "10.1016/j.jmb.2010.12.020");
204
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003");
205

    
206
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
207
		double d = sr.getScore();
208
		System.out.println(String.format(" d ---> %s", d));
209

    
210
		assertTrue("a missing DOI, comparsion continues with the following conditions", (d > 0.9) && (d < 1.0));
211
	}
212

    
213
	@Test
214
	public void testDistanceResultCaseDoiMatch6() {
215
		final Config config = getResultConf();
216

    
217
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
218
		final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003", "anotherDifferentDOI");
219

    
220
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
221
		double d = sr.getScore();
222
		System.out.println(String.format(" d ---> %s", d));
223

    
224
		assertTrue("different DOIs will drop the score to 0, regardless of the other fields", d == 0.0);
225
	}
226

    
227
	@Test
228
	public void testDistanceResultCaseDoiMatch7() {
229
		final Config config = getResultConf();
230

    
231
		final MapDocument resA = result(config, "A", "Adrenal Insufficiency asd asd", "1951", Lists.newArrayList("PMC2037944", "axdsds"));
232
		final MapDocument resB = result(config, "B", "Adrenal Insufficiency", "1951", "PMC2037944");
233

    
234
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
235
		double d = sr.getScore();
236
		System.out.println(String.format(" d ---> %s", d));
237

    
238
		assertTrue("different DOIs will drop the score to 0, regardless of the other fields", d > 0.9 & d < 1);
239
	}
240

    
241
	// http://dx.doi.org/10.1594/PANGAEA.726855 doi:10.1594/PANGAEA.726855
242

    
243
	@Test
244
	public void testDistanceResultCaseAuthor1() {
245

    
246
		final Config config = getResultAuthorsConf();
247

    
248
		final List<String> authorsA = Lists.newArrayList("a", "b", "c", "d");
249
		final List<String> authorsB = Lists.newArrayList("a", "b", "c");
250
		final List<String> pid = Lists.newArrayList();
251

    
252
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
253
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
254

    
255
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
256
		final double d = sr.getScore();
257
		System.out.println(String.format(" d ---> %s", d));
258

    
259
		assertTrue(d == 0.0);
260
	}
261

    
262
	@Test
263
	public void testDistanceResultCaseAuthor2() {
264

    
265
		final Config config = getResultAuthorsConf();
266

    
267
		final List<String> authorsA = Lists.newArrayList("a", "b", "c");
268
		final List<String> authorsB = Lists.newArrayList("a", "b", "c");
269
		final List<String> pid = Lists.newArrayList();
270

    
271
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
272
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
273

    
274
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
275
		final double d = sr.getScore();
276
		System.out.println(String.format(" d ---> %s", d));
277

    
278
		assertTrue(d == 1.0);
279
	}
280

    
281
	@Test
282
	public void testDistanceResultCaseAuthor3() {
283

    
284
		final Config config = getResultAuthorsConf();
285

    
286
		final List<String> authorsA = Lists.newArrayList("Bardi, A.", "Manghi, P.", "Artini, M.");
287
		final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele");
288
		final List<String> pid = Lists.newArrayList();
289

    
290
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
291
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
292

    
293
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
294
		double d = sr.getScore();
295
		System.out.println(String.format(" d ---> %s", d));
296

    
297
		assertTrue((d > 0.9) && (d < 1.0));
298
	}
299

    
300
	@Test
301
	public void testDistanceResultCaseAuthor4() {
302

    
303
		final Config config = getResultAuthorsConf();
304

    
305
		final List<String> authorsA = Lists.newArrayList("Bardi, Alessia", "Manghi, Paolo", "Artini, Michele", "a");
306
		final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele");
307
		final List<String> pid = Lists.newArrayList();
308

    
309
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
310
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
311

    
312
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
313
		final double d = sr.getScore();
314
		System.out.println(String.format(" d ---> %s", d));
315

    
316
		// assertTrue(d.getScore() == 0.0);
317
	}
318

    
319
	@Test
320
	public void testDistanceResultFullConf() {
321

    
322
		final Config config = getResultFullConf();
323

    
324
		final List<String> authorsA = Lists.newArrayList("Nagarajan Pranesh", "Guy Vautier", "Punyanganie de Silva");
325
		final List<String> authorsB = Lists.newArrayList("Pranesh Nagarajan", "Vautier Guy", "de Silva Punyanganie");
326

    
327
		final MapDocument resA =
328
				result(config, "A", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010",
329
						"10.1186/1752-1947-4-299", authorsA);
330

    
331
		final MapDocument resB =
332
				result(config, "B", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010",
333
						"10.1186/1752-1947-4-299", authorsB);
334

    
335
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
336
		final double d = sr.getScore();
337
		System.out.println(String.format(" d ---> %s", d));
338

    
339
		// assertTrue(d.getScore() == 0.0);
340
	}
341

    
342
	@Test
343
	public void testDistanceProdConf1() {
344

    
345
		final Config config = getResultProdConf();
346

    
347
		final MapDocument resA =
348
				result(config,
349
						"A",
350
						" Analysis of Transfer Embryo-Derived de-duplication");
351
		final MapDocument resB =
352
				result(config,
353
						"B",
354
						" Analysis of Transfer Embryo Derived deduplication");
355

    
356
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
357
		final double d = sr.getScore();
358
		System.out.println(String.format(" d ---> %s", d));
359

    
360
		// assertTrue(d.getScore() == 0.0);
361
	}
362

    
363
	@Test
364
	public void testDistanceProdConf2() {
365

    
366
		final Config config = getResultProdConf();
367

    
368
		final MapDocument resA =
369
				result(config,
370
						"A",
371
						"qwerty aaabbbbbbbb bbb ccc ddddd");
372
		final MapDocument resB =
373
				result(config,
374
						"B",
375
						"qwert aaabbbbbbbb bbb ccc ddddd");
376

    
377
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
378
		final double d = sr.getScore();
379
		System.out.println(String.format(" d ---> %s", d));
380

    
381
		// assertTrue(d.getScore() == 0.0);
382
	}
383

    
384
	@Test
385
	public void testDistanceProdConf3() {
386

    
387
		final Config config = getResultProdConf();
388

    
389
		final List<String> authorsA = Lists.newArrayList("Bardi, Alessia", "Manghi, Paolo", "Artini, Michele", "a");
390
		final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele");
391
		final List<String> pid = Lists.newArrayList();
392

    
393
		final MapDocument resA =
394
				result(config,
395
						"A",
396
						"qwerty aaabbbbbbbb bbb ccc ddddd", "2003", pid, authorsA);
397
		final MapDocument resB =
398
				result(config,
399
						"B",
400
						"qwert aaabbbbbbbb bbb ccc ddddd", "2003", pid, authorsB);
401

    
402
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
403
		final double d = sr.getScore();
404
		System.out.println(String.format(" d ---> %s", d));
405

    
406
		assertTrue(d == 0.0);
407
	}
408

    
409
}
    (1-1/1)