Project

General

Profile

« Previous | Next » 

Revision 51226

renamed

View differences:

modules/dnet-graph-domain/trunk/deploy.info
1
{
2
		"type_source": "SVN",
3
		"goal": "package -U source:jar",
4
		"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet50/modules/dnet-graph-domain/trunk/",
5
		"deploy_repository": "dnet5-snapshots",
6
		"version": "5",
7
		"mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it",
8
		"deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet5-snapshots",
9
		"name": "dnet-graph-domain"
10
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/distance/DetectorTest.java
1
package eu.dnetlib.pace.distance;
2

  
3
import static org.junit.Assert.assertTrue;
4

  
5
import java.util.ArrayList;
6
import java.util.List;
7

  
8
import eu.dnetlib.pace.distance.eval.ScoreResult;
9
import org.junit.Test;
10

  
11
import com.google.common.collect.Lists;
12

  
13
import eu.dnetlib.pace.AbstractProtoPaceTest;
14
import eu.dnetlib.pace.config.Config;
15
import eu.dnetlib.pace.model.MapDocument;
16

  
17
public class DetectorTest extends AbstractProtoPaceTest {
18

  
19
	@Test
20
	public void testScoreResult() {
21
		final Config config = getResultProdConf();
22

  
23
		final MapDocument resA = result(config, "A", "Recent results from CDFsd");
24
		final MapDocument resB = result(config, "B", "Recent results from CDF");
25

  
26
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
27

  
28
		System.out.println(sr.toString());
29
	}
30

  
31
	@Test
32
	public void testDistanceResultSimple() {
33
		final Config config = getResultSimpleConf();
34

  
35
		final MapDocument resA = result(config, "A", "Recent results from CDF");
36
		final MapDocument resB = result(config, "B", "Recent results from CDF");
37

  
38
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
39
		final double d = sr.getScore();
40
		System.out.println(String.format(" d ---> %s", d));
41

  
42
		assertTrue(d == 1.0);
43
	}
44

  
45
	@Test
46
	public void testDistanceResultSimpleMissingDates() {
47
		final Config config = getResultSimpleConf();
48

  
49
		final MapDocument resA = result(config, "A", "Recent results from BES");
50
		final MapDocument resB = result(config, "A", "Recent results from CES");
51

  
52
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
53
		final double d = sr.getScore();
54
		System.out.println(String.format(" d ---> %s", d));
55

  
56
		assertTrue(d > 0.97);
57
	}
58

  
59
	@Test
60
	public void testDistanceResultInvalidDate() {
61
		final Config config = getResultConf();
62

  
63
		final MapDocument resA = result(config, "A", "title title title 6BESR", "2013-01-05");
64
		final MapDocument resB = result(config, "B", "title title title 6BESR", "qwerty");
65

  
66
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
67
		final double d = sr.getScore();
68
		System.out.println(String.format(" d ---> %s", d));
69

  
70
		assertTrue(d == 1.0);
71
	}
72

  
73
	@Test
74
	public void testDistanceResultMissingOneDate() {
75
		final Config config = getResultConf();
76

  
77
		final MapDocument resA = result(config, "A", "title title title 6BESR", null);
78
		final MapDocument resB = result(config, "B", "title title title 6CLER", "2012-02");
79

  
80
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
81
		double d = sr.getScore();
82
		System.out.println(String.format(" d ---> %s", d));
83

  
84
		assertTrue((d > 0.9) && (d < 1.0));
85
	}
86

  
87
	@Test
88
	public void testDistanceResult() {
89
		final Config config = getResultConf();
90

  
91
		final MapDocument resA = result(config, "A", "title title title BES", "");
92
		final MapDocument resB = result(config, "B", "title title title CLEO");
93

  
94
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
95
		double d = sr.getScore();
96
		System.out.println(String.format(" d ---> %s", d));
97

  
98
		assertTrue((d > 0.9) && (d < 1.0));
99
	}
100

  
101
	@Test
102
	public void testDistanceResultMissingTwoDate() {
103
		final Config config = getResultConf();
104

  
105
		final MapDocument resA = result(config, "A", "title title title 6BESR");
106
		final MapDocument resB = result(config, "B", "title title title 6CLER");
107

  
108
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
109
		double d = sr.getScore();
110
		System.out.println(String.format(" d ---> %s", d));
111

  
112
		assertTrue((d > 0.9) && (d < 1.0));
113
	}
114

  
115
	@Test
116
	public void testDistanceOrganizationIgnoreMissing() {
117

  
118
		final Config config = getOrganizationSimpleConf();
119

  
120
		final MapDocument orgA = organization(config, "A", "CONSIGLIO NAZIONALE DELLE RICERCHE");
121
		final MapDocument orgB = organization(config, "B", "CONSIGLIO NAZIONALE DELLE RICERCHE", "CNR");
122

  
123
		final ScoreResult sr = new PaceDocumentDistance().between(orgA, orgB, config);
124
		final double d = sr.getScore();
125
		System.out.println(String.format(" d ---> %s", d));
126

  
127
		assertTrue(d == 1.0);
128
	}
129

  
130
	@Test
131
	public void testDistanceResultCase1() {
132

  
133
		final Config config = getResultConf();
134

  
135
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003");
136
		final MapDocument resB = result(config, "B", "Search for the Standard Model Higgs Boson", "2003");
137

  
138
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
139
		double d = sr.getScore();
140
		System.out.println(String.format(" d ---> %s", d));
141

  
142
		assertTrue((d > 0.9) && (d < 1.0));
143
	}
144

  
145
	@Test
146
	public void testDistanceResultCaseDoiMatch1() {
147
		final Config config = getResultConf();
148

  
149
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003", "10.1594/PANGAEA.726855");
150
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", "10.1594/PANGAEA.726855");
151

  
152
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
153
		double d = sr.getScore();
154
		System.out.println(String.format(" d ---> %s", d));
155

  
156
		assertTrue("exact DOIs will produce an exact match", d == 1.0);
157
	}
158

  
159
	@Test
160
	public void testDistanceResultCaseDoiMatch2() {
161
		final Config config = getResultConf();
162

  
163
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1594/PANGAEA.726855");
164
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2005", "10.1594/PANGAEA.726855");
165

  
166
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
167
		double d = sr.getScore();
168
		System.out.println(String.format(" d ---> %s", d));
169

  
170
		assertTrue("exact DOIs will produce an exact match, regardless of different titles or publication years", d == 1.0);
171
	}
172

  
173
	@Test
174
	public void testDistanceResultCaseDoiMatch3() {
175
		final Config config = getResultConf();
176

  
177
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
178
		final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003");
179

  
180
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
181
		double d = sr.getScore();
182
		System.out.println(String.format(" d ---> %s", d));
183

  
184
		assertTrue("a missing DOI will casue the comparsion to continue with the following conditions", d == 1.0);
185
	}
186

  
187
	@Test
188
	public void testDistanceResultCaseDoiMatch4() {
189
		final Config config = getResultConf();
190

  
191
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
192
		final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2005");
193

  
194
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
195
		double d = sr.getScore();
196
		System.out.println(String.format(" d ---> %s", d));
197

  
198
		assertTrue("a missing DOI, comparsion continues with the following conditions, different publication years will drop the score to 0", d == 0.0);
199
	}
200

  
201
	@Test
202
	public void testDistanceResultCaseDoiMatch5() {
203

  
204
		final Config config = getResultConf();
205

  
206
		final MapDocument resA = result(config, "A", "Search for the Standard Model Higgs Boson", "2003", "10.1016/j.jmb.2010.12.020");
207
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003");
208

  
209
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
210
		double d = sr.getScore();
211
		System.out.println(String.format(" d ---> %s", d));
212

  
213
		assertTrue("a missing DOI, comparsion continues with the following conditions", (d > 0.9) && (d < 1.0));
214
	}
215

  
216
	@Test
217
	public void testDistanceResultCaseDoiMatch6() {
218
		final Config config = getResultConf();
219

  
220
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
221
		final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003", "anotherDifferentDOI");
222

  
223
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
224
		double d = sr.getScore();
225
		System.out.println(String.format(" d ---> %s", d));
226

  
227
		assertTrue("different DOIs will drop the score to 0, regardless of the other fields", d == 0.0);
228
	}
229

  
230
	@Test
231
	public void testDistanceResultCaseDoiMatch7() {
232
		final Config config = getResultConf();
233

  
234
		final MapDocument resA = result(config, "A", "Adrenal Insufficiency asd asd", "1951", Lists.newArrayList("PMC2037944", "axdsds"));
235
		final MapDocument resB = result(config, "B", "Adrenal Insufficiency", "1951", "PMC2037944");
236

  
237
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
238
		double d = sr.getScore();
239
		System.out.println(String.format(" d ---> %s", d));
240

  
241
		assertTrue("different DOIs will drop the score to 0, regardless of the other fields", d > 0.9 & d < 1);
242
	}
243

  
244
	// http://dx.doi.org/10.1594/PANGAEA.726855 doi:10.1594/PANGAEA.726855
245

  
246
	@Test
247
	public void testDistanceResultCaseAuthor1() {
248

  
249
		final Config config = getResultAuthorsConf();
250

  
251
		final List<String> authorsA = Lists.newArrayList("a", "b", "c", "d");
252
		final List<String> authorsB = Lists.newArrayList("a", "b", "c");
253
		final List<String> pid = Lists.newArrayList();
254

  
255
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
256
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
257

  
258
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
259
		final double d = sr.getScore();
260
		System.out.println(String.format(" d ---> %s", d));
261

  
262
		assertTrue(d == 0.0);
263
	}
264

  
265
	@Test
266
	public void testDistanceResultCaseAuthor2() {
267

  
268
		final Config config = getResultAuthorsConf();
269

  
270
		final List<String> authorsA = Lists.newArrayList("a", "b", "c");
271
		final List<String> authorsB = Lists.newArrayList("a", "b", "c");
272
		final List<String> pid = Lists.newArrayList();
273

  
274
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
275
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
276

  
277
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
278
		final double d = sr.getScore();
279
		System.out.println(String.format(" d ---> %s", d));
280

  
281
		assertTrue(d == 1.0);
282
	}
283

  
284
	@Test
285
	public void testDistanceResultCaseAuthor3() {
286

  
287
		final Config config = getResultAuthorsConf();
288

  
289
		final List<String> authorsA = Lists.newArrayList("Bardi, A.", "Manghi, P.", "Artini, M.");
290
		final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele");
291
		final List<String> pid = Lists.newArrayList();
292

  
293
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
294
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
295

  
296
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
297
		double d = sr.getScore();
298
		System.out.println(String.format(" d ---> %s", d));
299

  
300
		assertTrue((d > 0.9) && (d < 1.0));
301
	}
302

  
303
	@Test
304
	public void testDistanceResultCaseAuthor4() {
305

  
306
		final Config config = getResultAuthorsConf();
307

  
308
		final List<String> authorsA = Lists.newArrayList("Bardi, Alessia", "Manghi, Paolo", "Artini, Michele", "a");
309
		final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele");
310
		final List<String> pid = Lists.newArrayList();
311

  
312
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", pid, authorsA);
313
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", pid, authorsB);
314

  
315
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
316
		final double d = sr.getScore();
317
		System.out.println(String.format(" d ---> %s", d));
318

  
319
		// assertTrue(d.getScore() == 0.0);
320
	}
321

  
322
	@Test
323
	public void testDistanceResultFullConf() {
324

  
325
		final Config config = getResultFullConf();
326

  
327
		final List<String> authorsA = Lists.newArrayList("Nagarajan Pranesh", "Guy Vautier", "Punyanganie de Silva");
328
		final List<String> authorsB = Lists.newArrayList("Pranesh Nagarajan", "Vautier Guy", "de Silva Punyanganie");
329

  
330
		final MapDocument resA =
331
				result(config, "A", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010",
332
						"10.1186/1752-1947-4-299", authorsA);
333

  
334
		final MapDocument resB =
335
				result(config, "B", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010",
336
						"10.1186/1752-1947-4-299", authorsB);
337

  
338
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
339
		final double d = sr.getScore();
340
		System.out.println(String.format(" d ---> %s", d));
341

  
342
		// assertTrue(d.getScore() == 0.0);
343
	}
344

  
345
	@Test
346
	public void testDistanceProdConf1() {
347

  
348
		final Config config = getResultProdConf();
349

  
350
		final MapDocument resA =
351
				result(config,
352
						"A",
353
						" Analysis of Transfer Embryo-Derived de-duplication");
354
		final MapDocument resB =
355
				result(config,
356
						"B",
357
						" Analysis of Transfer Embryo Derived deduplication");
358

  
359
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
360
		final double d = sr.getScore();
361
		System.out.println(String.format(" d ---> %s", d));
362

  
363
		// assertTrue(d.getScore() == 0.0);
364
	}
365

  
366
	@Test
367
	public void testDistanceProdConf2() {
368

  
369
		final Config config = getResultProdConf();
370

  
371
		final MapDocument resA =
372
				result(config,
373
						"A",
374
						"qwerty aaabbbbbbbb bbb ccc ddddd");
375
		final MapDocument resB =
376
				result(config,
377
						"B",
378
						"qwert aaabbbbbbbb bbb ccc ddddd");
379

  
380
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
381
		final double d = sr.getScore();
382
		System.out.println(String.format(" d ---> %s", d));
383

  
384
		// assertTrue(d.getScore() == 0.0);
385
	}
386

  
387
	@Test
388
	public void testDistanceProdConf3() {
389

  
390
		final Config config = getResultProdConf();
391

  
392
		final List<String> authorsA = Lists.newArrayList("Bardi, Alessia", "Manghi, Paolo", "Artini, Michele", "a");
393
		final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele");
394
		final List<String> pid = Lists.newArrayList();
395

  
396
		final MapDocument resA =
397
				result(config,
398
						"A",
399
						"qwerty aaabbbbbbbb bbb ccc ddddd", "2003", pid, authorsA);
400
		final MapDocument resB =
401
				result(config,
402
						"B",
403
						"qwert aaabbbbbbbb bbb ccc ddddd", "2003", pid, authorsB);
404

  
405
		final ScoreResult sr = new PaceDocumentDistance().between(resA, resB, config);
406
		final double d = sr.getScore();
407
		System.out.println(String.format(" d ---> %s", d));
408

  
409
		assertTrue(d == 0.0);
410
	}
411

  
412
	@Test
413
	public void testDistancePersonConf1() {
414

  
415
		final Config config = getPersonConf();
416

  
417
		final MapDocument p1 = person(config, "p1_id", getPersonGT("/eu/dnetlib/pace/model/gt.author.manghi1.fo.json"));
418
		final MapDocument p2 = person(config, "p2_id", getPersonGT("/eu/dnetlib/pace/model/gt.author.manghi2.fo.json"));
419

  
420
		final ScoreResult sr = new PaceDocumentDistance().between(p1, p2, config);
421
		final double d = sr.getScore();
422
		System.out.println(String.format(" d ---> %s", d));
423

  
424
		// assertTrue(d.getScore() == 0.0);
425
	}
426

  
427
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/clustering/ClusteringCombinerTest.java
1
package eu.dnetlib.pace.clustering;
2

  
3
import org.junit.Before;
4
import org.junit.Test;
5

  
6
import eu.dnetlib.pace.AbstractProtoPaceTest;
7
import eu.dnetlib.pace.config.Config;
8
import eu.dnetlib.pace.config.Type;
9
import eu.dnetlib.pace.model.FieldListImpl;
10
import eu.dnetlib.pace.model.FieldValueImpl;
11
import eu.dnetlib.pace.model.MapDocument;
12

  
13
public class ClusteringCombinerTest extends AbstractProtoPaceTest {
14

  
15
	private Config config;
16

  
17
	@Before
18
	public void setUp() {
19
		config = getResultFullConf();
20
	}
21

  
22
	@Test
23
	public void testCombine() {
24
		String title = "Dipping in Cygnus X-2 in a multi-wavelength campaign due to absorption of extended ADC emission";
25
		MapDocument result = result(config, "A", title, "2013");
26

  
27
		FieldListImpl fl = new FieldListImpl();
28
		fl.add(new FieldValueImpl(Type.String, "desc", "lorem ipsum cabalie qwerty"));
29

  
30
		result.getFieldMap().put("desc", fl);
31
		System.out.println(title);
32
		System.out.println(ClusteringCombiner.combine(result, config));
33
	}
34

  
35
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/clustering/BlacklistAwareClusteringCombinerTest.java
1
package eu.dnetlib.pace.clustering;
2

  
3
import org.junit.Before;
4
import org.junit.Test;
5

  
6
import eu.dnetlib.pace.AbstractProtoPaceTest;
7
import eu.dnetlib.pace.config.Config;
8
import eu.dnetlib.pace.config.Type;
9
import eu.dnetlib.pace.model.FieldListImpl;
10
import eu.dnetlib.pace.model.FieldValueImpl;
11
import eu.dnetlib.pace.model.MapDocument;
12

  
13
public class BlacklistAwareClusteringCombinerTest extends AbstractProtoPaceTest {
14

  
15
	private Config config;
16

  
17
	@Before
18
	public void setUp() {
19
		config = getResultFullConf();
20
	}
21

  
22
	@Test
23
	public void testCombine() {
24
		final MapDocument result =
25
				result(config, "A", "Dipping in Cygnus X-2 in a multi-wavelength campaign due to absorption of extended ADC emission", "2013");
26
		final FieldListImpl fl = new FieldListImpl();
27
		fl.add(new FieldValueImpl(Type.String, "desc", "hello world description pipeline"));
28

  
29
		result.getFieldMap().put("desc", fl);
30

  
31
		fl.clear();
32
		fl.add(new FieldValueImpl(Type.String, "title", "lorem ipsum cabalie qwerty"));
33
		final FieldListImpl field = (FieldListImpl) result.getFieldMap().get("title");
34
		field.add(fl);
35

  
36
		System.out.println(BlacklistAwareClusteringCombiner.filterAndCombine(result, config, config.blacklists()));
37
	}
38
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/AbstractProtoPaceTest.java
1
package eu.dnetlib.pace;
2

  
3
import java.io.IOException;
4
import java.io.StringWriter;
5
import java.util.ArrayList;
6
import java.util.List;
7

  
8
import com.google.common.collect.Lists;
9
import com.google.gson.Gson;
10
import eu.dnetlib.data.mapreduce.util.DNGFTest;
11
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
12
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
13
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
14
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
15
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder;
16
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
17
import eu.dnetlib.data.proto.PersonProtos.Person;
18
import eu.dnetlib.data.proto.PublicationProtos.Publication;
19
import eu.dnetlib.data.proto.TypeProtos;
20
import eu.dnetlib.pace.config.Config;
21
import eu.dnetlib.pace.config.DedupConfig;
22
import eu.dnetlib.pace.config.Type;
23
import eu.dnetlib.pace.model.Field;
24
import eu.dnetlib.pace.model.FieldValueImpl;
25
import eu.dnetlib.pace.model.MapDocument;
26
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
27
import eu.dnetlib.pace.model.gt.GTAuthor;
28
import eu.dnetlib.pace.model.gt.GTAuthorMapper;
29
import org.apache.commons.io.IOUtils;
30
import org.apache.commons.lang3.RandomStringUtils;
31
import org.apache.commons.lang3.StringUtils;
32

  
33
public abstract class AbstractProtoPaceTest extends DNGFTest {
34

  
35
	protected DedupConfig getResultFullConf() {
36
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.full.pace.conf"));
37
	}
38

  
39
	protected DedupConfig getResultSimpleConf() {
40
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.simple.pace.conf"));
41
	}
42

  
43
	protected DedupConfig getResultConf() {
44
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.pace.conf"));
45
	}
46

  
47
	protected DedupConfig getOrganizationSimpleConf() {
48
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/organization.pace.conf"));
49
	}
50

  
51
	protected DedupConfig getResultAuthorsConf() {
52
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.authors.pace.conf"));
53
	}
54

  
55
	protected DedupConfig getPersonConf() {
56
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/person.pace.conf"));
57
	}
58

  
59
	protected DedupConfig getResultProdConf() {
60
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.prod.pace.conf"));
61
	}
62

  
63
	protected MapDocument person(final Config conf, final String id, final DNGF oaf) {
64
		return ProtoDocumentBuilder.newInstance(id, oaf.getEntity(), conf.model());
65
	}
66

  
67
	protected DNGF getPersonGT(final String path) {
68
		return new GTAuthorMapper().map(getGTAuthor(path));
69
	}
70

  
71
	protected GTAuthor getGTAuthor(final String path) {
72

  
73
		final Gson gson = new Gson();
74

  
75
		final String json = readFromClasspath(path);
76

  
77
		final GTAuthor gta = gson.fromJson(json, GTAuthor.class);
78

  
79
		return gta;
80
	}
81

  
82
	private String readFromClasspath(final String filename) {
83
		final StringWriter sw = new StringWriter();
84
		try {
85
			IOUtils.copy(getClass().getResourceAsStream(filename), sw);
86
			return sw.toString();
87
		} catch (final IOException e) {
88
			throw new RuntimeException("cannot load resource from classpath: " + filename);
89
		}
90
	}
91

  
92
	protected MapDocument result(final Config config, final String id, final String title) {
93
		return result(config, id, title, null, new ArrayList<String>(), null);
94
	}
95

  
96
	protected MapDocument result(final Config config, final String id, final String title, final String date) {
97
		return result(config, id, title, date, new ArrayList<String>(), null);
98
	}
99

  
100
	protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid) {
101
		return result(config, id, title, date, pid, null);
102
	}
103

  
104
	protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid) {
105
		return result(config, id, title, date, pid, null);
106
	}
107

  
108
	protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid, final List<String> authors) {
109
		return result(config, id, title, date, Lists.newArrayList(pid), authors);
110
	}
111

  
112
	protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid, final List<String> authors) {
113
		final Publication.Metadata.Builder metadata = Publication.Metadata.newBuilder();
114
		if (!StringUtils.isBlank(title)) {
115
			metadata.addTitle(getStruct(title, getQualifier("main title", "dnet:titles")));
116
			metadata.addTitle(getStruct(RandomStringUtils.randomAlphabetic(10), getQualifier("alternative title", "dnet:titles")));
117
		}
118
		if (!StringUtils.isBlank(date)) {
119
			metadata.setDateofacceptance(sf(date));
120
		}
121

  
122
		final DNGFEntity.Builder entity = dngfEntity(id, TypeProtos.Type.publication);
123
		final Publication.Builder result = Publication.newBuilder().setMetadata(metadata);
124

  
125
		if (authors != null) {
126
			for (final String author : authors) {
127
				result.addAuthor(person(author));
128
			}
129
		}
130

  
131
		entity.setPublication(result);
132

  
133
		if (pid != null) {
134
			for(String p : pid) {
135
				if (!StringUtils.isBlank(p)) {
136
					entity.addPid(sp(p, "doi"));
137
					//entity.addPid(sp(RandomStringUtils.randomAlphabetic(10), "oai"));
138
				}
139
			}
140
		}
141

  
142
		final DNGFEntity build = entity.build();
143
		return ProtoDocumentBuilder.newInstance(id, build, config.model());
144
	}
145

  
146
	private Person.Builder person(final String author) {
147
		final Person.Builder person = Person.newBuilder();
148

  
149
		final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(author, false);
150
		final Person.Metadata.Builder metadata = Person.Metadata.newBuilder();
151
		if (p.isAccurate()) {
152
			metadata.setFirstname(sf(p.getNormalisedFirstName()));
153
			metadata.addSecondnames(sf(p.getNormalisedSurname()));
154
			metadata.setFullname(sf(p.getNormalisedFullname()));
155
		} else {
156
			metadata.setFullname(sf(p.getOriginal()));
157
		}
158

  
159
		return person.setMetadata(metadata);
160
	}
161

  
162
	private DNGFEntity.Builder dngfEntity(final String id, final eu.dnetlib.data.proto.TypeProtos.Type type) {
163
		final DNGFEntity.Builder entity = DNGFEntity.newBuilder().setId(id).setType(type);
164
		return entity;
165
	}
166

  
167
	protected MapDocument organization(final Config config, final String id, final String legalName) {
168
		return organization(config, id, legalName, null);
169
	}
170

  
171
	protected MapDocument organization(final Config config, final String id, final String legalName, final String legalShortName) {
172
		final Organization.Metadata.Builder metadata = Organization.Metadata.newBuilder();
173
		if (legalName != null) {
174
			metadata.setLegalname(sf(legalName));
175
		}
176
		if (legalShortName != null) {
177
			metadata.setLegalshortname(sf(legalShortName));
178
		}
179

  
180
		final DNGFEntity.Builder entity = dngfEntity(id, TypeProtos.Type.publication);
181
		entity.setOrganization(Organization.newBuilder().setMetadata(metadata));
182

  
183
		return ProtoDocumentBuilder.newInstance(id, entity.build(), config.model());
184
	}
185

  
186
	private StructuredProperty sp(final String pid, final String type) {
187
		final Builder pidSp =
188
				StructuredProperty.newBuilder().setValue(pid)
189
						.setQualifier(Qualifier.newBuilder().setClassid(type).setClassname(type).setSchemeid("dnet:pid_types").setSchemename("dnet:pid_types"));
190
		return pidSp.build();
191
	}
192

  
193
	protected Field title(final String s) {
194
		return new FieldValueImpl(Type.String, "title", s);
195
	}
196

  
197
	protected static StructuredProperty.Builder getStruct(final String value, final Qualifier.Builder qualifier) {
198
		return StructuredProperty.newBuilder().setValue(value).setQualifier(qualifier);
199
	}
200

  
201
	/*
202
	 * protected static StringField.Builder sf(final String s) { return StringField.newBuilder().setValue(s); }
203
	 * 
204
	 * protected static Qualifier.Builder getQualifier(final String classname, final String schemename) { return
205
	 * Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename); }
206
	 */
207

  
208
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/model/ProtoDocumentBuilderTest.java
1
package eu.dnetlib.pace.model;
2

  
3
import com.google.common.collect.Iterables;
4
import com.google.common.collect.Sets;
5
import com.google.common.collect.Sets.SetView;
6
import com.googlecode.protobuf.format.JsonFormat;
7
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
8
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
9
import eu.dnetlib.pace.AbstractProtoPaceTest;
10
import eu.dnetlib.pace.config.Config;
11
import eu.dnetlib.pace.model.adaptor.Pid;
12
import org.junit.Test;
13

  
14
import static org.junit.Assert.assertFalse;
15
import static org.junit.Assert.assertTrue;
16

  
17
public class ProtoDocumentBuilderTest extends AbstractProtoPaceTest {
18

  
19
	@Test
20
	public void test_serialise1() {
21

  
22
		final String id = "12345";
23

  
24
		final Config config = getResultFullConf();
25

  
26
		final MapDocument document = ProtoDocumentBuilder.newInstance(id, getResult(id), config.model());
27

  
28
		assertFalse(document.fieldNames().isEmpty());
29
		assertFalse(Iterables.isEmpty(document.fields()));
30

  
31
		System.out.println("original:\n" + document);
32

  
33
		final String stringDoc = MapDocumentSerializer.toString(document);
34

  
35
		System.out.println("srialization:\n" + stringDoc);
36

  
37
		final MapDocument decoded = MapDocumentSerializer.decode(stringDoc.getBytes());
38

  
39
		final SetView<String> diff = Sets.difference(document.fieldNames(), decoded.fieldNames());
40

  
41
		assertTrue(diff.isEmpty());
42

  
43
		System.out.println("decoded:\n" + decoded);
44
	}
45

  
46
	@Test
47
	public void test_serialise2() {
48

  
49
		final String id = "12345";
50
		final String path = "/eu/dnetlib/pace/model/gt.author.manghi1.json";
51

  
52
		final Config config = getPersonConf();
53

  
54
		final MapDocument document = ProtoDocumentBuilder.newInstance(id, getPersonGT(path).getEntity(), config.model());
55

  
56
		assertFalse(document.fieldNames().isEmpty());
57
		assertFalse(Iterables.isEmpty(document.fields()));
58

  
59
		System.out.println("original:\n" + document);
60

  
61
		final String stringDoc = MapDocumentSerializer.toString(document);
62

  
63
		System.out.println("srialization:\n" + stringDoc);
64

  
65
		final MapDocument decoded = MapDocumentSerializer.decode(stringDoc.getBytes());
66

  
67
		final SetView<String> diff = Sets.difference(document.fieldNames(), decoded.fieldNames());
68

  
69
		assertTrue(diff.isEmpty());
70

  
71
		System.out.println("decoded:\n" + decoded);
72
	}
73

  
74

  
75
	@Test
76
	public void testPidSerialization() {
77

  
78
		final StructuredProperty sp = StructuredProperty.newBuilder().setValue("1234").setQualifier(
79
				Qualifier.newBuilder().setClassid("doi").setClassname("doi").setSchemeid("dnet:pid_types").setSchemename("dnet:pid_types")).build();
80

  
81
		final String json = JsonFormat.printToString(sp);
82

  
83
		final Pid pid = Pid.fromOafJson(json);
84

  
85

  
86

  
87

  
88
	}
89

  
90
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/pace/model/gt/AuthorTest.java
1
package eu.dnetlib.pace.model.gt;
2

  
3
import com.google.common.collect.Sets;
4
import org.junit.Test;
5

  
6
import java.util.Set;
7

  
8
import static org.junit.Assert.assertTrue;
9

  
10
public class AuthorTest {
11

  
12
	@Test
13
	public void test() {
14
		final Set<Author> s1 = getAuthors(3);
15
		final Set<Author> s2 = getAuthors(3);
16

  
17
		final Set<Author> i = Sets.intersection(s1, s2);
18

  
19
		System.out.println(i);
20

  
21
		assertTrue(i.size() == 3);
22

  
23
	}
24

  
25
	@Test
26
	public void test1() {
27
		final Authors a1 = new Authors(a("1", "Wang, M."));
28
		final Authors a2 = new Authors(a("1", "Wang, M."));
29

  
30
		final Set<Author> i = Sets.intersection(a1, a2);
31

  
32
		assertTrue(i.size() == 1);
33

  
34
	}
35

  
36
	private Set<Author> getAuthors(final int n) {
37
		final Set<Author> s = Sets.newHashSet();
38

  
39
		for (int i = 0; i < n; i++) {
40
			s.add(a(i + "", "name" + i));
41
		}
42
		return s;
43
	}
44

  
45
	private Author a(final String id, final String fullname) {
46
		final Author a = new Author();
47
		a.setId(id);
48
		a.setFullname(fullname);
49
		return a;
50
	}
51

  
52
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/transform/xml/OpenTrialsXsltFunctionsTest.java
1
package eu.dnetlib.data.transform.xml;
2

  
3
import java.util.List;
4

  
5
import eu.dnetlib.data.transform.xml.OpenTrialsXsltFunctions.JsonProv;
6
import org.junit.After;
7
import org.junit.Before;
8
import org.junit.Test;
9

  
10
import static org.junit.Assert.assertEquals;
11

  
12
/**
13
 * OpenTrialsXsltFunctions Tester.
14
 *
15
 */
16
public class OpenTrialsXsltFunctionsTest {
17

  
18
	private String jsonProv = "[{\"url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]";
19
	private String jsonProvWithNull = "[{\"url\" : \"\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]";
20
	private String jidentifiers = "{112683,NCT00920439}";
21

  
22
	@Before
23
	public void before() throws Exception {
24
	}
25

  
26
	@After
27
	public void after() throws Exception {
28
	}
29

  
30
	/**
31
	 * Method: getProvs(String jsonProvList)
32
	 */
33
	@Test
34
	public void testGetProvs() throws Exception {
35
		List<JsonProv> list = OpenTrialsXsltFunctions.getProvs(jsonProv);
36
		assertEquals(2, list.size());
37
	}
38

  
39
	/**
40
	 * Method: getMainIdentifierURL(String jsonProvList)
41
	 */
42
	@Test
43
	public void testGetMainIdentifierURL() throws Exception {
44
		String url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProv);
45
		assertEquals( "http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508", url );
46
		url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProvWithNull);
47
		assertEquals("https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true", url);
48
	}
49

  
50

  
51

  
52
} 
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/transform/OntologyLoaderTest.java
1
package eu.dnetlib.data.transform;
2

  
3
import java.io.IOException;
4
import java.io.InputStream;
5

  
6
import org.apache.commons.lang3.StringUtils;
7
import org.junit.Assert;
8
import org.junit.Test;
9

  
10
import static org.junit.Assert.assertEquals;
11
import static org.junit.Assert.assertNotNull;
12
import static org.junit.Assert.assertTrue;
13

  
14
/**
15
 * Created by claudio on 12/12/2016.
16
 */
17
public class OntologyLoaderTest {
18

  
19
	private String basePath = "/eu/dnetlib/bootstrap/profiles/OntologyDSResources/OntologyDSResourceType/";
20

  
21
	@Test
22
	public void testLoadOntologyFromCp() {
23

  
24
		final InputStream i = getClass().getResourceAsStream(basePath + "publication_publication_relations.xml");
25

  
26
        Ontology o = OntologyLoader.loadOntologyFromCp(i);
27
        checkOntology(o);
28

  
29
        String providedBy = o.inverseOf("isSupplementedBy");
30
        assertEquals(providedBy, "isSupplementTo");
31

  
32
        String provides = o.inverseOf("isPartOf");
33
        assertEquals(provides, "hasPart");
34
    }
35

  
36
	@Test
37
	public void testLoadOntologiesFromCp() throws IOException {
38

  
39
		OntologyLoader.loadOntologiesFromCp().values().forEach(o -> checkOntology(o));
40
	}
41

  
42
	@Test
43
	public void testLoadOntologiesSerialization() throws IOException {
44

  
45
		final Ontologies o = OntologyLoader.loadOntologiesFromCp();
46
		assertNotNull(o);
47
		final String json = o.toJson(true);
48

  
49
		System.out.println(json);
50

  
51
		assertTrue(StringUtils.isNoneBlank(json));
52

  
53
		final Ontologies o1 = OntologyLoader.loadOntologies(json);
54

  
55
		assertNotNull(o1);
56

  
57
		o1.entrySet().forEach(e -> checkOntology(e.getValue()));
58
	}
59

  
60
	private void checkOntology(Ontology o) {
61
		Assert.assertNotNull(o);
62
		Assert.assertTrue(StringUtils.isNotBlank(o.getCode()));
63
		Assert.assertTrue(StringUtils.isNotBlank(o.getDescription()));
64
		Assert.assertNotNull(o.getTerms().values());
65

  
66
		o.getTerms().values().forEach(it -> {
67
			Assert.assertTrue(StringUtils.isNotBlank(it.getCode()));
68
			Assert.assertTrue(StringUtils.isNotBlank(it.getEncoding()));
69
			Assert.assertTrue(StringUtils.isNotBlank(it.getEnglishName()));
70
			Assert.assertTrue(StringUtils.isNotBlank(it.getNativeName()));
71
			Assert.assertTrue(StringUtils.isNotBlank(it.getInverseCode()));
72
			Assert.assertNotNull(o.getTerms().get(it.getInverseCode()));
73
		});
74
	}
75
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/transform/SolrProtoMapperTest.java
1
package eu.dnetlib.data.transform;
2

  
3
import java.io.IOException;
4
import java.io.StringWriter;
5
import java.text.ParseException;
6
import java.text.SimpleDateFormat;
7
import java.util.Arrays;
8
import java.util.List;
9

  
10
import com.google.protobuf.InvalidProtocolBufferException;
11
import com.googlecode.protobuf.format.JsonFormat;
12
import eu.dnetlib.data.mapreduce.util.DNGFTest;
13
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
14
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
15
import eu.dnetlib.data.proto.KindProtos.Kind;
16
import org.apache.commons.codec.binary.Base64;
17
import org.apache.commons.io.IOUtils;
18
import org.apache.commons.logging.Log;
19
import org.apache.commons.logging.LogFactory;
20
import org.apache.solr.common.SolrInputDocument;
21
import org.apache.solr.common.SolrInputField;
22
import org.dom4j.DocumentException;
23
import org.junit.Before;
24
import org.junit.Test;
25

  
26
import static org.junit.Assert.assertFalse;
27
import static org.junit.Assert.assertNotNull;
28

  
29
public class SolrProtoMapperTest {
30

  
31
	private static final Log log = LogFactory.getLog(SolrProtoMapper.class); // NOPMD by marko on 11/24/08 5:02 PM
32

  
33
	private static final String outFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
34

  
35
	private final static List<String> dateFormats = Arrays.asList("yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy");
36

  
37
	private final static String dataset = "{\"kind\": \"entity\",\"entity\": {\"type\": \"dataset\",\"originalId\": [\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"C1293870510-PODAAC\",\"C1293870494-PODAAC\",\"C1293870514-PODAAC\",\"C1293870523-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870510-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}},{\"value\": \"C1293870494-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}},{\"value\": \"C1293870514-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}},{\"value\": \"C1293870523-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|dedup_wf_001::002a1f239e51f7b92df75b7f8902c117\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ANILT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"},{\"value\": \"ECHO10\"},{\"value\": \"ECHO10\"},{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T03:07:08.129Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Images of Antarctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]},{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]},{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]},{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"children\": [{\"type\": \"dataset\",\"originalId\": [\"C1293870514-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870514-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::ed18768335fcac40ae2cc062abc9e442\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ARBLT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T01:56:49.933Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Maps of Arctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},{\"type\": \"dataset\",\"originalId\": [\"C1293870494-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870494-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::ac6f36f8c9ef39f815045eaa1182e745\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ARILT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T02:51:51.788Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Images of Arctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},{\"type\": \"dataset\",\"originalId\": [\"C1293870523-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870523-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::a26b222d236fd523df9711c21a879911\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ANBLT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T03:01:46.198Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Maps of Antarctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},{\"type\": \"dataset\",\"originalId\": [\"C1293870510-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870510-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::002a1f239e51f7b92df75b7f8902c117\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ANILT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T03:07:08.129Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Images of Antarctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}}],\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},\"dataInfo\": {\"inferred\": true,\"deletedbyinference\": false,\"trust\": \"0.9\",\"inferenceprovenance\": \"dedup-similarity-dataset\",\"provenanceaction\": {\"classid\": \"sysimport:dedup\",\"classname\": \"sysimport:dedup\",\"schemeid\": \"dnet:provenanceActions\",\"schemename\": \"dnet:provenanceActions\"}}}";
38

  
39
	private String fields;
40

  
41
	@Before
42
	public void setUp() throws IOException {
43
		final StringWriter sw = new StringWriter();
44
		IOUtils.copy(getClass().getResourceAsStream("fields.xml"), sw);
45
		fields = sw.toString();
46
		assertNotNull(fields);
47
		assertFalse(fields.isEmpty());
48

  
49
		log.info(fields);
50
	}
51

  
52
	@Test
53
	public void testProto2SolrDocument() throws DocumentException, InvalidProtocolBufferException, JsonFormat.ParseException {
54
		final SolrProtoMapper mapper = new SolrProtoMapper(fields);
55

  
56
		assertNotNull(mapper);
57

  
58
		DNGF.Builder builder = DNGF.newBuilder();
59
		JsonFormat.merge(dataset, builder);
60

  
61
		final DNGF oaf = builder.build();
62

  
63
		assertNotNull(oaf.getEntity().getChildrenList());
64
		assertFalse(oaf.getEntity().getChildrenList().isEmpty());
65

  
66
		log.info("byte[] size: " + oaf.toByteArray().length);
67

  
68
		log.info("json size:   " + JsonFormat.printToString(oaf).length());
69

  
70
		log.info("base64 size: " + Base64.encodeBase64String(oaf.toByteArray()).length());
71

  
72
		final byte[] decodeBase64 = Base64.decodeBase64(Base64.encodeBase64String(oaf.toByteArray()));
73

  
74
		log.info("decoded: " + JsonFormat.printToString(DNGF.parseFrom(decodeBase64)));
75

  
76
		final SolrInputDocument doc = mapper.map(oaf, getParsedDateField("2015-02-15"), "asd", "action-set");
77

  
78
		assertNotNull(doc);
79

  
80
		for (final SolrInputField f : doc.values()) {
81
			log.info(f);
82
		}
83
	}
84

  
85

  
86
	/**
87
	 * method return a solr-compatible string representation of a date
88
	 *
89
	 * @param date
90
	 * @return
91
	 * @throws DocumentException
92
	 * @throws ParseException
93
	 */
94
	public String getParsedDateField(final String date) {
95
		for (String formatString : dateFormats) {
96
			try {
97
				return new SimpleDateFormat(outFormat).format(new SimpleDateFormat(formatString).parse(date));
98
			} catch (ParseException e) {}
99
		}
100
		throw new IllegalStateException("unable to parse date: " + date);
101
	}
102
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/graph/utils/RelDescriptorTest.java
1
package eu.dnetlib.data.graph.utils;
2

  
3
import org.junit.Test;
4
import static org.junit.Assert.assertEquals;
5

  
6
/**
7
 * Created by claudio on 02/02/2017.
8
 */
9
public class RelDescriptorTest {
10

  
11
	@Test
12
	public void testRelDesriptor() {
13

  
14
		final RelDescriptor rd = new RelDescriptor("publication_publication_isMergedIn");
15

  
16
		assertEquals("publication_publication", rd.getOntologyCode());
17
		assertEquals("publication_publication", rd.asQualifier().getSchemeid());
18
		assertEquals("isMergedIn", rd.getTermCode());
19
		assertEquals("isMergedIn", rd.asQualifier().getClassid());
20

  
21
		//assertEquals(rd.getTargetId(), "50|dedup_wf_001::01d734887f7e33fc754b22de0940c4ab");
22
	}
23

  
24
	@Test
25
	public void testRelDesriptorFull() {
26

  
27
		final RelDescriptor rd = new RelDescriptor("publication_publication_isMergedIn:50|dedup_wf_001::01d734887f7e33fc754b22de0940c4ab");
28

  
29
		assertEquals("publication_publication", rd.getOntologyCode());
30
		assertEquals("publication_publication", rd.asQualifier().getSchemeid());
31
		assertEquals("isMergedIn", rd.getTermCode());
32
		assertEquals("isMergedIn", rd.asQualifier().getClassid());
33
		assertEquals("50|dedup_wf_001::01d734887f7e33fc754b22de0940c4ab", rd.getTargetId());
34
	}
35

  
36
	@Test(expected = IllegalArgumentException.class)
37
	public void testRelDesriptorMalformed() {
38

  
39
		new RelDescriptor("asd");
40
	}
41

  
42
	@Test(expected = IllegalArgumentException.class)
43
	public void testRelDesriptorEmpty() {
44

  
45
		new RelDescriptor("");
46
	}
47

  
48
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/mapreduce/util/DNGFTest.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import com.google.protobuf.GeneratedMessage;
4
import com.google.protobuf.InvalidProtocolBufferException;
5
import eu.dnetlib.data.graph.model.DNGFDecoder;
6
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
7
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
8
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
9
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
10
import eu.dnetlib.data.proto.FieldTypeProtos.*;
11
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder;
12
import eu.dnetlib.data.proto.KindProtos.Kind;
13
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
14
import eu.dnetlib.data.proto.PersonProtos.Person;
15
import eu.dnetlib.data.proto.ProjectProtos.Project;
16
import eu.dnetlib.data.proto.PublicationProtos.Publication;
17
import eu.dnetlib.data.proto.TypeProtos.Type;
18
import org.apache.commons.logging.Log;
19
import org.apache.commons.logging.LogFactory;
20

  
21
public class DNGFTest {
22

  
23
	public static final String CITATION_JSON =
24
			"<citations>\n  <citation>\n    <rawText>[10] M. Foret et al., Phys. Rev. B 66, 024204 (2002).</rawText>\n  </citation>\n  <citation>\n    <rawText>[11] B. Ru\175404\264e et al., Phys. Rev. Lett. 90, 095502 (2003).</rawText>\n  </citation>\n  <citation>\n    <rawText>[12] U. Buchenau et al., Phys. Rev. B 34, 5665 (1986).</rawText>\n  </citation>\n  <citation>\n    <rawText>[13] S.N. Taraskin and S.R. Elliott, J. Phys.: Condens. Mat- ter 11, A219 (1999).</rawText>\n  </citation>\n  <citation>\n    <rawText>[14] B. Hehlen et al., Phys. Rev. Lett. 84, 5355 (2000).</rawText>\n  </citation>\n  <citation>\n    <rawText>[15] N.V. Surotsev et al., J. Phys.: Condens. Matter 10, L113 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[16] D.A. Parshin and C. Laermans, Phys. Rev. B 63, 132203 (2001).</rawText>\n  </citation>\n  <citation>\n    <rawText>[17] V.L. Gurevich et al., Phys. Rev. B 67, 094203 (2003).</rawText>\n  </citation>\n  <citation>\n    <rawText>[18] A. Matic et al., Phys. Rev. Lett. 86, 3803 (2001).</rawText>\n  </citation>\n  <citation>\n    <rawText>[19] E. Rat et al., arXiv:cond-mat/0505558, 23 May 2005.</rawText>\n  </citation>\n  <citation>\n    <rawText>[1] R.C. Zeller and R.O. Pohl, Phys. Rev. B 4, 2029 (1971).</rawText>\n  </citation>\n  <citation>\n    <rawText>[20] C.A. Angell, J. Non-Cryst. Solids 131\20023133, 13 (1991).</rawText>\n  </citation>\n  <citation>\n    <rawText>[21] A.P. Sokolov et al., Phys. Rev. Lett. 71, 2062 (1993).</rawText>\n  </citation>\n  <citation>\n    <rawText>[22] T. Matsuo et al., Solid State Ionics 154-155, 759 (2002).</rawText>\n  </citation>\n  <citation>\n    <rawText>[23] V.K. Malinovsky et al., Europhys. Lett. 11, 43 (1990).</rawText>\n  </citation>\n  <citation>\n    <rawText>[24] J. Lor\250osch et al., J. Non-Cryst. Solids 69, 1 (1984).</rawText>\n  </citation>\n  <citation>\n    <rawText>[25] U. Buchenau, Z. Phys. B 58, 181 (1985).</rawText>\n  </citation>\n  <citation>\n    <rawText>[26] A.F. Io\175400e and A.R. Regel, Prog. Semicond. 4, 237 (1960).</rawText>\n  </citation>\n  <citation>\n    <rawText>[27] R. Dell\20031Anna et al., Phys. Rev. Lett. 80, 1236 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[28] D. Fioretto et al., Phys. Rev. E 59, 4470 (1999).</rawText>\n  </citation>\n  <citation>\n    <rawText>[29] U. Buchenau et al., Phys. Rev. Lett. 77, 4035 (1996).</rawText>\n  </citation>\n  <citation>\n    <rawText>[2] M. Rothenfusser et al., Phys. Rev. B 27, 5196 (1983).</rawText>\n  </citation>\n  <citation>\n    <rawText>[30] J. Mattsson et al., J. Phys.: Condens. Matter 15, S1259 (2003).</rawText>\n  </citation>\n  <citation>\n    <rawText>[31] T. Scopigno et al., Phys. Rev. Lett. 92, 025503 (2004).</rawText>\n  </citation>\n  <citation>\n    <rawText>[32] M. Foret et al., Phys. Rev. Lett. 81, 2100 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[33] F. Sette et al., Science 280, 1550 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[34] J. Wuttke et al., Phys. Rev. E 52, 4026 (1995).</rawText>\n  </citation>\n  <citation>\n    <rawText>[35] M.A. Ramos et al., Phys. Rev. Lett. 78, 82 (1997).</rawText>\n  </citation>\n  <citation>\n    <rawText>[36] G. Monaco et al., Phys. Rev. Lett. 80, 2161 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[37] A. T\250olle, Rep. Prog. Phys. 64, 1473 (2001).</rawText>\n  </citation>\n  <citation>\n    <rawText>[38] As the straight lines do not cross the origin, this does not 2 imply \1623 \21035 \1651 .</rawText>\n  </citation>\n  <citation>\n    <rawText>[39] A. Matic et al., Europhys. Lett. 54, 77 (2001).</rawText>\n  </citation>\n  <citation>\n    <rawText>[3] S. Hunklinger and W. Arnold, in Physical Acoustics, Vol. XII, W.P. Mason and R.N. Thurston Eds. (Academic Press, N.Y. 1976), p. 155.</rawText>\n  </citation>\n  <citation>\n    <rawText>[40] IXS data are usually not available below \1651co, mostly for experimental reasons. E.g., that the rapid onset was not evidenced in vitreous silica [27], is not indicative of its absence but rather of a low qco \21074 1 nm\210221.</rawText>\n  </citation>\n  <citation>\n    <rawText>[41] G. Ruocco et al., Phys. Rev. Lett. 83, 5583 (1999).</rawText>\n  </citation>\n  <citation>\n    <rawText>[42] D. C\1307 iplys et al., J. Physique (Paris) 42, C6-184 (1981).</rawText>\n  </citation>\n  <citation>\n    <rawText>[43] R. Vacher et al., Rev. Sci. Instrum. 51, 288 (1980).</rawText>\n  </citation>\n  <citation>\n    <rawText>[44] R. Vacher et al., arXiv:cond-mat/0505560, 23 May 2005.</rawText>\n  </citation>\n  <citation>\n    <rawText>[45] T.N. Claytor et al., Phys. Rev. B 18, 5842 (1978).</rawText>\n  </citation>\n  <citation>\n    <rawText>[46] M. Arai et al., Physica B 263-264, 268 (1999).</rawText>\n  </citation>\n  <citation>\n    <rawText>[4] R. Vacher et al., J. Non-Cryst. Solids 45, 397 (1981); T.C. Zhu et al., Phys. Rev. B 44, 4281 (1991).</rawText>\n  </citation>\n  <citation>\n    <rawText>[5] J.E. Graebner et al., Phys. Rev. B 34, 5696 (1986).</rawText>\n  </citation>\n  <citation>\n    <rawText>[6] E. Duval and A. Mermet, Phys. Rev. B 58, 8159 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[7] A. Matic et al., Phys. Rev. Lett. 93, 145502 (2004).</rawText>\n  </citation>\n  <citation>\n    <rawText>[8] Often alluded to, e.g. in the Encyclopedia of Materials: Science and Technology, K.H.J. Buschow et al., Eds., Vol. 1 (Elsevier, Oxford, 2001), articles by S.R. Elliott on pp. 171-174 and U. Buchenau on pp. 212-215.</rawText>\n  </citation>\n  <citation>\n    <rawText>[9] E. Rat et al., Phys. Rev. Lett. 83, 1355 (1999).</rawText>\n  </citation>\n</citations>";
25
	public static final String STATISTICS_JSON =
26
			"[{ \"citationsPerYear\": \"many\", \"anotherCoolStatistic\": \"WoW\", \"nestedStat\": { \"firstNestedStat\" : \"value 1\", \"secondNestedStat\" : \"value 2\"}, \"listingStat\" : [ \"one\", \"two\" ] }]";
27
    private static final Log log = LogFactory.getLog(DNGFTest.class);
28
    private static String basePathProfiles = "/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType";
29

  
30
	public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename) {
31
		return getStructuredproperty(value, classname, schemename, null);
32
	}
33

  
34
	public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename, final DataInfo dataInfo) {
35
		final Builder sp = StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classname, schemename));
36
		if (dataInfo != null) {
37
			sp.setDataInfo(dataInfo);
38
		}
39
		return sp;
40
	}
41

  
42
	public static Qualifier.Builder getQualifier(final String classname, final String schemename) {
43
		return Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename);
44
	}
45

  
46
	public static KeyValue getKV(final String id, final String name) {
47
		return KeyValue.newBuilder().setKey(id).setValue(name).build();
48
	}
49

  
50
	public static DNGFEntity getDatasource(final String datasourceId) {
51
		return DNGFEntity
52
				.newBuilder()
53
				.setType(Type.datasource)
54
				.setId(datasourceId)
55
				.setDatasource(
56
						Datasource.newBuilder().setMetadata(
57
								Datasource.Metadata.newBuilder().setOfficialname(sf("officialname")).setEnglishname(sf("englishname"))
58
										.setWebsiteurl(sf("websiteurl")).setContactemail(sf("contactemail")).addAccessinfopackage(sf("accessinforpackage"))
59
										.setNamespaceprefix(sf("namespaceprofix")).setDescription(sf("description")).setOdnumberofitems(sf("numberofitems"))
60
										.setOdnumberofitemsdate(sf("numberofitems date"))
61
										// .addOdsubjects("subjects")
62
										.setOdpolicies(sf("policies")).addOdlanguages(sf("languages")).addOdcontenttypes(sf("contenttypes"))
63
										.setDatasourcetype(getQualifier("type class", "type scheme")))).build();
64
	}
65

  
66
	public static DNGFEntity getResult(final String id) {
67
		return getResultBuilder(id).build();
68
	}
69

  
70
	public static DNGFEntity.Builder getResultBuilder(final String id) {
71
		return DNGFEntity
72
				.newBuilder()
73
				.setType(Type.publication)
74
				.setId(id)
75
				.setPublication(
76
						Publication.newBuilder()
77
								.setMetadata(
78
										Publication.Metadata
79
												.newBuilder()
80
												.addTitle(
81
														getStructuredproperty(
82
																"Analysis of cell viability in intervertebral disc: Effect of endplate permeability on cell population",
83
																"main title", "dnet:result_titles", getDataInfo()))
84
												.addTitle(getStructuredproperty("Another title", "alternative title", "dnet:result_titles", getDataInfo()))
85
												.addSubject(getStructuredproperty("Biophysics", "subject", "dnet:result_sujects"))
86
												.setDateofacceptance(sf("2010-01-01")).addSource(sf("sourceA")).addSource(sf("sourceB"))
87
												.addContext(Context.newBuilder().setId("egi::virtual::970"))
88
												.addContext(Context.newBuilder().setId("egi::classification::natsc::math::applied"))
89
												.addContext(Context.newBuilder().setId("egi::classification::natsc::math"))
90
												.addContext(Context.newBuilder().setId("egi::classification::natsc"))
91
												.addContext(Context.newBuilder().setId("egi::classification")).addContext(Context.newBuilder().setId("egi"))
92
												.addDescription(sf("Responsible for making and maintaining the extracellular matrix ..."))
93
												.addDescription(sf("Another description ...")).setPublisher(sf("ELSEVIER SCI LTD"))
94
												.setResulttype(getQualifier("publication", "dnet:result_types"))
95
												.setLanguage(getQualifier("eng", "dnet:languages"))).addInstance(getInstance("10|od__10", "Uk pubmed"))
96
								.addInstance(getInstance("10|od__10", "arxiv")))
97
				.addCollectedfrom(getKV("opendoar____::1064", "Oxford University Research Archive"))
98
				.addPid(getStructuredproperty("doi:74293", "doi", "dnet:pids")).addPid(getStructuredproperty("oai:74295", "oai", "dnet:pids"))
99
				.setDateofcollection("");
100
	}
101

  
102
	public static DataInfo getDataInfo() {
103
		return getDataInfo("0.4");
104
	}
105

  
106
	public static DataInfo getDataInfo(final String trust) {
107
		return DataInfo.newBuilder().setDeletedbyinference(false).setTrust("0.4").setInferenceprovenance("algo").setProvenanceaction(getQualifier("xx", "yy"))
108
				.build();
109
	}
110

  
111
	public static Instance.Builder getInstance(final String hostedbyId, final String hostedbyName) {
112
		return Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName)).setLicence(getQualifier("OpenAccess", "dnet:access_modes"))
113
				.setInstancetype(getQualifier("publication", "dnet:result_typologies")).addUrl("webresource url");
114

  
115
	}
116

  
117
	public static DNGFRel getDedupRel(final String source, final String target) {
118
		return DNGFRel.newBuilder().setSource(source).setTarget(target).setRelType(getQualifier("dedup", "dedupScheme"))
119
				.setChild(false).setCachedTarget(getResult(target)).build();
120
	}
121

  
122

  
123
	public static DNGFEntity getPerson() {
124
		return DNGFEntity
125
				.newBuilder()
126
				.setType(Type.person)
127
				.setId("WOS:000277866500014_A._Shirazi-Adl")
128
				.setPerson(
129
						Person.newBuilder().setMetadata(
130
								Person.Metadata.newBuilder().addSecondnames(sf("Shirazi-Adl")).setFullname(sf("A. Shirazi-Adl"))
131
										.setEmail(sf("name.surname@gmail.com")).setPhone(sf("12345")).setNationality(getQualifier("EN", "dnet:countries"))))
132
				.build();
133
	}
134

  
135

  
136

  
137
	public static DNGFEntity getOrganization(final String orgId) {
138
		return DNGFEntity
139
				.newBuilder()
140
				.setType(Type.organization)
141
				.setId(orgId)
142
				.addCollectedfrom(getKV("opendoar_1234", "UK pubmed"))
143
				.setOrganization(
144
						Organization.newBuilder().setMetadata(
145
								Organization.Metadata.newBuilder().setLegalname(sf("CENTRE D'APPUI A LA RECHERCHE ET A LA FORMATION GIE"))
146
										.setLegalshortname(sf("CAREF")).setWebsiteurl(sf("www.caref-mali.org"))
147
										.setCountry(getQualifier("ML", "dnet:countries")))).build();
148
	}
149

  
150
	public static DNGFEntity getProjectFP7(final String projectId, final String fundingProgram) throws InvalidProtocolBufferException {
151
		return DNGFEntity
152
				.newBuilder()
153
				.setType(Type.project)
154
				.setId(projectId)
155
				.addCollectedfrom(getKV("opendoar_1234", "UK pubmed"))
156
				.setProject(
157
						Project.newBuilder()
158
								.setMetadata(
159
										Project.Metadata
160
												.newBuilder()
161
												.setAcronym(sf("5CYRQOL"))
162
												.setTitle(sf("Cypriot Researchers Contribute to our Quality of Life"))
163
												.setStartdate(sf("2007-05-01"))
164
												.setEnddate(sf("2007-10-31"))
165
												.setEcsc39(sf("false"))
166
												.setContracttype(getQualifier("CSA", "ec:FP7contractTypes"))
167
												.addFundingtree(
168
														sf("<fundingtree><funder><id>ec__________::EC</id><shortname>EC</shortname><name>European Commission</name></funder><funding_level_2><id>ec__________::EC::FP7::"
169
																+ fundingProgram
170
																+ "::PEOPLE</id><description>Marie-Curie Actions</description><name>PEOPLE</name><class>ec:program</class><parent><funding_level_1><id>ec__________::EC::FP7::"
171
																+ fundingProgram
172
																+ "</id><description>"
173
																+ fundingProgram
174
																+ "-People</description><name>"
175
																+ fundingProgram
176
																+ "</name><class>ec:specificprogram</class><parent><funding_level_0><id>ec__________::EC::FP7</id><description>SEVENTH FRAMEWORK PROGRAMME</description><name>FP7</name><parent/><class>ec:frameworkprogram</class></funding_level_0></parent></funding_level_1></parent></funding_level_2></fundingtree>"))))
177
				.build();
178
	}
179

  
180
	public static DNGFEntity getProjectWT() throws InvalidProtocolBufferException {
181
		return DNGFEntity
182
				.newBuilder()
183
				.setType(Type.project)
184
				.setId("project|wt::087536")
185
				.addCollectedfrom(getKV("wellcomeTrust", "wellcome trust"))
186
				.setProject(
187
						Project.newBuilder()
188
								.setMetadata(
189
										Project.Metadata
190
												.newBuilder()
191
												.setAcronym(sf("UNKNOWN"))
192
												.setTitle(sf("Research Institute for Infectious Diseases of Poverty (IIDP)."))
193
												.setStartdate(sf("2007-05-01"))
194
												.setEnddate(sf("2007-10-31"))
195
												.setEcsc39(sf("false"))
196
												.setContracttype(getQualifier("UNKNOWN", "wt:contractTypes"))
197
												.addFundingtree(
198
														sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::UNKNOWN</id><description>UNKNOWN</description><name>UNKNOWN</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>"))
199
												.addFundingtree(
200
														sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::Technology Transfer</id><description>Technology Transfer</description><name>Technology Transfer</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>"))))
201
				.build();
202
	}
203

  
204
	public static ExtraInfo extraInfo(final String name, final String provenance, final String trust, final String typology, final String value) {
205
		final ExtraInfo.Builder e = ExtraInfo.newBuilder().setName(name).setProvenance(provenance).setTrust(trust).setTypology(typology).setValue(value);
206
		return e.build();
207
	}
208

  
209
	public static StringField sf(final String s) {
210
		return sf(s, null);
211
	}
212

  
213
	public static StringField sf(final String s, final DataInfo dataInfo) {
214
		final StringField.Builder sf = StringField.newBuilder().setValue(s);
215
		if (dataInfo != null) {
216
			sf.setDataInfo(dataInfo);
217
		}
218
		return sf.build();
219
	}
220

  
221
	public static DNGFDecoder embed(final GeneratedMessage msg,
222
			final Kind kind,
223
			final boolean deletedByInference,
224
			final boolean inferred,
225
			final String provenance,
226
			final String action) {
227

  
228
		final DNGF.Builder dngf = DNGF.newBuilder()
229
				.setKind(kind)
230
				.setLastupdatetimestamp(System.currentTimeMillis())
231
				.setDataInfo(
232
						DataInfo.newBuilder().setDeletedbyinference(deletedByInference).setInferred(inferred).setTrust("0.5")
233
								.setInferenceprovenance(provenance).setProvenanceaction(getQualifier(action, action)));
234
		switch (kind) {
235
		case entity:
236
			dngf.setEntity((DNGFEntity) msg);
237
			break;
238
		case relation:
239
			dngf.setRel((DNGFRel) msg);
240
			break;
241
		default:
242
			break;
243
		}
244

  
245
		return DNGFDecoder.decode(dngf.build());
246
	}
247

  
248
	public static DNGFDecoder embed(final GeneratedMessage msg, final Kind kind) {
249
		return embed(msg, kind, false, false, "inference_provenance", "provenance_action");
250
	}
251

  
252

  
253
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/mapreduce/util/DNGFDecoderTest.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import java.util.List;
4

  
5
import eu.dnetlib.data.graph.model.DNGFDecoder;
6
import eu.dnetlib.data.proto.KindProtos.Kind;
7
import org.junit.Test;
8

  
9
import static org.junit.Assert.assertFalse;
10
import static org.junit.Assert.assertNotNull;
11

  
12
public class DNGFDecoderTest {
13

  
14
	@Test
15
	public void testAsXml() {
16

  
17
		final DNGFDecoder decoder = DNGFTest.embed(DNGFTest.getResult("50|id_1"), Kind.entity);
18

  
19
		assertNotNull(decoder);
20

  
21
		assertNotNull(decoder.asXml());
22

  
23
		System.out.println(decoder.asXml());
24

  
25
	}
26

  
27
	@Test
28
	public void testGetFieldValues() {
29
		final DNGFDecoder decoder = DNGFTest.embed(DNGFTest.getResult("50|id_1"), Kind.entity);
30

  
31
		final String path = "publication/metadata/title/value";
32
		final List<String> titles = decoder.decodeEntity().getFieldValues(path);
33

  
34
		assertNotNull(titles);
35
		assertFalse(titles.isEmpty());
36
	}
37
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/mapreduce/util/DNGFRowKeyDecoderTest.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff