Project

General

Profile

« Previous | Next » 

Revision 52199

cleanup

View differences:

modules/dnet-openaireplus-mapping-utils/branches/2.2/deploy.info
1
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-openaireplus-mapping-utils/trunk/", "deploy_repository": "dnet4-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", "name": "dnet-openaireplus-mapping-utils"}
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/test/java/eu/dnetlib/pace/distance/DetectorTest.java
1
package eu.dnetlib.pace.distance;
2

  
3
import static org.junit.Assert.assertTrue;
4

  
5
import java.util.List;
6

  
7
import org.junit.Test;
8

  
9
import com.google.common.collect.Lists;
10

  
11
import eu.dnetlib.pace.AbstractProtoPaceTest;
12
import eu.dnetlib.pace.config.Config;
13
import eu.dnetlib.pace.model.document.MapDocument;
14

  
15
public class DetectorTest extends AbstractProtoPaceTest {
16

  
17
	@Test
18
	public void testDistanceResultSimple() {
19
		Config config = getResultSimpleConf();
20

  
21
		MapDocument resA = result(config, "A", "Recent results from CDF", null);
22
		MapDocument resB = result(config, "B", "Recent results from CDF", null);
23

  
24
		double d = new PaceDocumentDistance().between(resA, resB, config);
25
		System.out.println(String.format(" d ---> %s", d));
26

  
27
		assertTrue(d == 1.0);
28
	}
29

  
30
	@Test
31
	public void testDistanceResultSimpleMissingDates() {
32
		Config config = getResultSimpleConf2();
33

  
34
		MapDocument resA = result(config, "A", "Recent results from BES", null);
35
		MapDocument resB = result(config, "A", "Recent results from CES", null);
36

  
37
		double d = new PaceDocumentDistance().between(resA, resB, config);
38
		System.out.println(String.format(" d ---> %s", d));
39

  
40
		assertTrue(d > 0.97);
41
	}
42

  
43
	@Test
44
	public void testDistanceResultInvalidDate() {
45
		Config config = getResultConf();
46

  
47
		MapDocument resA = result(config, "A", "title title title 6BESR", "2013-01-05");
48
		MapDocument resB = result(config, "B", "title title title 6BESR", "qwerty");
49

  
50
		double d = new PaceDocumentDistance().between(resA, resB, config);
51
		System.out.println(String.format(" d ---> %s", d));
52

  
53
		assertTrue(d == 1.0);
54
	}
55

  
56
	@Test
57
	public void testDistanceResultMissingOneDate() {
58
		Config config = getResultConf();
59

  
60
		MapDocument resA = result(config, "A", "title title title 6BESR", null);
61
		MapDocument resB = result(config, "B", "title title title 6CLER", "2012-02");
62

  
63
		double d = new PaceDocumentDistance().between(resA, resB, config);
64
		System.out.println(String.format(" d ---> %s", d));
65

  
66
		assertTrue((d > 0.9) && (d < 1.0));
67
	}
68

  
69
	@Test
70
	public void testDistanceResult() {
71
		Config config = getResultConf();
72

  
73
		MapDocument resA = result(config, "A", "title title title BES", "");
74
		MapDocument resB = result(config, "B", "title title title CLEO", null);
75

  
76
		double d = new PaceDocumentDistance().between(resA, resB, config);
77
		System.out.println(String.format(" d ---> %s", d));
78

  
79
		// assertTrue(d > 0.9 && d < 1.0);
80
	}
81

  
82
	@Test
83
	public void testDistanceResultMissingTwoDate() {
84
		Config config = getResultConf();
85

  
86
		MapDocument resA = result(config, "A", "title title title 6BESR", null);
87
		MapDocument resB = result(config, "B", "title title title 6CLER", null);
88

  
89
		double d = new PaceDocumentDistance().between(resA, resB, config);
90

  
91
		System.out.println(String.format(" d ---> %s", d));
92

  
93
		assertTrue((d > 0.9) && (d < 1.0));
94
	}
95

  
96
	@Test
97
	public void testDistanceOrganizationIgnoreMissing() {
98

  
99
		Config config = getOrganizationSimpleConf();
100

  
101
		MapDocument orgA = organization(config, "A", "CONSIGLIO NAZIONALE DELLE RICERCHE", null);
102
		MapDocument orgB = organization(config, "B", "CONSIGLIO NAZIONALE DELLE RICERCHE", "CNR");
103

  
104
		double d = new PaceDocumentDistance().between(orgA, orgB, config);
105
		System.out.println(String.format(" d ---> %s", d));
106

  
107
		assertTrue(d == 1.0);
108
	}
109

  
110
	@Test
111
	public void testDistanceResultCase1() {
112

  
113
		Config config = getResultConf();
114

  
115
		MapDocument resA = result(config, "A", "Search for the Standard Model Higgs boson", "2003");
116
		MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003");
117

  
118
		double d = new PaceDocumentDistance().between(resA, resB, config);
119
		System.out.println(String.format(" d ---> %s", d));
120

  
121
		assertTrue((d > 0.9) && (d < 1.0));
122
	}
123

  
124
	@Test
125
	public void testDistanceResultWithAuthorsCase1() {
126

  
127
		Config config = getResultWithAuthorsConf(0.5, 0.5);
128

  
129
		List<String> authors = Lists.newArrayList("P. Manghi", "A. Bardi", "M. Artini");
130
		MapDocument resA = result(config, "A", "Search for the Standard Model Higgs boson", "2003", authors);
131
		MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", authors);
132

  
133
		double d = new PaceDocumentDistance().between(resA, resB, config);
134
		System.out.println(String.format(" d ---> %s", d));
135

  
136
		assertTrue((d > 0.9) && (d < 1.0));
137
	}
138

  
139
	@Test
140
	public void testDistanceResultWithAuthorsCase2() {
141

  
142
		Config config = getResultWithAuthorsConf(0.5, 0.5);
143

  
144
		MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003", Lists.newArrayList("P. Manghi", "A. Bardi", "M. Artini"));
145
		MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", Lists.newArrayList("P. Manghi", "C. Atzori", "A. Bardi"));
146

  
147
		double d = new PaceDocumentDistance().between(resA, resB, config);
148
		System.out.println(String.format(" d ---> %s", d));
149

  
150
		// assertTrue((d > 0.8) && (d < 0.9));
151
	}
152

  
153
	@Test
154
	public void testDistanceResultWithAuthorsCase3() {
155

  
156
		Config config = getResultWithAuthorsConf(0.5, 0.5);
157

  
158
		MapDocument resA =
159
				result(config, "A", "Search the Standard Model Higgs boson", "2003", Lists.newArrayList("A. Bardi", "Paolo Manghi", "Michele Artini"));
160
		MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", Lists.newArrayList("P. Manghi", "A. Bardi"));
161

  
162
		double d = new PaceDocumentDistance().between(resA, resB, config);
163
		System.out.println(String.format(" d ---> %s", d));
164

  
165
		assertTrue((d > 0.8) && (d < 0.9));
166
	}
167

  
168
}
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/test/java/eu/dnetlib/pace/clustering/ClusteringCombinerTest.java
1
package eu.dnetlib.pace.clustering;
2

  
3
import org.junit.Before;
4
import org.junit.Test;
5

  
6
import eu.dnetlib.pace.AbstractProtoPaceTest;
7
import eu.dnetlib.pace.config.Config;
8
import eu.dnetlib.pace.config.DynConf;
9
import eu.dnetlib.pace.config.Type;
10
import eu.dnetlib.pace.model.FieldList;
11
import eu.dnetlib.pace.model.FieldListImpl;
12
import eu.dnetlib.pace.model.FieldValueImpl;
13
import eu.dnetlib.pace.model.document.MapDocument;
14

  
15
public class ClusteringCombinerTest extends AbstractProtoPaceTest {
16

  
17
	private Config config;
18

  
19
	@Before
20
	public void setUp() {
21
		config = DynConf.load(cfg);
22
	}
23

  
24
	@Test
25
	public void testCombine() {
26
		String title = "Dipping in Cygnus X-2 in a multi-wavelength campaign due to absorption of extended ADC emission";
27
		MapDocument result = result(config, "A", title, "2013");
28
		FieldList fl = new FieldListImpl();
29
		fl.add(new FieldValueImpl(Type.String, "desc", "lorem ipsum cabalie qwerty"));
30
		result.getFieldMap().put("desc", fl);
31
		System.out.println(title);
32
		System.out.println(ClusteringCombiner.combine(result, config));
33
	}
34

  
35
}
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/test/java/eu/dnetlib/pace/clustering/BlacklistAwareClusteringCombinerTest.java
1
package eu.dnetlib.pace.clustering;
2

  
3
import org.junit.Before;
4
import org.junit.Test;
5

  
6
import eu.dnetlib.pace.AbstractProtoPaceTest;
7
import eu.dnetlib.pace.config.Config;
8
import eu.dnetlib.pace.config.DynConf;
9
import eu.dnetlib.pace.config.Type;
10
import eu.dnetlib.pace.model.FieldList;
11
import eu.dnetlib.pace.model.FieldListImpl;
12
import eu.dnetlib.pace.model.FieldValueImpl;
13
import eu.dnetlib.pace.model.document.MapDocument;
14

  
15
public class BlacklistAwareClusteringCombinerTest extends AbstractProtoPaceTest {
16

  
17
	private Config config;
18

  
19
	@Before
20
	public void setUp() {
21
		config = DynConf.load(cfg);
22
	}
23

  
24
	@Test
25
	public void testCombine() {
26
		MapDocument result = result(config, "A", "Dipping in Cygnus X-2 in a multi-wavelength campaign due to absorption of extended ADC emission", "2013");
27
		FieldList fl = new FieldListImpl();
28
		fl.add(new FieldValueImpl(Type.String, "desc", "hello world description pipeline"));
29
		result.getFieldMap().put("desc", fl);
30
		result.getFieldMap().get("title").add(new FieldValueImpl(Type.String, "title", "lorem ipsum cabalie qwerty"));
31
		System.out.println(BlacklistAwareClusteringCombiner.filterAndCombine(result, config, config.blacklists()));
32
	}
33
}
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/test/java/eu/dnetlib/pace/AbstractProtoPaceTest.java
1
package eu.dnetlib.pace;
2

  
3
import java.util.List;
4

  
5
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
6
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
7
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
8
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
9
import eu.dnetlib.data.proto.PersonProtos.Person;
10
import eu.dnetlib.data.proto.ResultProtos.Result;
11
import eu.dnetlib.pace.config.Config;
12
import eu.dnetlib.pace.config.DynConf;
13
import eu.dnetlib.pace.config.Type;
14
import eu.dnetlib.pace.model.Field;
15
import eu.dnetlib.pace.model.FieldValueImpl;
16
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
17
import eu.dnetlib.pace.model.document.MapDocument;
18

  
19
public abstract class AbstractProtoPaceTest {
20

  
21
	protected static String cfg = "pace.conf { " + "clustering { " + "acronyms     { fields = [title, desc], params = { max = 1, minLen = 2, maxLen = 4} }, "
22
			+ "ngrampairs   { fields = [title], params = { max = 1, ngramLen = 3} }, " + "suffixprefix { fields = [title], params = { max = 1, len = 3 } } "
23
			+ "}, " + "conditions { " + "yearMatch { fields = [dateofacceptance] }, " + "titleVersionMatch { fields = [title] } }," + "model { "
24
			+ "title/value { algo = JaroWinkler, type = String, weight = 1.0, ignoreMissing = false }, "
25
			+ "dateofacceptance/value { algo = Null, type = String, weight = 0.0, ignoreMissing = true } }, " + "blacklists = [title]" + "}";
26

  
27
	protected Config getOrganizationSimpleConf() {
28
		Config config =
29
				DynConf.load("pace.conf { " + "conditions { }," + "model { "
30
						+ "legalname/value { algo = JaroWinkler, type = String, weight = 0.6, ignoreMissing = false }, "
31
						+ "legalshortname/value { algo = JaroWinkler, type = String, weight = 0.4, ignoreMissing = true } }" + "}");
32
		return config;
33
	}
34

  
35
	protected Config getResultConf() {
36
		return DynConf.load("pace.conf { " + "conditions { " + "yearMatch { fields = [dateofacceptance] }, " + "titleVersionMatch { fields = [title] } },"
37
				+ "model { " + "title/value { algo = JaroWinkler, type = String, weight = 1.0, ignoreMissing = false }, "
38
				+ "dateofacceptance/value { algo = Null, type = String, weight = 0.0, ignoreMissing = true } } " + "}");
39
	}
40

  
41
	protected Config getResultWithAuthorsConf(final double wTitle, final double wAuthors) {
42
		return DynConf.load("pace.conf { " + "conditions { " + "yearMatch { fields = [dateofacceptance] }, " + "titleVersionMatch { fields = [title] } },"
43
				+ "model { " + "title/value { algo = JaroWinkler, type = String, weight = " + String.valueOf(wTitle) + ", ignoreMissing = false }, "
44
				+ "dateofacceptance/value { algo = Null, type = String, weight = 0.0, ignoreMissing = true }, "
45
				+ "author/metadata/secondnames/value { algo = SortedLevel2JaroWinkler, type = List, weight = " + String.valueOf(wAuthors)
46
				+ ", ignoreMissing = true } } " + "}");
47
	}
48

  
49
	protected Config getResultSimpleConf() {
50
		return DynConf.load("pace.conf { " + "conditions { }," + "model { "
51
				+ "title/value { algo = JaroWinkler, type = String, weight = 1.0, ignoreMissing = false } } " + "}");
52
	}
53

  
54
	protected Config getResultSimpleConf2() {
55
		return DynConf.load("pace.conf { " + "model { " + "title/value { algo = JaroWinkler, type = String, weight = 1.0, ignoreMissing = false } } " + "}");
56
	}
57

  
58
	protected MapDocument result(final Config config, final String id, final String title, final String date) {
59
		return result(config, id, title, date, null);
60
	}
61

  
62
	protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> authors) {
63
		Result.Metadata.Builder builder = Result.Metadata.newBuilder();
64
		if (title != null) {
65
			builder.addTitle(getStruct(title, getQualifier("main", "dnet:titles")));
66
		}
67
		if (date != null) {
68
			builder.setDateofacceptance(sf(date));
69
		}
70
		if ((authors != null) && !authors.isEmpty()) {
71
			for (String author : authors) {
72

  
73
				eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(author, false);
74
				Person.Builder pBuilder = Person.newBuilder();
75

  
76
				if (p.isAccurate()) {
77
					pBuilder.getMetadataBuilder().setFirstname(sf(p.getNormalisedFirstName()));
78
					pBuilder.getMetadataBuilder().addSecondnames(sf(p.getNormalisedSurname()));
79
				}
80

  
81
				pBuilder.getMetadataBuilder().setFullname(sf(p.getNormalisedFullname()));
82
				builder.addAuthor(pBuilder);
83
			}
84
		}
85

  
86
		return ProtoDocumentBuilder.newInstance(id, builder.build(), config.fields());
87
	}
88

  
89
	protected MapDocument organization(final Config config, final String id, final String legalName, final String legalShortName) {
90
		Organization.Metadata.Builder builder = Organization.Metadata.newBuilder();
91
		if (legalName != null) {
92
			builder.setLegalname(sf(legalName));
93
		}
94
		if (legalShortName != null) {
95
			builder.setLegalshortname(sf(legalShortName));
96
		}
97

  
98
		return ProtoDocumentBuilder.newInstance(id, builder.build(), config.fields());
99
	}
100

  
101
	protected Field title(final String s) {
102
		return new FieldValueImpl(Type.String, "title", s);
103
	}
104

  
105
	protected static StringField.Builder sf(final String s) {
106
		return StringField.newBuilder().setValue(s);
107
	}
108

  
109
	protected static Qualifier.Builder getQualifier(final String classname, final String schemename) {
110
		return Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename);
111
	}
112

  
113
	protected static StructuredProperty.Builder getStruct(final String value, final Qualifier.Builder qualifier) {
114
		return StructuredProperty.newBuilder().setValue(value).setQualifier(qualifier);
115
	}
116

  
117
}
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/test/java/eu/dnetlib/data/mapreduce/util/OafDecoderTest.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import org.junit.Test;
4

  
5
import eu.dnetlib.data.proto.KindProtos.Kind;
6

  
7
public class OafDecoderTest {
8

  
9
	@Test
10
	public void test() {
11

  
12
		OafDecoder decoder = OafTest.embed(OafTest.getResult("50|id_1"), Kind.entity);
13

  
14
		System.out.println(decoder.asXml());
15
	}
16
}
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/test/java/eu/dnetlib/data/mapreduce/util/OafRelDecoderTest.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertNotNull;
5

  
6
import org.junit.Before;
7
import org.junit.Test;
8

  
9
import com.google.protobuf.Descriptors.FieldDescriptor;
10

  
11
import eu.dnetlib.data.proto.OafProtos.OafRel;
12
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship;
13
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
14
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
15

  
16
public class OafRelDecoderTest {
17

  
18
	private OafRel oafRel;
19

  
20
	@Before
21
	public void setUp() {
22
		oafRel = OafTest.getPersonResult("ID_1", "ID_2", "1", "isAuthor");
23
	}
24

  
25
	@Test
26
	public void testSetClass() {
27

  
28
		OafRelDecoder d1 = OafRelDecoder.decode(oafRel);
29

  
30
		assertNotNull(d1);
31
		assertEquals("isAuthor", d1.getRelClass());
32

  
33
		OafRelDecoder d2 = OafRelDecoder.decode(d1.setClassId("hasAuthor").build());
34

  
35
		assertEquals("hasAuthor", d2.getRelClass());
36
		assertEquals("hasAuthor", d2.getRelMetadata().getSemantics().getClassid());
37
		assertEquals("hasAuthor", d2.getRelMetadata().getSemantics().getClassname());
38

  
39
		FieldDescriptor fd = Authorship.getDescriptor().findFieldByName("ranking");
40
		assertEquals(d1.getSubRel().getField(fd), d2.getSubRel().getField(fd));
41
	}
42

  
43
	@Test
44
	public void testGetCF() {
45
		assertEquals("personResult_authorship_isAuthorOf", OafRelDecoder.getCF(RelType.personResult, SubRelType.authorship, Authorship.RelName.isAuthorOf));
46
		assertEquals("personResult_authorship_isAuthorOf", OafRelDecoder.getCF(RelType.personResult, SubRelType.authorship, "isAuthorOf"));
47
	}
48

  
49
}
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/test/resources/eu/dnetlib/data/transform/record.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<record xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:oaf="http://namespace.openaire.eu/oaf">
3
  <header xmlns="http://namespace.openaire.eu/">
4
    <dri:objIdentifier>od______1064::fe947e59cf7db2f039b4c8cc25693fb0</dri:objIdentifier>
5
    <dri:recordIdentifier>95168db1-d57e-4b99-855b-993cf91d1283_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==::oai:ora.ouls.ox.ac.uk:uuid:5d8f6cbb-1283-4957-8c55-48a4024bed76</dri:recordIdentifier>
6
    <dri:dateOfCollection/>
7
    <dri:mdFormat/>
8
    <dri:mdFormatInterpretation/>
9
    <dri:repositoryId>2a02b271-0756-453c-b2f0-8c472a8806a5_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId>
10
    <dr:objectIdentifier/>
11
    <dr:dateOfCollection>2013-05-10T16:04:02Z</dr:dateOfCollection>
12
    <oaf:datasourceprefix>od______1064</oaf:datasourceprefix>
13
  </header>
14
  <metadata xmlns="http://namespace.openaire.eu/">
15
    <dc:creator>Uphoff, S</dc:creator>
16
    <dc:creator>Holden, SJ</dc:creator>
17
    <dc:dateAccepted>2011-01-01</dc:dateAccepted>
18
    <dc:description>The analysis of structure and dynamics of biomolecules is important for understanding their function. Toward this aim, we introduce a method called 'switchable FRET', which combines single-molecule fluorescence resonance energy transfer (FRET) with reversible photoswitching of fluorophores. Typically, single-molecule FRET is measured within a single donor-acceptor pair and reports on only one distance. Although multipair FRET approaches that monitor multiple distances have been developed, they are technically challenging and difficult to extend, mainly because of their reliance on spectrally distinct acceptors. In contrast, switchable FRET sequentially probes FRET between a single donor and spectrally identical photoswitchable acceptors, dramatically reducing the experimental and analytical complexity and enabling direct monitoring of multiple distances. Our experiments on DNA molecules, a protein-DNA complex and dynamic Holliday junctions demonstrate the potential of switchable FRET for studying dynamic, multicomponent biomolecules.          </dc:description>
19
    <dc:identifier>http://pub.uni-bielefeld.de/publication/2303387</dc:identifier>
20
    <dc:language>eng</dc:language>
21
    <dc:title>Monitoring multiple distances within a single molecule using switchable FRET.</dc:title>
22
    <dc:source>Symplectic Elements at Oxford</dc:source>
23
    <dc:source>PubMed (http://www.ncbi.nlm.nih.gov/pubmed/)</dc:source>
24
    <dc:source>Web of Science (Lite) (http://apps.webofknowledge.com/summary.do)</dc:source>
25
    <dc:subject>Biotinylation</dc:subject>
26
    <dc:subject>Computer Simulation</dc:subject>
27
    <dr:CobjCategory>0001</dr:CobjCategory>
28
    <dr:CobjIdentifier>urn:uuid:5d8f6cbb-1283-4957-8c55-48a4024bed76</dr:CobjIdentifier>
29
    <dr:CobjIdentifier>pii:nmeth.1502</dr:CobjIdentifier>
30
    <dr:CobjIdentifier>local:71163</dr:CobjIdentifier>
31
    <dr:CobjIdentifier>eissn:1548-7105</dr:CobjIdentifier>
32
    <dr:CobjIdentifier>doi:10.1038/nmeth.1502</dr:CobjIdentifier>
33
    <dr:CobjIdentifier>issn:1548-7091</dr:CobjIdentifier>
34
    <oaf:accessrights>EMBARGO</oaf:accessrights>
35
    <oaf:collectedDatasourceid>issn____::12345678</oaf:collectedDatasourceid>
36
	<oaf:hostedBy name="DOAJ" id="doaj____::1234"/>
37
    <oaf:collectedFrom name="My favourite journal" id="issn____::12345678"/>
38
    <oaf:fulltext>http://xyz</oaf:fulltext>
39
    <oaf:journal issn="12345678" eissn="e1234567" lissn="l1234567">My favourite journal</oaf:journal>
40
    <oaf:journal issn="12345678" eissn="e1234567">My second favourite journal</oaf:journal>  
41
    <oaf:identifier identifierType="doi">10.1038/nmeth.1502</oaf:identifier>
42
    <oaf:reference identifier="IPR004915" query="http://www.ebi.ac.uk/europepmc/webservices/rest/PMC/PMC155133/databaseLinks/INTERPRO/1/json" source="INTERPRO" title="Nonstructural protein NS-S, bunyaviral" type="dataset">http://www.ebi.ac.uk/interpro/entry/IPR004915</oaf:reference>
43
	<oaf:reference identifier="IPRXXXXXX" query="http://www.ebi.ac.uk/europepmc/webservices/rest/PMC/PMCYYYYYY/databaseLinks/INTERPRO/2/json" source="INTERPRO" title="Nonstructural protein XY-S, bunyaviral" type="dataset">http://www.ebi.ac.uk/interpro/entry/TTTTTTTTT</oaf:reference>  
44
  </metadata>
45
</record>
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/test/resources/eu/dnetlib/data/transform/simpleRecord.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<record>
3
  <metadata>
4
    <a>A text value</a>
5
    <b attr="attribute value"/>
6
  </metadata>
7
</record>
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/main/java/eu/dnetlib/actionmanager/actions/infopackage/DMFInfoPackageToHbaseXsltFunctions.java
1
package eu.dnetlib.actionmanager.actions.infopackage;
2

  
3
import org.w3c.dom.NodeList;
4

  
5
import eu.dnetlib.data.transform.xml.DmfToHbaseXsltFunctions;
6

  
7
public class DMFInfoPackageToHbaseXsltFunctions extends DmfToHbaseXsltFunctions {
8

  
9
	// dnet:oafInstanceFromMDStore($resultId, $hostedby, //dc:identifier, "userclaim:doi", "0.9")
10
	// public static String oafInstanceFromInfoPackage(
11
	// final String resultId,
12
	// final String hostedbyId,
13
	// final String hostedbyName,
14
	// final String provenance,
15
	// final String trust,
16
	// final NodeList nodelist) {
17
	// return oafInstanceFromDMF(resultId, hostedbyId, hostedbyName, provenance, trust, nodelist);
18
	// }
19

  
20
	// dnet:oafPersonFromInfoPackage($personId, ., "userclaim:doi", "0.9")
21
	public static String oafPersonFromInfoPackage(final String personId,
22
			final String fullname,
23
			final String provenance,
24
			final String trust,
25
			final String collectedFromId,
26
			final String collectedFromName,
27
			final String originalId,
28
			final String dateOfCollection) {
29
		return oafPerson_FromDMF(personId, fullname, provenance, trust, collectedFromId, collectedFromName, originalId, dateOfCollection);
30
	}
31

  
32
	public static String oafResultFromInfoPackage(final String resultId,
33
			final String provenance,
34
			final String trust,
35
			final String hostedbyId,
36
			final String hostedbyName,
37
			final String collectedFromId,
38
			final String collectedFromName,
39
			final String originalId,
40
			final String dateOfCollection,
41
			final NodeList nodelist) {
42
		return oafResult_FromDMF(resultId, provenance, trust, hostedbyId, hostedbyName, collectedFromId, collectedFromName, originalId, dateOfCollection,
43
				nodelist);
44
	}
45

  
46
	// dnet:oafPersonResultFromInfoPackage($resultId, $oafPerson, position(), "userclaim:doi", "0.9")
47
	public static String oafPersonResultFromInfoPackage(final String personId,
48
			final String resultId,
49
			final int rank,
50
			final String relClass,
51
			final String provenance,
52
			final String trust) {
53
		return oafPersonResult_Authorship_FromDMF(personId, resultId, rank, relClass, provenance, trust);
54
	}
55

  
56
	// dnet:oafResultProjectFromInfoPackage($resultId, $projectId, "userclaim:doi", "0.9")
57
	public static String oafResultProjectFromInfoPackage(final String sourceId,
58
			final String targetId,
59
			final String relClass,
60
			final String provenance,
61
			final String trust) {
62
		return oafResultProject_Outcome_FromDMF(sourceId, targetId, relClass, provenance, trust);
63
	}
64

  
65
}
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/main/java/eu/dnetlib/actionmanager/actions/infopackage/DataciteInfoPackageToHbaseXsltFunctions.java
1
package eu.dnetlib.actionmanager.actions.infopackage;
2

  
3
import org.w3c.dom.NodeList;
4

  
5
import eu.dnetlib.data.transform.xml.DataciteToHbaseXsltFunctions;
6

  
7
public class DataciteInfoPackageToHbaseXsltFunctions extends DataciteToHbaseXsltFunctions {
8

  
9
	public static String oafDataCiteResultFromInfoPackage(final String resultId,
10
			final NodeList metadata,
11
			final NodeList titles,
12
			final NodeList subjects,
13
			final NodeList publisher,
14
			final NodeList descriptions,
15
			final NodeList dates,
16
			final NodeList resourceTypes,
17
			final NodeList formats,
18
			final NodeList sizes,
19
			final NodeList languages,
20
			final NodeList rights,
21
			final NodeList version,
22
			final String provenance,
23
			final String trust,
24
			final String hostedbyId,
25
			final String hostedbyName,
26
			final String collectedfromId,
27
			final String collectedfromName,
28
			final String originalId,
29
			final String instanceUri,
30
			final String dateOfCollection) {
31

  
32
		return oafResult_FromDatacite(resultId, metadata, titles, subjects, publisher, descriptions, dates, resourceTypes, formats, sizes, languages, rights,
33
				version, provenance, trust, hostedbyId, hostedbyName, collectedfromId, collectedfromName, originalId, instanceUri, dateOfCollection);
34

  
35
	}
36

  
37
	public static String oafDataCitePersonFromInfoPackage(final String personId,
38
			final String fullname,
39
			final String provenanceAction,
40
			final String trust,
41
			final String collectedfromId,
42
			final String collectedfromName,
43
			final String originalId,
44
			final String dateOfCollection) {
45

  
46
		return oafPerson_FromDatacite(personId, fullname, provenanceAction, trust, collectedfromId, collectedfromName, originalId, dateOfCollection);
47

  
48
	}
49

  
50
	public static String oafDataCitePersonResultFromInfoPackage(final String personId,
51
			final String resultId,
52
			final int rank,
53
			final String relClass,
54
			final String provenanceAction,
55
			final String trust) {
56
		return oafPersonResult_Authorship_FromDatacite(personId, resultId, rank, relClass, provenanceAction, trust);
57
	}
58

  
59
}
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/main/java/eu/dnetlib/pace/model/ProtoDocumentBuilder.java
1
package eu.dnetlib.pace.model;
2

  
3
import java.util.Arrays;
4
import java.util.List;
5
import java.util.Map;
6

  
7
import org.apache.commons.lang.StringUtils;
8

  
9
import com.google.common.collect.Lists;
10
import com.google.common.collect.Maps;
11
import com.google.protobuf.Descriptors.FieldDescriptor;
12
import com.google.protobuf.GeneratedMessage;
13

  
14
import eu.dnetlib.pace.model.document.DocumentBuilder;
15
import eu.dnetlib.pace.model.document.MapDocument;
16

  
17
/**
18
 * The Class ProtoDocumentBuilder.
19
 */
20
public class ProtoDocumentBuilder extends DocumentBuilder {
21

  
22
	/**
23
	 * New instance.
24
	 * 
25
	 * @param id
26
	 *            the id
27
	 * @param metadata
28
	 *            the metadata
29
	 * @param fields
30
	 *            the fields
31
	 * @return the map document
32
	 */
33
	public static MapDocument newInstance(final String id, final GeneratedMessage metadata, final List<FieldDef> fields) {
34
		return newInstance(id, new ProtoDocumentBuilder().generateFieldMap(metadata, fields));
35
	}
36

  
37
	/**
38
	 * Generate field map.
39
	 * 
40
	 * @param metadata
41
	 *            the metadata
42
	 * @param fields
43
	 *            the fields
44
	 * @return the map
45
	 */
46
	private Map<String, FieldList> generateFieldMap(final GeneratedMessage metadata, final List<FieldDef> fields) {
47
		Map<String, FieldList> fieldMap = Maps.newHashMap();
48

  
49
		for (FieldDef fd : fields) {
50
			fieldMap.put(fd.getName(), processPath(fd, metadata, Arrays.asList(fd.getPath().split(FieldDef.PATH_SEPARATOR))));
51
		}
52

  
53
		return fieldMap;
54
	}
55

  
56
	/**
57
	 * Process path.
58
	 * 
59
	 * @param fd
60
	 *            the fd
61
	 * @param message
62
	 *            the message
63
	 * @param list
64
	 *            the list
65
	 * @return the field list
66
	 */
67
	public FieldList processPath(final FieldDef fd, final GeneratedMessage message, final List<String> list) {
68

  
69
		final FieldList response = new FieldListImpl(fd.getName());
70

  
71
		if (list.isEmpty()) throw new RuntimeException("ProtoBuf navigation path is empty");
72

  
73
		FieldDescriptor desc = message.getDescriptorForType().findFieldByName(list.get(0));
74
		if (desc != null) {
75
			if (desc.isRepeated()) {
76
				int count = message.getRepeatedFieldCount(desc);
77
				for (int i = 0; i < count; i++) {
78
					Object field = message.getRepeatedField(desc, i);
79
					response.addAll(generateFields(fd, field, list));
80
				}
81
			} else {
82
				Object field = message.getField(desc);
83
				response.addAll(generateFields(fd, field, list));
84
			}
85
		} else throw new RuntimeException("Invalid protobuf path (field not found): " + StringUtils.join(list, ">"));
86

  
87
		return response;
88
	}
89

  
90
	/**
91
	 * Generate fields.
92
	 * 
93
	 * @param fd
94
	 *            the fd
95
	 * @param field
96
	 *            the field
97
	 * @param list
98
	 *            the list
99
	 * @return the list
100
	 */
101
	private List<Field> generateFields(final FieldDef fd, final Object field, final List<String> list) {
102
		if (field instanceof GeneratedMessage) {
103
			if (list.size() > 1) return processPath(fd, (GeneratedMessage) field, list.subList(1, list.size()));
104
			else throw new RuntimeException("No primitive type found");
105
		} else {
106
			if (list.size() == 1) {
107
				switch (fd.getType()) {
108
				case Int:
109
				case String:
110
					return Lists.newArrayList(new FieldValueImpl(fd.getType(), fd.getName(), field));
111
				case List:
112
					FieldListImpl fl = new FieldListImpl(fd.getName());
113
					fl.add(new FieldValueImpl(fd.getType(), fd.getName(), field));
114
					return fl;
115
				default:
116
					throw new IllegalArgumentException("invalid type: " + fd.getType().toString());
117
				}
118
			} else throw new RuntimeException("Found a primitive type before the path end");
119
		}
120
	}
121

  
122
}
modules/dnet-openaireplus-mapping-utils/branches/2.2/src/main/java/eu/dnetlib/data/transform/xml/AbstractDNetOafXsltFunctions.java
1
package eu.dnetlib.data.transform.xml;
2

  
3
import java.nio.charset.Charset;
4
import java.security.MessageDigest;
5
import java.util.List;
6
import java.util.Map;
7

  
8
import org.apache.commons.codec.binary.Base64;
9
import org.apache.commons.codec.binary.Hex;
10
import org.apache.commons.lang.StringUtils;
11
import org.w3c.dom.Node;
12
import org.w3c.dom.NodeList;
13

  
14
import com.google.common.base.Predicate;
15
import com.google.common.base.Splitter;
16
import com.google.common.collect.Lists;
17
import com.google.common.collect.Maps;
18
import com.google.protobuf.Descriptors.Descriptor;
19
import com.google.protobuf.Descriptors.FieldDescriptor;
20
import com.google.protobuf.InvalidProtocolBufferException;
21
import com.google.protobuf.Message;
22
import com.google.protobuf.Message.Builder;
23
import com.google.protobuf.ProtocolMessageEnum;
24

  
25
import eu.dnetlib.data.proto.FieldTypeProtos.BoolField;
26
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
27
import eu.dnetlib.data.proto.FieldTypeProtos.IntField;
28
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
29
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
30
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
31
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
32
import eu.dnetlib.data.proto.KindProtos.Kind;
33
import eu.dnetlib.data.proto.OafProtos.Oaf;
34
import eu.dnetlib.data.proto.OafProtos.OafEntity;
35
import eu.dnetlib.data.proto.OafProtos.OafRel;
36
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
37
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
38
import eu.dnetlib.data.proto.TypeProtos.Type;
39
import eu.dnetlib.miscutils.collections.Pair;
40
import eu.dnetlib.miscutils.iterators.IterablePair;
41

  
42
public abstract class AbstractDNetOafXsltFunctions {
43

  
44
	// NOTE(review): by its name presumably the maximum length of a namespace prefix; its users are not visible here — confirm.
	private static final int MAX_NSPREFIX_LEN = 12;
45
	// Regular expression applied by urlFilter to trimmed input: accepts strings beginning with http://, https:// or ftp://.
	public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
46
	// Code -> display-name lookup, filled by the static initializer of this class and read by setQualifier to resolve class names.
	protected static Map<String, String> code2name = Maps.newHashMap();
47

  
48
	public static Predicate<String> urlFilter = new Predicate<String>() {
49

  
50
		@Override
51
		public boolean apply(final String s) {
52
			return s.trim().matches(URL_REGEX);
53
		}
54
	};
55

  
56
	// Fills code2name with code -> display-name pairs. The entries mix several vocabularies in one map:
	// two-letter country codes, three-letter language codes (some as "xxx/yyy" pairs), and assorted terms
	// such as result types, access rights, provenance actions and funding schemes.
	// Some names are reachable through more than one key (e.g. "GR"/"EL", "GB"/"UK", "KO"/"XK", "ger/deu"/"deu/ger").
	// NOTE(review): the name stored for "ina" ("Auxiliary Language Association)") looks truncated — confirm against the source vocabulary.
	static {
57
		code2name.put("AF", "AFGHANISTAN");
58
		code2name.put("CX", "CHRISTMAS ISLAND");
59
		code2name.put("CC", "COCOS (KEELING) ISLANDS");
60
		code2name.put("aka", "Akan");
61
		code2name.put("CO", "Colombia");
62
		code2name.put("result", "result");
63
		code2name.put("AX", "ÅLAND ISLANDS");
64
		code2name.put("AS", "AMERICAN SAMOA");
65
		code2name.put("AD", "ANDORRA");
66
		code2name.put("AO", "ANGOLA");
67
		code2name.put("AI", "ANGUILLA");
68
		code2name.put("AQ", "ANTARCTICA");
69
		code2name.put("AG", "ANTIGUA AND BARBUDA");
70
		code2name.put("AW", "ARUBA");
71
		code2name.put("BS", "BAHAMAS");
72
		code2name.put("BB", "BARBADOS");
73
		code2name.put("BZ", "BELIZE");
74
		code2name.put("BM", "BERMUDA");
75
		code2name.put("BT", "BHUTAN");
76
		code2name.put("BQ", "BONAIRE, SINT EUSTATIUS AND SABA");
77
		code2name.put("BV", "BOUVET ISLAND");
78
		code2name.put("IO", "BRITISH INDIAN OCEAN TERRITORY");
79
		code2name.put("KY", "CAYMAN ISLANDS");
80
		code2name.put("TD", "CHAD");
81
		code2name.put("BG", "Bulgaria");
82
		code2name.put("AT", "Austria");
83
		code2name.put("BE", "Belgium");
84
		code2name.put("CA", "Canada");
85
		code2name.put("BJ", "Benin");
86
		code2name.put("CN", "China (People's Republic of)");
87
		code2name.put("AU", "Australia");
88
		code2name.put("BR", "Brazil");
89
		code2name.put("AR", "Argentina");
90
		code2name.put("BF", "Burkina Faso");
91
		code2name.put("AL", "Albania");
92
		code2name.put("CV", "Cape Verde");
93
		code2name.put("AZ", "Azerbaijan");
94
		code2name.put("BA", "Bosnia and Herzegovina");
95
		code2name.put("AM", "Armenia");
96
		code2name.put("DZ", "Algeria");
97
		code2name.put("CM", "Cameroon");
98
		code2name.put("BD", "Bangladesh");
99
		code2name.put("KH", "Cambodia");
100
		code2name.put("CL", "Chile");
101
		code2name.put("BW", "Botswana");
102
		code2name.put("BY", "Belarus");
103
		code2name.put("BO", "Bolivia");
104
		code2name.put("CF", "Central African Republic");
105
		code2name.put("BH", "Bahrain");
106
		code2name.put("BN", "Brunei Darussalam");
107
		code2name.put("BI", "Burundi");
108
		code2name.put("KM", "COMOROS");
109
		code2name.put("CK", "COOK ISLANDS");
110
		code2name.put("CW", "CURAÇAO");
111
		code2name.put("DJ", "DJIBOUTI");
112
		code2name.put("DM", "DOMINICA");
113
		code2name.put("GQ", "EQUATORIAL GUINEA");
114
		code2name.put("ER", "ERITREA");
115
		code2name.put("FK", "FALKLAND ISLANDS (MALVINAS)");
116
		code2name.put("PF", "FRENCH POLYNESIA");
117
		code2name.put("TF", "FRENCH SOUTHERN TERRITORIES");
118
		code2name.put("GI", "GIBRALTAR");
119
		code2name.put("GR", "GREECE");
120
		code2name.put("GD", "GRENADA");
121
		code2name.put("GP", "GUADELOUPE");
122
		code2name.put("GU", "GUAM");
123
		code2name.put("GG", "GUERNSEY");
124
		code2name.put("HM", "HEARD ISLAND AND MCDONALD ISLANDS");
125
		code2name.put("VA", "HOLY SEE (VATICAN CITY STATE)");
126
		code2name.put("IQ", "IRAQ");
127
		code2name.put("IM", "ISLE OF MAN");
128
		code2name.put("JE", "JERSEY");
129
		code2name.put("KI", "KIRIBATI");
130
		code2name.put("KP", "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF");
131
		code2name.put("LR", "LIBERIA");
132
		code2name.put("MQ", "MARTINIQUE");
133
		code2name.put("MR", "MAURITANIA");
134
		code2name.put("YT", "MAYOTTE");
135
		code2name.put("FM", "MICRONESIA, FEDERATED STATES OF");
136
		code2name.put("MN", "MONGOLIA");
137
		code2name.put("MS", "MONTSERRAT");
138
		code2name.put("NR", "NAURU");
139
		code2name.put("NU", "NIUE");
140
		code2name.put("NF", "NORFOLK ISLAND");
141
		code2name.put("MP", "NORTHERN MARIANA ISLANDS");
142
		code2name.put("PW", "PALAU");
143
		code2name.put("PY", "PARAGUAY");
144
		code2name.put("PN", "PITCAIRN");
145
		code2name.put("PR", "PUERTO RICO");
146
		code2name.put("RE", "RÉUNION");
147
		code2name.put("BL", "SAINT BARTHÉLEMY");
148
		code2name.put("SH", "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA");
149
		code2name.put("KN", "SAINT KITTS AND NEVIS");
150
		code2name.put("LC", "SAINT LUCIA");
151
		code2name.put("MF", "SAINT MARTIN (FRENCH PART)");
152
		code2name.put("PM", "SAINT PIERRE AND MIQUELON");
153
		code2name.put("VC", "SAINT VINCENT AND THE GRENADINES");
154
		code2name.put("FI", "Finland");
155
		code2name.put("NO", "Norway");
156
		code2name.put("CZ", "Czech Republic");
157
		code2name.put("IL", "Israel");
158
		code2name.put("EE", "Estonia");
159
		code2name.put("IT", "Italy");
160
		code2name.put("RO", "Romania");
161
		code2name.put("HU", "Hungary");
162
		code2name.put("NL", "Netherlands");
163
		code2name.put("FR", "France");
164
		code2name.put("IS", "Iceland");
165
		code2name.put("LV", "Latvia");
166
		code2name.put("PT", "Portugal");
167
		code2name.put("MT", "Malta");
168
		code2name.put("DK", "Denmark");
169
		code2name.put("IE", "Ireland");
170
		code2name.put("MA", "Morocco");
171
		code2name.put("IN", "India");
172
		code2name.put("KR", "Korea (Republic of)");
173
		code2name.put("MX", "Mexico");
174
		code2name.put("HT", "Haiti");
175
		code2name.put("EG", "Egypt");
176
		code2name.put("LT", "Lithuania");
177
		code2name.put("HR", "Croatia");
178
		code2name.put("LU", "Luxembourg");
179
		code2name.put("PG", "Papua New Guinea");
180
		code2name.put("GT", "Guatemala");
181
		code2name.put("ID", "Indonesia");
182
		code2name.put("NG", "Nigeria");
183
		code2name.put("NZ", "New Zealand");
184
		code2name.put("MK", "Former Yugoslav Republic of Macedonia");
185
		code2name.put("JP", "Japan");
186
		code2name.put("KZ", "Kazakhstan");
187
		code2name.put("NE", "Niger");
188
		code2name.put("ME", "Montenegro");
189
		code2name.put("GE", "Georgia");
190
		code2name.put("JO", "Jordan");
191
		code2name.put("LB", "Lebanon");
192
		code2name.put("PS", "Palestinian-administered areas");
193
		code2name.put("CR", "Costa Rica");
194
		code2name.put("PH", "Philippines");
195
		code2name.put("KE", "Kenya");
196
		code2name.put("CI", "Cote d'Ivoire");
197
		code2name.put("IR", "Iran (Islamic Republic of)");
198
		code2name.put("NI", "Nicaragua");
199
		code2name.put("KG", "Kyrgyzstan");
200
		code2name.put("EC", "Ecuador");
201
		code2name.put("MY", "Malaysia");
202
		code2name.put("FO", "Faroe Islands");
203
		code2name.put("ET", "Ethiopia");
204
		code2name.put("GH", "Ghana");
205
		code2name.put("GN", "Guinea");
206
		code2name.put("RW", "Rwanda");
207
		code2name.put("MG", "Madagascar");
208
		code2name.put("PE", "Peru");
209
		code2name.put("MW", "Malawi");
210
		code2name.put("JM", "Jamaica");
211
		code2name.put("HK", "Hong Kong");
212
		code2name.put("PK", "Pakistan");
213
		code2name.put("MZ", "Mozambique");
214
		code2name.put("LS", "Lesotho");
215
		code2name.put("NA", "Namibia");
216
		code2name.put("DO", "Dominican Republic");
217
		code2name.put("HN", "Honduras");
218
		code2name.put("CD", "Congo (Democratic Republic of)");
219
		code2name.put("ML", "Mali");
220
		code2name.put("NP", "Nepal");
221
		code2name.put("MU", "Mauritius");
222
		code2name.put("CU", "Cuba");
223
		code2name.put("PA", "Panama");
224
		code2name.put("LI", "Liechtenstein");
225
		code2name.put("GL", "Greenland");
226
		code2name.put("GA", "Gabon");
227
		code2name.put("CG", "Congo");
228
		code2name.put("OM", "Oman");
229
		code2name.put("KW", "Kuwait");
230
		code2name.put("QA", "Qatar");
231
		code2name.put("GY", "Guyana");
232
		code2name.put("GF", "French Guiana");
233
		code2name.put("FJ", "Fiji");
234
		code2name.put("NC", "New Caledonia");
235
		code2name.put("MM", "Myanmar");
236
		code2name.put("GW", "Guinea-Bissau");
237
		code2name.put("WS", "SAMOA");
238
		code2name.put("ST", "SAO TOME AND PRINCIPE");
239
		code2name.put("SL", "SIERRA LEONE");
240
		code2name.put("SX", "SINT MAARTEN (DUTCH PART)");
241
		code2name.put("SB", "SOLOMON ISLANDS");
242
		code2name.put("GS", "SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS");
243
		code2name.put("SS", "SOUTH SUDAN");
244
		code2name.put("SJ", "SVALBARD AND JAN MAYEN");
245
		code2name.put("TL", "TIMOR-LESTE");
246
		code2name.put("TK", "TOKELAU");
247
		code2name.put("TO", "TONGA");
248
		code2name.put("TC", "TURKS AND CAICOS ISLANDS");
249
		code2name.put("TV", "TUVALU");
250
		code2name.put("GB", "UNITED KINGDOM");
251
		code2name.put("UM", "UNITED STATES MINOR OUTLYING ISLANDS");
252
		code2name.put("VU", "VANUATU");
253
		code2name.put("VI", "VIRGIN ISLANDS, U.S.");
254
		code2name.put("WF", "WALLIS AND FUTUNA");
255
		code2name.put("EH", "WESTERN SAHARA");
256
		code2name.put("EU", "European Union");
257
		code2name.put("abk", "Abkhazian");
258
		code2name.put("aar", "Afar");
259
		code2name.put("afr", "Afrikaans");
260
		code2name.put("alb/sqi", "Albanian");
261
		code2name.put("amh", "Amharic");
262
		code2name.put("ara", "Arabic");
263
		code2name.put("arg", "Aragonese");
264
		code2name.put("arm/hye", "Armenian");
265
		code2name.put("asm", "Assamese");
266
		code2name.put("ava", "Avaric");
267
		code2name.put("ave", "Avestan");
268
		code2name.put("aym", "Aymara");
269
		code2name.put("aze", "Azerbaijani");
270
		code2name.put("bam", "Bambara");
271
		code2name.put("bak", "Bashkir");
272
		code2name.put("baq/eus", "Basque");
273
		code2name.put("bel", "Belarusian");
274
		code2name.put("ben", "Bengali");
275
		code2name.put("bih", "Bihari");
276
		code2name.put("bis", "Bislama");
277
		code2name.put("nob", "Bokmål, Norwegian; Norwegian Bokmål");
278
		code2name.put("bos", "Bosnian");
279
		code2name.put("bre", "Breton");
280
		code2name.put("bul", "Bulgarian");
281
		code2name.put("bur/mya", "Burmese");
282
		code2name.put("cat", "Catalan; Valencian");
283
		code2name.put("cha", "Chamorro");
284
		code2name.put("che", "Chechen");
285
		code2name.put("nya", "Chewa; Chichewa; Nyanja");
286
		code2name.put("chi/zho", "Chinese");
287
		code2name.put("chu", "Church Slavic; Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic");
288
		code2name.put("chv", "Chuvash");
289
		code2name.put("cor", "Cornish");
290
		code2name.put("cos", "Corsican");
291
		code2name.put("cre", "Cree");
292
		code2name.put("scr/hrv", "Croatian");
293
		code2name.put("cze/ces", "Czech");
294
		code2name.put("dan", "Danish");
295
		code2name.put("div", "Divehi");
296
		code2name.put("dut/nld", "Dutch; Flemish");
297
		code2name.put("dzo", "Dzongkha");
298
		code2name.put("eng", "English");
299
		code2name.put("epo", "Esperanto");
300
		code2name.put("est", "Estonian");
301
		code2name.put("ewe", "Ewe");
302
		code2name.put("fao", "Faroese");
303
		code2name.put("fij", "Fijian");
304
		code2name.put("fin", "Finnish");
305
		code2name.put("fre/fra", "French");
306
		code2name.put("fry", "Frisian");
307
		code2name.put("ful", "Fulah");
308
		code2name.put("gla", "Gaelic; Scottish Gaelic");
309
		code2name.put("glg", "Galician");
310
		code2name.put("lug", "Ganda");
311
		code2name.put("geo/kat", "Georgian");
312
		code2name.put("ger/deu", "German");
313
		code2name.put("kik", "Gikuyu; Kikuyu");
314
		code2name.put("gre/ell", "Greek, Modern (1453-)");
315
		code2name.put("kal", "Greenlandic; Kalaallisut");
316
		code2name.put("grn", "Guarani");
317
		code2name.put("guj", "Gujarati");
318
		code2name.put("hat", "Haitian; Haitian Creole");
319
		code2name.put("hau", "Hausa");
320
		code2name.put("heb", "Hebrew");
321
		code2name.put("her", "Herero");
322
		code2name.put("hin", "Hindi");
323
		code2name.put("hmo", "Hiri Motu");
324
		code2name.put("hun", "Hungarian");
325
		code2name.put("ice/isl", "Icelandic");
326
		code2name.put("ido", "Ido");
327
		code2name.put("ibo", "Igbo");
328
		code2name.put("ind", "Indonesian");
329
		code2name.put("ina", "Auxiliary Language Association)");
330
		code2name.put("ile", "Interlingue");
331
		code2name.put("ES", "Spain");
332
		code2name.put("SK", "Slovakia");
333
		code2name.put("TR", "Turkey");
334
		code2name.put("EL", "Greece");
335
		code2name.put("SE", "Sweden");
336
		code2name.put("UK", "United Kingdom");
337
		code2name.put("SI", "Slovenia");
338
		code2name.put("US", "United States");
339
		code2name.put("ZA", "South Africa");
340
		code2name.put("VN", "Viet Nam");
341
		code2name.put("TH", "Thailand");
342
		code2name.put("UG", "Uganda");
343
		code2name.put("TW", "Taiwan");
344
		code2name.put("RS", "Serbia");
345
		code2name.put("SC", "Seychelles");
346
		code2name.put("TN", "Tunisia");
347
		code2name.put("UA", "Ukraine");
348
		code2name.put("SN", "Senegal");
349
		code2name.put("KO", "Kosovo * UN resolution");
350
		code2name.put("UY", "Uruguay");
351
		code2name.put("UZ", "Uzbekistan");
352
		code2name.put("LK", "Sri Lanka");
353
		code2name.put("SG", "Singapore");
354
		code2name.put("SY", "Syrian Arab Republic");
355
		code2name.put("ZM", "Zambia");
356
		code2name.put("SO", "Somalia");
357
		code2name.put("VE", "Venezuela");
358
		code2name.put("VG", "Virgin Islands (British)");
359
		code2name.put("AE", "United Arab Emirates");
360
		code2name.put("SM", "San Marino");
361
		code2name.put("TG", "Togo");
362
		code2name.put("AN", "Netherlands Antilles");
363
		code2name.put("TJ", "Tajikistan");
364
		code2name.put("TM", "Turkmenistan");
365
		code2name.put("SA", "Saudi Arabia");
366
		code2name.put("YE", "Yemen");
367
		code2name.put("SZ", "Swaziland");
368
		code2name.put("SR", "Suriname");
369
		code2name.put("iku", "Inuktitut");
370
		code2name.put("ipk", "Inupiaq");
371
		code2name.put("gle", "Irish");
372
		code2name.put("ita", "Italian");
373
		code2name.put("jpn", "Japanese");
374
		code2name.put("jav", "Javanese");
375
		code2name.put("kan", "Kannada");
376
		code2name.put("kau", "Kanuri");
377
		code2name.put("kas", "Kashmiri");
378
		code2name.put("kaz", "Kazakh");
379
		code2name.put("khm", "Khmer");
380
		code2name.put("kin", "Kinyarwanda");
381
		code2name.put("kir", "Kirghiz");
382
		code2name.put("kom", "Komi");
383
		code2name.put("kon", "Kongo");
384
		code2name.put("kor", "Korean");
385
		code2name.put("kua", "Kuanyama; Kwanyama");
386
		code2name.put("kur", "Kurdish");
387
		code2name.put("lao", "Lao");
388
		code2name.put("lat", "Latin");
389
		code2name.put("lav", "Latvian");
390
		code2name.put("ltz", "Letzeburgesch; Luxembourgish");
391
		code2name.put("lim", "Limburgan; Limburger; Limburgish");
392
		code2name.put("lin", "Lingala");
393
		code2name.put("lit", "Lithuanian");
394
		code2name.put("lub", "Luba-Katanga");
395
		code2name.put("mac/mkd", "Macedonian");
396
		code2name.put("mlg", "Malagasy");
397
		code2name.put("may/msa", "Malay");
398
		code2name.put("mal", "Malayalam");
399
		code2name.put("mlt", "Maltese");
400
		code2name.put("glv", "Manx");
401
		code2name.put("mao/mri", "Maori");
402
		code2name.put("mar", "Marathi");
403
		code2name.put("mah", "Marshallese");
404
		code2name.put("mol", "Moldavian");
405
		code2name.put("mon", "Mongolian");
406
		code2name.put("nau", "Nauru");
407
		code2name.put("nav", "Navajo; Navaho");
408
		code2name.put("nde", "Ndebele, North");
409
		code2name.put("nbl", "Ndebele, South");
410
		code2name.put("ndo", "Ndonga");
411
		code2name.put("nep", "Nepali");
412
		code2name.put("sme", "Northern Sami");
413
		code2name.put("nor", "Norwegian");
414
		code2name.put("nno", "Norwegian Nynorsk; Nynorsk, Norwegian");
415
		code2name.put("oci", "Occitan (post 1500); Provençal");
416
		code2name.put("oji", "Ojibwa");
417
		code2name.put("ori", "Oriya");
418
		code2name.put("orm", "Oromo");
419
		code2name.put("oss", "Ossetian; Ossetic");
420
		code2name.put("pli", "Pali");
421
		code2name.put("pan", "Panjabi; Punjabi");
422
		code2name.put("per/fas", "Persian");
423
		code2name.put("pol", "Polish");
424
		code2name.put("por", "Portuguese");
425
		code2name.put("pus", "Pushto");
426
		code2name.put("que", "Quechua");
427
		code2name.put("roh", "Raeto-Romance");
428
		code2name.put("rum/ron", "Romanian");
429
		code2name.put("run", "Rundi");
430
		code2name.put("rus", "Russian");
431
		code2name.put("smo", "Samoan");
432
		code2name.put("sag", "Sango");
433
		code2name.put("san", "Sanskrit");
434
		code2name.put("srd", "Sardinian");
435
		code2name.put("scc/srp", "Serbian");
436
		code2name.put("sna", "Shona");
437
		code2name.put("iii", "Sichuan Yi");
438
		code2name.put("snd", "Sindhi");
439
		code2name.put("sin", "Sinhala; Sinhalese");
440
		code2name.put("slo/slk", "Slovak");
441
		code2name.put("slv", "Slovenian");
442
		code2name.put("som", "Somali");
443
		code2name.put("sot", "Sotho, Southern");
444
		code2name.put("spa", "Spanish; Castilian");
445
		code2name.put("sun", "Sundanese");
446
		code2name.put("swa", "Swahili");
447
		code2name.put("ssw", "Swati");
448
		code2name.put("swe", "Swedish");
449
		code2name.put("tgl", "Tagalog");
450
		code2name.put("tah", "Tahitian");
451
		code2name.put("tgk", "Tajik");
452
		code2name.put("tam", "Tamil");
453
		code2name.put("tat", "Tatar");
454
		code2name.put("tel", "Telugu");
455
		code2name.put("tha", "Thai");
456
		code2name.put("tib/bod", "Tibetan");
457
		code2name.put("tir", "Tigrinya");
458
		code2name.put("ton", "Tonga (Tonga Islands)");
459
		code2name.put("tso", "Tsonga");
460
		code2name.put("tsn", "Tswana");
461
		code2name.put("tur", "Turkish");
462
		code2name.put("tuk", "Turkmen");
463
		code2name.put("twi", "Twi");
464
		code2name.put("uig", "Uighur; Uyghur");
465
		code2name.put("ukr", "Ukrainian");
466
		code2name.put("urd", "Urdu");
467
		code2name.put("uzb", "Uzbek");
468
		code2name.put("ven", "Venda");
469
		code2name.put("vie", "Vietnamese");
470
		code2name.put("vol", "Volapük");
471
		code2name.put("wln", "Walloon");
472
		code2name.put("wel/cym", "Welsh");
473
		code2name.put("wol", "Wolof");
474
		code2name.put("xho", "Xhosa");
475
		code2name.put("yid", "Yiddish");
476
		code2name.put("yor", "Yoruba");
477
		code2name.put("zha", "Zhuang; Chuang");
478
		code2name.put("zul", "Zulu");
479
		code2name.put("deu/ger", "German");
480
		code2name.put("fra/fre", "French");
481
		code2name.put("srr", "Serbian");
482
		code2name.put("esl/spa", "Spanish");
483
		code2name.put("und", "Undetermined");
484
		code2name.put("UNKNOWN", "UNKNOWN");
485
		code2name.put("entityregistry", "entityregistry");
486
		code2name.put("aggregator", "aggregator");
487
		code2name.put("dataarchive", "dataarchive");
488
		code2name.put("cris", "cris");
489
		code2name.put("repository", "repository");
490
		code2name.put("CIP-EIP-TN", "CIP-Eco-Innovation - CIP-Thematic Network");
491
		code2name.put("ec:specificprogram", "specificprogram");
492
		code2name.put("ec:program", "program");
493
		code2name.put("ec:hasframeworkprogram", "hasframeworkprogram");
494
		code2name.put("ec:hasprogram", "hasprogram");
495
		code2name.put("171", "Article 171 of the Treaty");
496
		code2name.put("BSG", "Research for the benefit of specific groups");
497
		code2name.put("CP", "Collaborative project");
498
		code2name.put("providedBy", "provided by");
499
		code2name.put("dataset", "dataset");
500
		code2name.put("publication", "publication");
501
		code2name.put("dataset_dataset", "dataset_dataset");
502
		code2name.put("publication_dataset", "publication_dataset");
503
		code2name.put("publication_publication", "publication_publication");
504
		code2name.put("coordinator", "coordinator");
505
		code2name.put("participant", "participant");
506
		code2name.put("subcontractor", "subcontractor");
507
		code2name.put("principal investigating", "principal investigating");
508
		code2name.put("exploitation", "exploitation");
509
		code2name.put("collection", "collection");
510
		code2name.put("event", "event");
511
		code2name.put("film", "film");
512
		code2name.put("image", "image");
513
		code2name.put("interactiveResource", "interactiveResource");
514
		code2name.put("model", "model");
515
		code2name.put("physicalObject", "physicalObject");
516
		code2name.put("service", "service");
517
		code2name.put("software", "software");
518
		code2name.put("sound", "sound");
519
		code2name.put("text", "text");
520
		code2name.put("0000", "Unknown");
521
		code2name.put("0001", "Article");
522
		code2name.put("0002", "Book");
523
		code2name.put("0004", "Conference object");
524
		code2name.put("0005", "Contribution for newspaper or weekly magazine");
525
		code2name.put("0006", "Doctoral thesis");
526
		code2name.put("0007", "Master thesis");
527
		code2name.put("0008", "Bachelor thesis");
528
		code2name.put("0009", "External research report");
529
		code2name.put("0010", "Lecture");
530
		code2name.put("0011", "Internal report");
531
		code2name.put("0012", "Newsletter");
532
		code2name.put("0013", "Part of book or chapter of book");
533
		code2name.put("0014", "Research");
534
		code2name.put("0015", "Review");
535
		code2name.put("0016", "Preprint");
536
		code2name.put("0017", "Report");
537
		code2name.put("0018", "Annotation");
538
		code2name.put("0019", "Patent");
539
		code2name.put("0020", "Other");
540
		code2name.put("0021", "Dataset");
541
		code2name.put("main title", "main title");
542
		code2name.put("subtitle", "subtitle");
543
		code2name.put("alternative title", "alternative title");
544
		code2name.put("translated title", "translated title");
545
		code2name.put("OPEN", "Open Access");
546
		code2name.put("12MONTHS", "12 Months Embargo");
547
		code2name.put("OTHER", "Other");
548
		code2name.put("6MONTHS", "6 Months Embargo");
549
		code2name.put("RESTRICTED", "Restricted");
550
		code2name.put("EMBARGO", "Embargo");
551
		code2name.put("CLOSED", "Closed Access");
552
		code2name.put("wt:fundingStream", "Wellcome Trust: Funding Stream");
553
		code2name.put("wt:hasParentFunding", "wt:hasParentFunding");
554
		code2name.put("author", "author");
555
		code2name.put("isResultOf", "isResultOf");
556
		code2name.put("driver", "driver");
557
		code2name.put("openaire", "openaire");
558
		code2name.put("notCompatible", "notCompatible");
559
		code2name.put("available", "available");
560
		code2name.put("copyrighted", "copyrighted");
561
		code2name.put("created", "created");
562
		code2name.put("endDate", "endDate");
563
		code2name.put("issued", "issued");
564
		code2name.put("startDate", "startDate");
565
		code2name.put("submitted", "submitted");
566
		code2name.put("updated", "updated");
567
		code2name.put("valid", "valid");
568
		code2name.put("sysimport:crosswalk:repository", "sysimport:crosswalk:repository");
569
		code2name.put("sysimport:crosswalk:aggregator", "sysimport:crosswalk:aggregator");
570
		code2name.put("sysimport:crosswalk:entityregistry", "sysimport:crosswalk:entityregistry");
571
		code2name.put("sysimport:crosswalk:datasetarchive", "sysimport:crosswalk:datasetarchive");
572
		code2name.put("sysimport:crosswalk:cris", "sysimport:crosswalk:cris");
573
		code2name.put("sysimport:mining:repository", "sysimport:mining:repository");
574
		code2name.put("sysimport:mining:aggregator", "sysimport:mining:aggregator");
575
		code2name.put("sysimport:mining:entityregistry", "sysimport:mining:entityregistry");
576
		code2name.put("sysimport:mining:datasetarchive", "sysimport:mining:datasetarchive");
577
		code2name.put("sysimport:mining:cris", "sysimport:mining:cris");
578
		code2name.put("userclaim:doi", "userclaim:doi");
579
		code2name.put("userclaim:driver", "userclaim:driver");
580
		code2name.put("userclaim:orcid", "userclaim:orcid");
581
		code2name.put("ec:frameworkprogram", "frameworkprogram");
582
		code2name.put("ec:hasspecificprogram", "hasspecificprogram");
583
		code2name.put("CY", "Cyprus");
584
		code2name.put("CH", "Switzerland");
585
		code2name.put("PL", "Poland");
586
		code2name.put("DE", "Germany");
587
		code2name.put("RU", "Russian Federation");
588
		code2name.put("TZ", "Tanzania (United Republic of)");
589
		code2name.put("MD", "Moldova (Republic of)");
590
		code2name.put("LA", "Lao (People's Democratic Republic)");
591
		code2name.put("SD", "Sudan");
592
		code2name.put("ZW", "Zimbabwe");
593
		code2name.put("GM", "Gambia");
594
		code2name.put("SV", "El Salvador");
595
		code2name.put("MV", "Maldives");
596
		code2name.put("TT", "Trinidad and Tobago");
597
		code2name.put("MH", "Marshall Islands");
598
		code2name.put("MO", "Macao");
599
		code2name.put("XK", "Kosovo * UN resolution");
600
		code2name.put("LY", "Libyan Arab Jamahiriya");
601
		code2name.put("CP-CSA", "Combination of CP & CSA");
602
		code2name.put("CSA", "Coordination and support action");
603
		code2name.put("ERC", "Support for frontier research (ERC)");
604
		code2name.put("NoE", "Network of Excellence");
605
		code2name.put("MC", "Support for training and career development of researchers (Marie Curie)");
606
	}
607

  
608
	// Builder for Entities
609
	protected static Oaf getOaf(final OafEntity.Builder entity, final DataInfo.Builder info) {
610
		return _getOaf(Oaf.newBuilder(), info).setKind(Kind.entity).setEntity(entity).build();
611
	}
612

  
613
	// Builder for Rels
614
	protected static Oaf getOaf(final OafRel.Builder rel, final DataInfo.Builder info) {
615
		return _getOaf(Oaf.newBuilder(), info).setKind(Kind.relation).setRel(rel).build();
616
	}
617

  
618
	private static Oaf.Builder _getOaf(final Oaf.Builder oaf, final DataInfo.Builder info) {
619
		return oaf.setDataInfo(ensureDataInfo(info)).setTimestamp(System.currentTimeMillis());
620
	}
621

  
622
	protected static DataInfo.Builder ensureDataInfo(final DataInfo.Builder info) {
623
		if (info.isInitialized()) { return info; }
624
		return getDataInfo("UNKNOWN", "0.9", false, false);
625
	}
626

  
627
	protected static KeyValue getKV(final String id, final String name) {
628
		return KeyValue.newBuilder().setKey(id).setValue(name).build();
629
	}
630

  
631
	protected static OafRel.Builder getRel(final String sourceId,
632
			final String targetId,
633
			final RelType relType,
634
			final SubRelType subRelType,
635
			final String relClass,
636
			final boolean isChild) {
637
		return OafRel.newBuilder().setSource(sourceId).setTarget(targetId).setRelType(relType).setSubRelType(subRelType).setRelClass(relClass)
638
				.setChild(isChild);
639
	}
640

  
641
	protected static OafEntity.Builder getEntity(final Type type,
642
			final String id,
643
			final KeyValue collectedFrom,
644
			final String originalId,
645
			final String dateOfCollection,
646
			final List<StructuredProperty> pids) {
647
		OafEntity.Builder builder = OafEntity.newBuilder().setType(type).setId(id).addCollectedfrom(collectedFrom).addOriginalId(originalId)
648
				.setDateofcollection(dateOfCollection);
649

  
650
		if ((pids != null) && !pids.isEmpty()) {
651
			builder.addAllPid(pids);
652
		}
653

  
654
		return builder;
655
	}
656

  
657
	public static DataInfo.Builder getDataInfo(String provenanceaction, String trust, final boolean deletedbyinference, final boolean inferred) {
658
		if ((provenanceaction == null) || provenanceaction.isEmpty()) {
659
			provenanceaction = "UNKNOWN";
660
		}
661
		if ((trust == null) || trust.isEmpty()) {
662
			trust = "0.1";
663
		}
664
		return DataInfo.newBuilder().setDeletedbyinference(deletedbyinference).setInferred(inferred).setTrust(trust)
665
				.setProvenanceaction(getSimpleQualifier(provenanceaction, "dnet:provenanceActions"));
666
	}
667

  
668
	protected static Qualifier.Builder getSimpleQualifier(final String classname, final String schemename) {
669
		return getQualifier(classname, classname, schemename, schemename);
670
	}
671

  
672
	protected static Qualifier.Builder getSimpleQualifier(final ProtocolMessageEnum classname, final String schemename) {
673
		return getQualifier(classname.toString(), classname.toString(), schemename, schemename);
674
	}
675

  
676
	protected static Qualifier.Builder getQualifier(final String classid, final String classname, final String schemeid, final String schemename) {
677
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemeid).setSchemename(schemename);
678
	}
679

  
680
	protected static Qualifier.Builder setQualifier(final Qualifier.Builder qualifier, final List<String> fields) {
681
		if ((fields == null) || fields.isEmpty() || fields.get(0).isEmpty()) { return null; }
682

  
683
		if ((fields != null) && !fields.isEmpty() && (fields.get(0) != null)) {
684
			qualifier.setClassid(fields.get(0));
685
			String name = code2name.get(fields.get(0));
686
			qualifier.setClassname(name != null ? name : fields.get(0));
687
		}
688
		return qualifier;
689
	}
690

  
691
	protected static void addStructuredProps(final Message.Builder builder,
692
			final FieldDescriptor fd,
693
			final List<String> values,
694
			final String classid,
695
			final String schemeid) {
696
		if (values != null) {
697
			for (String s : values) {
698
				addField(builder, fd, getStructuredProperty(s, classid, classid, schemeid, schemeid));
699
			}
700
		}
701
	}
702

  
703
	protected static List<StructuredProperty> parsePids(final NodeList nodelist) {
704

  
705
		final List<StructuredProperty> pids = Lists.newArrayList();
706

  
707
		for (int i = 0; i < nodelist.getLength(); i++) {
708
			final Node node = nodelist.item(i);
709
			if ((node.getNodeType() == Node.ELEMENT_NODE) && node.getLocalName().toLowerCase().equals("identifier")) {
710

  
711
				final Node pidType = node.getAttributes().getNamedItem("identifierType");
712

  
713
				for (int j = 0; j < node.getChildNodes().getLength(); j++) {
714
					Node child = node.getChildNodes().item(j);
715

  
716
					if ((child.getNodeType() == Node.TEXT_NODE) && (pidType != null) && (pidType.getNodeValue() != null) && !pidType.getNodeValue().isEmpty()
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff