Project

General

Profile

« Previous | Next » 

Revision 53516

[maven-release-plugin] copy for tag dnet-openaireplus-mapping-utils-6.2.17

View differences:

modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/test/resources/eu/dnetlib/pace/result.simple.pace.conf
1
{ 
2
	"wf" : { 
3
        "threshold" : "0.99", 
4
        "run" : "001", 
5
        "entityType" : "result", 
6
        "orderField" : "title", 
7
        "queueMaxSize" : "2000",
8
        "groupMaxSize" : "10",
9
        "slidingWindowSize" : "200",
10
        "rootBuilder" : [ "result" ],
11
        "includeChildren" : "true" 
12
    },
13
	"pace" : {		
14
  		"conditions" : [ ],		
15
		"model" : [
16
			{ "name" : "title", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" }
17
		],
18
		"blacklists" : { } 		
19
	}
20

  
21
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet45-parent</artifactId>
6
		<version>1.0.0</version>
7
		<relativePath />
8
	</parent>
9
	<modelVersion>4.0.0</modelVersion>
10
	<groupId>eu.dnetlib</groupId>
11
	<artifactId>dnet-openaireplus-mapping-utils</artifactId>
12
	<packaging>jar</packaging>
13
	<version>6.2.17</version>
14
	<scm>
15
		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17</developerConnection>
16
	</scm>
17

  
18
	<properties>
19
		<skipITs>true</skipITs>
20
	</properties>
21
	<build>
22
		<plugins>
23
			<plugin>
24
				<groupId>org.apache.maven.plugins</groupId>
25
				<artifactId>maven-failsafe-plugin</artifactId>
26
				<version>2.19.1</version>
27
				<executions>
28
					<execution>
29
						<id>integration-test</id>
30
						<goals>
31
							<goal>integration-test</goal>
32
						</goals>
33
					</execution>
34
					<execution>
35
						<id>verify</id>
36
						<goals>
37
							<goal>verify</goal>
38
						</goals>
39
					</execution>
40
				</executions>
41
				<configuration>
42
					<skipITs>${skipITs}</skipITs>
43
				</configuration>
44
			</plugin>
45
		</plugins>
46
	</build>
47

  
48
	<dependencies>
49
		<dependency>
50
			<groupId>com.google.guava</groupId>
51
			<artifactId>guava</artifactId>
52
			<version>${google.guava.version}</version>
53
		</dependency>		
54
		<dependency>
55
			<groupId>junit</groupId>
56
			<artifactId>junit</artifactId>
57
			<version>${junit.version}</version>
58
			<scope>test</scope>
59
		</dependency>
60
		<dependency>
61
			<groupId>com.ximpleware</groupId>
62
			<artifactId>vtd-xml</artifactId>
63
			<version>[2.12, 3.0.0)</version>
64
		</dependency>
65
		<dependency>
66
			<groupId>commons-codec</groupId>
67
			<artifactId>commons-codec</artifactId>
68
			<version>${commons.codec.version}</version>
69
		</dependency>
70
		<dependency>
71
			<groupId>dom4j</groupId>
72
			<artifactId>dom4j</artifactId>
73
			<version>${dom4j.version}</version>
74
			<exclusions>
75
				<exclusion>
76
					<artifactId>xml-apis</artifactId>
77
					<groupId>xml-apis</groupId>
78
				</exclusion>
79
			</exclusions>
80
		</dependency>
81
		<dependency>
82
			<groupId>net.sf.supercsv</groupId>
83
			<artifactId>super-csv</artifactId>
84
			<version>2.4.0</version>
85
		</dependency>
86
		<dependency>
87
			<groupId>eu.dnetlib</groupId>
88
			<artifactId>dnet-openaire-data-protos</artifactId>
89
			<version>[3.9.3]</version>
90
		</dependency>
91
		<dependency>
92
			<groupId>eu.dnetlib</groupId>
93
			<artifactId>dnet-pace-core</artifactId>
94
			<version>[3.0.0,4.0.0)</version>
95
		</dependency>
96
		<dependency>
97
			<groupId>eu.dnetlib</groupId>
98
			<artifactId>cnr-misc-utils</artifactId>
99
			<version>[1.0.0,2.0.0)</version>
100
		</dependency>
101
		<dependency>
102
			<groupId>eu.dnetlib</groupId>
103
			<artifactId>dnet-hadoop-commons</artifactId>
104
			<version>[2.0.0,3.0.0)</version>
105
		</dependency>
106
		<dependency>
107
			<groupId>eu.dnetlib</groupId>
108
			<artifactId>dnet-index-solr-common</artifactId>
109
			<version>[1.0.0,1.3.1]</version>
110
			<!-- uncomment to include solrj 7.2.0 -->
111
			<!--<version>[1.0.0,2.0.0]</version>-->
112
		</dependency>	
113
		<dependency>
114
			<groupId>com.googlecode.protobuf-java-format</groupId>
115
			<artifactId>protobuf-java-format</artifactId>
116
			<version>1.2</version>
117
		</dependency>
118
		<dependency>
119
			<groupId>org.apache.commons</groupId>
120
			<artifactId>commons-lang3</artifactId>
121
			<version>3.5</version>
122
		</dependency>
123

  
124
		<!-- test deps -->
125
		<dependency>
126
			<groupId>eu.dnetlib</groupId>
127
			<artifactId>dnet-openaireplus-profiles</artifactId>
128
			<version>[1.0.0,2.0.0)</version>
129
			<scope>test</scope>
130
		</dependency>
131
		<dependency>
132
			<groupId>org.mongodb</groupId>
133
			<artifactId>mongo-java-driver</artifactId>
134
			<version>${mongodb.driver.version}</version>
135
			<scope>test</scope>
136
		</dependency>
137
		<dependency>
138
			<groupId>org.springframework</groupId>
139
			<artifactId>spring-context</artifactId>
140
			<version>${spring.version}</version>
141
			<scope>test</scope>
142
		</dependency>
143
		<dependency>
144
			<groupId>org.springframework</groupId>
145
			<artifactId>spring-core</artifactId>
146
			<version>${spring.version}</version>
147
			<scope>test</scope>
148
		</dependency>
149
		<dependency>
150
			<groupId>org.springframework</groupId>
151
			<artifactId>spring-test</artifactId>
152
			<version>${spring.version}</version>
153
			<scope>test</scope>
154
		</dependency>
155

  
156
	</dependencies>
157
</project>
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/test/resources/test.properties
1
mongodb.host    =   node5.t.openaire.research-infrastructures.eu
2
mongodb.port    =   27017
3
mongodb.dbname  =   test_ci
4
test.limit      =   10000
5
test.logFreq    =   2000
6
test.batchSize  =   5000
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/test/resources/eu/dnetlib/pace/crossref.json
1
{
2
  "pid": [
3
    {
4
      "qualifier": {
5
        "classid": "doi",
6
        "classname": "doi",
7
        "schemename": "dnet:pid_types",
8
        "schemeid": "dnet:pid_types"
9
      },
10
      "value": "10.1002/9781444393675.ch6"
11
    }
12
  ],
13
  "result": {
14
    "instance": [
15
      {
16
        "url": [
17
          "http://dx.doi.org/10.1002/9781444393675.ch6"
18
        ],
19
        "collectedfrom": {
20
          "value": "CrossRef",
21
          "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"
22
        },
23
        "hostedby": {
24
          "value": "Unknown Repository",
25
          "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c"
26
        },
27
        "accessright": {
28
          "classid": "CLOSED",
29
          "classname": "Closed Access",
30
          "schemename": "dnet:access_modes",
31
          "schemeid": "dnet:access_modes"
32
        },
33
        "instancetype": {
34
          "classid": "0013",
35
          "classname": "Part of book or chapter of book",
36
          "schemename": "dnet:publication_resource",
37
          "schemeid": "dnet:publication_resource"
38
        }
39
      }
40
    ],
41
    "metadata": {
42
      "title": [
43
        {
44
          "qualifier": {
45
            "classid": "main title",
46
            "classname": "main title",
47
            "schemename": "dnet:dataCite_title",
48
            "schemeid": "dnet:dataCite_title"
49
          },
50
          "value": "Henry James (1843-1916)"
51
        }
52
      ],
53
      "resulttype": {
54
        "classid": "publication",
55
        "classname": "publication",
56
        "schemename": "dnet:result_typologies",
57
        "schemeid": "dnet:result_typologies"
58
      }
59
    }
60
  },
61
  "collectedfrom": [
62
    {
63
      "value": "Microsoft Academic Graph",
64
      "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"
65
    },
66
    {
67
      "value": "CrossRef",
68
      "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"
69
    },
70
    {
71
      "value": "UnpayWall",
72
      "key": "10|openaire____::8ac8380272269217cb09a928c8caa993"
73
    }
74
  ],
75
  "dateofcollection": "2018-08-07 12:24:48Z",
76
  "type": 50,
77
  "id": "50|crossref____::0000002a9885b7ec89b7b9d8ff3331a0"
78
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/test/resources/eu/dnetlib/pace/result.full.pace.conf
1
{ 
2
	"wf" : { 
3
        "threshold" : "0.99", 
4
        "run" : "001", 
5
        "entityType" : "result", 
6
        "orderField" : "title", 
7
        "queueMaxSize" : "2000",
8
        "groupMaxSize" : "10",
9
        "slidingWindowSize" : "200",
10
        "rootBuilder" : [ "result" ],
11
        "includeChildren" : "true" 
12
    },
13
	"pace" : {		
14
		"clustering" : [
15
			{ "name" : "acronyms", "fields" : [ "title" ], "params" : { "max" : "1", "minLen" : "2", "maxLen" : "4"} },
16
			{ "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
17
			{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } } 
18
		],		
19
		"conditions" : [
20
  			{ "name" : "yearMatch", "fields" : [ "dateofacceptance" ] },
21
  			{ "name" : "titleVersionMatch", "fields" : [ "title" ] },
22
  			{ "name" : "sizeMatch", "fields" : [ "authors" ] } ,
23
				{ "name" : "pidMatch", "fields" : [ "pid" ] }
24
  		],		
25
		"model" : [
26
			{ "name" : "pid", "algo" : "Null", "type" : "JSON", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid", "overrideMatch" : "true" },
27
			{ "name" : "title", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" },
28
			{ "name" : "dateofacceptance", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/metadata/dateofacceptance/value" } ,
29
			{ "name" : "authors", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/metadata/author/fullname" }
30
		],
31
		"blacklists" : {
32
			"title" : [
33
				"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
34
				"^(Kiri Karl Morgensternile).*$",
35
				"^(\\[Eksliibris Aleksandr).*\\]$",
36
				"^(\\[Eksliibris Aleksandr).*$",
37
				"^(Eksliibris Aleksandr).*$",
38
				"^(Kiri A\\. de Vignolles).*$",
39
				"^(2 kirja Karl Morgensternile).*$",
40
				"^(Pirita kloostri idaosa arheoloogilised).*$",
41
				"^(Kiri tundmatule).*$",
42
				"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
43
				"^(Eksliibris Nikolai Birukovile).*$",
44
				"^(Eksliibris Nikolai Issakovile).*$",
45
				"^(WHP Cruise Summary Information of section).*$",
46
				"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
47
				"^(Measurement of the spin\\-dependent structure function).*"
48
			] } 		
49
	}
50

  
51
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/main/java/eu/dnetlib/data/transform/xml/Element.java
1
package eu.dnetlib.data.transform.xml;
2

  
3
import java.util.Map;
4

  
5
import org.apache.commons.lang.StringUtils;
6

  
7
import com.google.common.collect.Maps;
8

  
9
public class Element {
10

  
11
	private String text;
12
	private Map<String, String> attributes;
13

  
14
	public Element(final String text, final Map<String, String> attributes) {
15
		this.text = text;
16
		this.attributes = attributes;
17
	}
18

  
19
	public Element(final String text) {
20
		this.text = text;
21
		this.attributes = Maps.newHashMap();
22
	}
23

  
24
	public Element() {
25
		this.text = "";
26
		this.attributes = Maps.newHashMap();
27
	}
28

  
29
	public String getText() {
30
		return text;
31
	}
32

  
33
	public void setText(final String text) {
34
		this.text = text;
35
	}
36

  
37
	public Map<String, String> getAttributes() {
38
		return attributes;
39
	}
40

  
41
	public String getAttributeValue(final String attributeName) {
42
		return getAttributes().get(attributeName);
43
	}
44

  
45
	public void setAttributes(final Map<String, String> attributes) {
46
		this.attributes = attributes;
47
	}
48

  
49
	public boolean isEmpty() {
50
		return !(hasText() || hasAttributes());
51
	}
52

  
53
	private boolean hasAttributes() {
54
		return (getAttributes() != null) && !getAttributes().isEmpty();
55
	}
56

  
57
	public boolean hasText() {
58
		return (getText() != null) && !getText().isEmpty();
59
	}
60

  
61
	@Override
62
	public String toString() {
63
		return "{ " + StringUtils.left(text, 20) + attributes.toString() + " }";
64
	}
65
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/test/java/eu/dnetlib/data/bulktag/CommunityConfigurationFactoryTest.java
1
package eu.dnetlib.data.bulktag;
2

  
3
import org.apache.commons.io.IOUtils;
4
import org.apache.commons.lang3.StringUtils;
5
import org.dom4j.DocumentException;
6
import org.junit.Before;
7
import org.junit.Test;
8

  
9
import java.io.IOException;
10

  
11
import static org.junit.Assert.assertEquals;
12
import static org.junit.Assert.assertTrue;
13

  
14
/**
15
 * Created by miriam on 03/08/2018.
16
 */
17
public class CommunityConfigurationFactoryTest {
18

  
19
    private String xml;
20

  
21
    @Before
22
    public void setUp() throws IOException, DocumentException {
23
        xml = IOUtils.toString(getClass().getResourceAsStream("community_configuration.xml"));
24
    }
25

  
26
    @Test
27
    public void parseTest() throws DocumentException {
28

  
29
        final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml);
30
        assertEquals(cc.size(),4);
31
        cc.getCommunityList().forEach(c -> assertTrue(StringUtils.isNoneBlank(c.getId())));
32

  
33

  
34
    }
35
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/main/java/eu/dnetlib/data/transform/xml/OdfToHbaseXsltFunctions.java
1
package eu.dnetlib.data.transform.xml;
2

  
3
import java.util.HashMap;
4
import java.util.List;
5
import java.util.Map;
6
import java.util.Set;
7

  
8
import com.google.common.collect.Lists;
9
import com.google.common.collect.Maps;
10
import com.google.common.collect.Sets;
11
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
12
import eu.dnetlib.data.proto.FieldTypeProtos.Author;
13
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
14
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
15
import eu.dnetlib.data.proto.OafProtos.Oaf;
16
import eu.dnetlib.data.proto.OafProtos.OafEntity;
17
import eu.dnetlib.data.proto.ResultProtos.Result;
18
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
19
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
20
import eu.dnetlib.data.proto.TypeProtos.Type;
21
import org.apache.commons.lang3.StringUtils;
22
import org.w3c.dom.Element;
23
import org.w3c.dom.NamedNodeMap;
24
import org.w3c.dom.Node;
25
import org.w3c.dom.NodeList;
26

  
27
public class OdfToHbaseXsltFunctions extends CommonDNetXsltFunctions {
28

  
29
	/** Maps the access-right values found in incoming records onto the access codes used when building the instance. */
	private static Map<String, String> mappingAccess = Maps.newHashMap();

	static {
		// The transformator now maps the access rights into proper values (not sure if it does so for all datasets),
		// hence the already-normalised codes simply map onto themselves.
		for (final String code : new String[] { "OPEN", "CLOSED", "RESTRICTED", "EMBARGO", "OPEN SOURCE" }) {
			mappingAccess.put(code, code);
		}

		// info:eu-repo semantic terms
		mappingAccess.put("info:eu-repo/semantics/openAccess", "OPEN");
		mappingAccess.put("info:eu-repo/semantics/closedAccess", "CLOSED");
		mappingAccess.put("info:eu-repo/semantics/restrictedAccess", "RESTRICTED");
		mappingAccess.put("info:eu-repo/semantics/embargoedAccess", "EMBARGO");
	}
46

  
47
	public static String odfResult(
48
			final String resultId,
49
			final boolean invisible,
50
			final NodeList about,
51
			final NodeList metadata,
52
			final NodeList titles,
53
			final NodeList creators,
54
			final NodeList subjects,
55
			final NodeList publisher,
56
			final NodeList descriptions,
57
			final NodeList dates,
58
			final NodeList dateaccepted,
59
			final NodeList resourceTypes,
60
			final NodeList formats,
61
			final NodeList sizes,
62
			final NodeList languages,
63
			final NodeList cobjcategory,
64
			final NodeList contributors,
65
			final NodeList rights,
66
			final NodeList license,
67
			final NodeList version,
68
			final NodeList pidList,
69
			final String provenance,
70
			final String trust,
71
			final NodeList hostedby,
72
			final NodeList collectedfrom,
73
			final NodeList originalIds,
74
			final String instanceUri,
75
			final String landingPage,
76
			final NodeList distributionlocation,
77
			final NodeList documentationUrl,
78
			final String dateOfCollection,
79
			final String dateOfTransformation) {
80

  
81
		try {
82
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
83

  
84
			final Result.Builder result = Result.newBuilder();
85
			Result.Metadata.Builder metadataProto = Result.Metadata.newBuilder();
86

  
87
			// subject
88
			for (int i = 0; i < subjects.getLength(); i++) {
89
				Node currentNode = subjects.item(i);
90
				NodeList childNodes = currentNode.getChildNodes();
91
				if (childNodes.getLength() > 0) {
92
					String subjectValue = childNodes.item(0).getNodeValue();
93
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("subject"),
94
							getStructuredProperty(subjectValue, "keyword", "keyword", "dnet:subject_classification_typologies", "dnet:subject_classification_typologies"));
95
				}
96
			}
97

  
98
			// title
99
			for (int i = 0; i < titles.getLength(); i++) {
100
				Node currentNode = titles.item(i);
101
				NodeList childNodes = currentNode.getChildNodes();
102
				if (childNodes.getLength() > 0) {
103
					String titleValue = childNodes.item(0).getNodeValue();
104
					String classname = "main title";
105
					String classid = "main title";
106
					if (currentNode.hasAttributes()) {
107
						NamedNodeMap attributes = currentNode.getAttributes();
108
						Node titleType = attributes.getNamedItem("titleType");
109

  
110
						if (titleType != null && titleType.getNodeValue().equals("AlternativeTitle")) {
111
							classname = "alternative title";
112
							classid = "alternative title";
113
						}
114
						if (titleType != null && titleType.getNodeValue().equals("Subtitle")) {
115
							classname = "subtitle";
116
							classid = "subtitle";
117
						}
118
						if (titleType != null && titleType.getNodeValue().equals("TranslatedTitle")) {
119
							classname = "translated title";
120
							classid = "translated title";
121
						}
122
					}
123
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("title"),
124
							getStructuredProperty(titleValue, classname, classid, "dnet:dataCite_title", "dnet:dataCite_title"));
125
				}
126
			}
127

  
128
			// creators
129
			for (int i = 0; i < creators.getLength(); i++) {
130
				final Element creator = (Element) creators.item(i);
131
				if (creator != null && creator.hasChildNodes()) {
132

  
133
					final NodeList creatorNames = creator.getElementsByTagName("creatorName");
134
					if (creatorNames.getLength() > 0) {
135
						final Element creatorName = (Element) creatorNames.item(0);
136

  
137
						final Author.Builder author = Author.newBuilder();
138
						author.setRank(i+1);
139
						final String fullname = StringUtils.trim(creatorName.getTextContent());
140

  
141
						author.setFullname(fullname);
142

  
143
						final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
144
						if (p.isAccurate()) {
145
							author.setName(p.getNormalisedFirstName());
146
							author.setSurname(p.getNormalisedSurname());
147
						}
148
						final NodeList nameIdentifiers = creator.getElementsByTagName("nameIdentifier");
149
						if (nameIdentifiers.getLength() > 0) {
150
							final Element nameIdentifier = (Element) nameIdentifiers.item(0);
151
							final String nameIdentifierScheme = nameIdentifier.getAttribute("nameIdentifierScheme");
152
							final String id = StringUtils.trim(nameIdentifier.getTextContent());
153
							if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(nameIdentifierScheme)) {
154
								author.addPid(getKV(nameIdentifierScheme, id));
155
							}
156
						}
157

  
158
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("author"), author);
159
					}
160
				}
161
			}
162

  
163
			// description
164
			for (int i = 0; i < descriptions.getLength(); i++) {
165
				Element currentNode = (Element) descriptions.item(i);
166
				if (currentNode != null && currentNode.hasChildNodes()) {
167
					String descriptionValue = currentNode.getChildNodes().item(0).getNodeValue();
168

  
169
					final String descriptionType = currentNode.getAttribute("descriptionType");
170
					if (StringUtils.isNotBlank(descriptionType)) {
171
						switch (descriptionType) {
172
						case "TechnicalInfo":
173
							addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("tool"), descriptionValue);
174
							break;
175
						case "Abstract":
176
							addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("description"), descriptionValue);
177
							break;
178
						case "DistributionForm":
179
							addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("format"), descriptionValue);
180
							break;
181
						}
182
					} else {
183
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("description"), descriptionValue);
184
					}
185
				}
186
			}
187

  
188
			// contributors
189
			for (int i = 0; i < contributors.getLength(); i++) {
190
				final Element contributor = (Element) contributors.item(i);
191
				if (contributor != null && contributor.hasChildNodes()) {
192

  
193
					NodeList contributorNames = contributor.getElementsByTagName("contributorName");
194
					if (contributorNames != null) {
195
						Element contributorName = (Element) contributorNames.item(0);
196
						if (contributorName != null) {
197
							final String contributorValue = contributorName.getTextContent();
198
							final String contributorType = contributor.getAttribute("contributorType");
199

  
200
							if (StringUtils.isNotBlank(contributorType)) {
201
								switch (contributorType) {
202
								case "ContactPerson":
203
									addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("contactperson"), contributorValue);
204
									break;
205
								case "ContactGroup":
206
									addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("contactgroup"), contributorValue);
207
									break;
208
								}
209
							} else {
210
								addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("contributor"), contributorValue);
211
							}
212
						}
213
					}
214
				}
215
			}
216

  
217
			// publisher
218
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("publisher"), getFirstItem(publisher));
219

  
220
			// dates
221
			for (int i = 0; i < dates.getLength(); i++) {
222
				Node currentNode = dates.item(i);
223
				if (currentNode != null && currentNode.hasAttributes() && currentNode.hasChildNodes()) {
224
					String dateAttribute = currentNode.getAttributes().getNamedItem("dateType").getNodeValue();
225
					String dateValue = currentNode.getChildNodes().item(0).getNodeValue();
226
					String protoAttribute = "relevantdate";
227
					if ("Accepted".equals(dateAttribute)) {
228
						protoAttribute = "dateofacceptance";
229
					} else if ("Issued".equals(dateAttribute)) {
230
						protoAttribute = "storagedate";
231
					} else if ("Updated".equals(dateAttribute)) {
232
						protoAttribute = "lastmetadataupdate";
233
					} else if ("Available".equals(dateAttribute)) {
234
						protoAttribute = "embargoenddate";
235
					}
236
					if (protoAttribute.equals("relevantdate") == false) {
237
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute), dateValue);
238
					} else {
239
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute),
240
								getStructuredProperty(dateValue, "UNKNOWN", "UNKNOWN", "dnet:dataCite_date", "dnet:dataCite_date"));
241
					}
242
				}
243
			}
244

  
245
			// dateofacceptance
246
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted));
247

  
248
			// size
249
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("size"), getFirstItem(sizes));
250

  
251
			// version
252
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("version"), getFirstItem(version));
253

  
254
			// language
255
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("language"),
256
					setQualifier(getDefaultQualifier("dnet:languages"), Lists.newArrayList(getFirstItem(languages))));
257

  
258
			// resource type
259
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resourcetype"),
260
					setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(getFirstItem(resourceTypes))));
261

  
262
			// resultType
263
			final String cobjcategoryCode = getFirstItem(cobjcategory);
264
			final String resulttype = getResultType(cobjcategory);
265
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies"));
266

  
267
			switch (resulttype) {
268
			case "software" :
269
				// format
270
				addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("programmingLanguage"),
271
						getSimpleQualifier(getFirstItem(formats), "dnet:programming_languages"));
272
				break;
273
			case "dataset":
274
				for (int i = 0; i < formats.getLength(); i++) {
275
					Node currentNode = formats.item(i);
276
					NodeList childNodes = currentNode.getChildNodes();
277
					if (childNodes.getLength() > 0) {
278
						String formatValue = childNodes.item(0).getNodeValue();
279
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("format"), formatValue);
280
					}
281
				}
282
				break;
283
			case "other":
284

  
285
				break;
286
			}
287

  
288
			// documentationUrl
289
			for (int i = 0; i < documentationUrl.getLength(); i++) {
290
				final Element docUrl = (Element) documentationUrl.item(i);
291
				if (docUrl != null && docUrl.hasChildNodes()) {
292
					final String value = docUrl.getTextContent();
293
					if (StringUtils.isNotBlank(value)) {
294
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("documentationUrl"), value);
295
					}
296
				}
297
			}
298

  
299
			// contexts
300
			ValueMap values = ValueMap.parseNodeList(metadata);
301
			if (values.get("concept") != null) {
302
				for (final eu.dnetlib.data.transform.xml.Element e : values.get("concept")) {
303
					final String id = e.getAttributes().get("id");
304
					if (StringUtils.isBlank(id)) throw new IllegalArgumentException("Context id cannot be blank");
305
					metadataProto.addContext(Context.newBuilder().setId(id));
306
				}
307
			}
308

  
309
			final List<KeyValue> hostedBys = getKeyValues(ValueMap.parseNodeList(hostedby), "hostedby", Type.datasource);
310
			final List<KeyValue> collectedFroms = getKeyValues(ValueMap.parseNodeList(collectedfrom), "collectedfrom", Type.datasource);
311

  
312
			final Instance.Builder instance = Instance.newBuilder();
313

  
314
			String tmpRigths = "UNKNOWN";
315
			final String firstRight = getFirstItem(rights);
316
			if (mappingAccess.containsKey(firstRight)) {
317
				tmpRigths = mappingAccess.get(firstRight);
318
			}
319

  
320
			addField(instance, Instance.getDescriptor().findFieldByName("license"), getFirstItem(license));
321
			addField(instance, Instance.getDescriptor().findFieldByName("hostedby"), hostedBys);
322

  
323
			addField(instance, Instance.getDescriptor().findFieldByName("accessright"),
324
					setQualifier(getDefaultQualifier("dnet:access_modes"), Lists.newArrayList(tmpRigths)));
325

  
326
			addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
327
					setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(cobjcategoryCode)));
328

  
329
			addField(instance, Instance.getDescriptor().findFieldByName("url"), instanceUri);
330
			if (StringUtils.isNotBlank(landingPage)) {
331
				addField(instance, Instance.getDescriptor().findFieldByName("url"), landingPage);
332
			}
333
			addField(instance, Instance.getDescriptor().findFieldByName("distributionlocation"), getFirstItem(distributionlocation));
334

  
335
			addField(instance, Instance.getDescriptor().findFieldByName("collectedfrom"), collectedFroms);
336
			addField(instance, Instance.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted));
337

  
338
			result.addInstance(instance);
339

  
340
			List<StructuredProperty> pids = parsePids(pidList);
341

  
342
			// original ids
343
			final Set<String> originalIdList = Sets.newHashSet();
344
			for (int i = 0; i < originalIds.getLength(); i++) {
345
				Node currentNode = originalIds.item(i);
346
				if (currentNode != null && currentNode.hasChildNodes()) {
347
					originalIdList.add(currentNode.getChildNodes().item(0).getNodeValue());
348
				}
349
			}
350

  
351
			OafEntity.Builder entity =
352
					getEntity(Type.result, entityId, collectedFroms, originalIdList, dateOfCollection, dateOfTransformation, pids).setResult(
353
							result.setMetadata(metadataProto));
354

  
355
			entity.setOaiprovenance(getOAIProvenance(about));
356

  
357
			Oaf oaf = getOaf(entity, getDataInfo(invisible, about, provenance, trust, false, false));
358
			return base64(oaf.toByteArray());
359
		} catch (Exception e) {
360
			e.printStackTrace(System.err);
361
			throw new RuntimeException(e);
362
		}
363

  
364
	}
365

  
366
	private static String getResultType(final NodeList cobjcategoryNode) {
367

  
368
		final ValueMap values = ValueMap.parseNodeList(cobjcategoryNode);
369

  
370
		final eu.dnetlib.data.transform.xml.Element cobjcategory = values.get("cobjcategory").stream()
371
				.map(e -> StringUtils.isNotBlank(e.getText()) ? e : new eu.dnetlib.data.transform.xml.Element("0000", e.getAttributes()))
372
				.findFirst()
373
				.orElse(new eu.dnetlib.data.transform.xml.Element("0000", new HashMap<>()));
374

  
375
		final String resulttype = cobjcategory.getAttributeValue("type");
376
		if (StringUtils.isNotBlank(resulttype)) {
377
			return resulttype;
378
		}
379

  
380
		return getDefaultResulttype(cobjcategory);
381
	}
382

  
383
	public static String getFirstItem(final NodeList list) {
384
		String out = "";
385
		if (list != null) {
386

  
387
			if (list.getLength() > 0 && list.item(0).getChildNodes() != null && list.item(0).getChildNodes().getLength() > 0) {
388
				out = list.item(0).getChildNodes().item(0).getNodeValue();
389
			}
390
		}
391
		return out;
392
	}
393

  
394
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/main/java/eu/dnetlib/data/transform/xml/ValueMap.java
1
package eu.dnetlib.data.transform.xml;
2

  
3
import java.util.HashMap;
4
import java.util.Map;
5

  
6
import org.w3c.dom.NamedNodeMap;
7
import org.w3c.dom.Node;
8
import org.w3c.dom.NodeList;
9

  
10
import com.google.common.collect.Maps;
11

  
12
@SuppressWarnings("serial")
13
public class ValueMap extends HashMap<String, ElementList> {
14

  
15
	public static String IDX_ATTRIBUTE = "idx";
16

  
17
	public static ValueMap parseNodeList(final NodeList nodeList) {
18
		final ValueMap values = new ValueMap();
19

  
20
		for (int i = 0; i < nodeList.getLength(); i++) {
21
			getNodeValue(nodeList.item(i), values);
22
		}
23
		return values;
24
	}
25

  
26
	protected static void getNodeValue(final Node node, final ValueMap values) {
27

  
28
		final String nodeName = node.getLocalName().toLowerCase();
29

  
30
		final Node nodeText = node.getFirstChild();
31
		final Element element = nodeText != null ? new Element(nodeText.getNodeValue()) : new Element();
32
		final Map<String, String> attrs = Maps.newHashMap();
33

  
34
		final NamedNodeMap attributeList = node.getAttributes();
35
		for (int j = 0; j < attributeList.getLength(); j++) {
36
			Node attr = attributeList.item(j);
37
			if ((attr.getNodeValue() != null) && !attr.getNodeValue().isEmpty()) {
38
				attrs.put(attr.getLocalName(), attr.getNodeValue());
39
				if (values.containsKey(nodeName)) {
40
					attrs.put(IDX_ATTRIBUTE, String.valueOf(values.get(nodeName).size() + 1));
41
				} else {
42
					attrs.put(IDX_ATTRIBUTE, "1");
43
				}
44
			}
45
		}
46
		element.setAttributes(attrs);
47

  
48
		if (!element.isEmpty()) {
49
			if (!values.containsKey(nodeName)) {
50
				values.put(nodeName, new ElementList());
51
			}
52

  
53
			values.get(nodeName).add(element);
54
		}
55
	}
56

  
57
	@Override
58
	public ElementList get(final Object key) {
59
		ElementList e = super.get(key);
60
		return e != null ? e : new ElementList();
61
	}
62

  
63
	@Override
64
	public String toString() {
65
		StringBuilder sb = new StringBuilder();
66
		sb.append("{");
67
		for (String k : this.keySet()) {
68
			sb.append(k).append("=").append(this.get(k)).append("\n");
69
		}
70
		sb.append("}");
71
		return sb.toString();
72
	}
73
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/main/java/eu/dnetlib/data/transform/xml/ElementList.java
1
package eu.dnetlib.data.transform.xml;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8

  
9
@SuppressWarnings("serial")
10
public class ElementList extends ArrayList<Element> {
11
	public List<String> listValues() {
12
		return Lists.newArrayList(Iterables.transform(this, e -> e.getText()));
13
	}
14
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/main/java/eu/dnetlib/data/transform/xml2/Utils.java
1
package eu.dnetlib.data.transform.xml2;
2

  
3
import java.nio.charset.Charset;
4
import java.util.Collection;
5
import java.util.Map;
6
import java.util.stream.Collectors;
7
import java.util.stream.Stream;
8

  
9
import com.google.common.collect.Maps;
10
import com.google.protobuf.Descriptors;
11
import com.google.protobuf.InvalidProtocolBufferException;
12
import com.google.protobuf.Message;
13
import eu.dnetlib.data.proto.FieldTypeProtos.*;
14
import eu.dnetlib.data.proto.OafProtos.OafRel;
15
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
16
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
17
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
18
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
19
import org.apache.commons.lang3.StringUtils;
20
import org.apache.commons.lang3.math.NumberUtils;
21

  
22
import static eu.dnetlib.data.transform.xml2.VtdUtilityParser.xpath;
23

  
24
public class Utils {
25

  
26
	public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
27

  
28
	public static final String ID_SEPARATOR = "::";
29

  
30
	public static final String TITLE_TYPE = "titleType";
31
	public static final String DATE_TYPE = "dateType";
32
	public static final String KEYWORD = "keyword";
33

  
34
	public static final String DNET_EXT_REF_TYPOLOGIES = "dnet:externalReference_typologies";
35
	public static final String DNET_TITLE_TYPOLOGIES = "dnet:dataCite_title";
36
	public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
37
	public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
38
	public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource";
39
	public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource";
40
	public static final String DNET_ACCESS_MODES = "dnet:access_modes";
41
	public static final String DNET_LANGUAGES = "dnet:languages";
42
	public static final String DNET_PID_TYPES = "dnet:pid_types";
43

  
44
	public static final String IDENTIFIER_TYPE = "identifierType";
45
	public static final String ALTERNATE_IDENTIFIER_TYPE = "alternateIdentifierType";
46
	public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
47

  
48
	public static final String CLASSID = "classid";
49
	public static final String CLASSNAME = "classname";
50
	public static final String SCHEMEID = "schemeid";
51
	public static final String SCHEMENAME = "schemename";
52

  
53
	public static final String RELATION_TYPE = "relationType";
54
	public static final String RELATED_IDENTIFIER_TYPE = "relatedIdentifierType";
55
	public static final String RIGHTS_URI = "rightsURI";
56

  
57
	public static final String UTF_8 = "UTF-8";
58

  
59
	// publication
60
	public static final String PROJECTID = "projectid";
61
	public static final String RELATED_DATASET = "relatedDataSet";
62
	public static final String RELATED_PUBLICATION = "relatedPublication";
63
	public static final String RELATED_IDENTIFIER = "relatedIdentifier";
64

  
65
	protected static Map<String, String> mappingAccess = Maps.newHashMap();
66

  
67
	static {
68
		mappingAccess.put("info:eu-repo/semantics/openAccess", "OPEN");
69
		mappingAccess.put("info:eu-repo/semantics/closedAccess", "CLOSED");
70
		mappingAccess.put("info:eu-repo/semantics/restrictedAccess", "RESTRICTED");
71
		mappingAccess.put("info:eu-repo/semantics/embargoedAccess", "EMBARGO");
72

  
73
		// Transformator now maps the access rights into proper values, not sure if it does for all datasets.
74
		mappingAccess.put("OPEN", "OPEN");
75
		mappingAccess.put("CLOSED", "CLOSED");
76
		mappingAccess.put("RESTRICTED", "RESTRICTED");
77
		mappingAccess.put("EMBARGO", "EMBARGO");
78
	}
79

  
80
	public static String getValue(final Node node, final String defaultValue) {
81
		return (node != null && StringUtils.isNotBlank(node.getTextValue())) ? node.getTextValue() : defaultValue;
82
	}
83

  
84
	public static String getValue(final String value, final String defaultValue) {
85
		return StringUtils.isNotBlank(value) ? value : defaultValue;
86
	}
87

  
88
	public static KeyValue getKV(final String id, final String name) {
89
		return KeyValue.newBuilder().setKey(id).setValue(name).build();
90
	}
91

  
92
	public static Qualifier getSimpleQualifier(final String classname, final String schemename) {
93
		return getQualifier(classname, classname, schemename, schemename);
94
	}
95

  
96
	public static Qualifier getQualifier(final String classid, final String classname, final String schemeid, final String schemename) {
97
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemeid).setSchemename(schemename).build();
98
	}
99

  
100
	public static StructuredProperty getStructuredProperty(final String value,
101
			final String classid,
102
			final String classname,
103
			final String schemeid,
104
			final String schemename) {
105
		if ((value == null) || value.isEmpty()) return null;
106
		return StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classid, classname, schemeid, schemename)).build();
107
	}
108

  
109
	/**
110
	 * Gets the classname of the given class code
111
	 *
112
	 * @param code class code.
113
	 * @return the class name, if the code is a key of the map. The code itself otherwise.
114
	 */
115
	public static String getClassName(final String code) {
116
		final String classname = AbstractDNetXsltFunctions.code2name.get(code);
117
		if (StringUtils.isBlank(classname)) return code;
118
		return classname;
119
	}
120

  
121
	public static String metadataXpath(final String otherValues) {
122
		return xpath("record", "metadata", otherValues);
123
	}
124

  
125
	public static void addField(final Message.Builder builder, final Descriptors.FieldDescriptor descriptor, final Object value) {
126

  
127
		if (value == null) return;
128

  
129
		if (value instanceof Stream) {
130
			addField(builder, descriptor, ((Stream) value).collect(Collectors.toList()));
131
		} else if (value instanceof Collection<?>) {
132
			for (final Object o : (Collection<Object>) value) {
133
				addField(builder, descriptor, o);
134
			}
135
		} else {
136
			Object v = value;
137
			switch (descriptor.getType()) {
138
			case BOOL:
139
				v = Boolean.valueOf(value.toString());
140
				break;
141
			case BYTES:
142
				v = value.toString().getBytes(Charset.forName(UTF_8));
143
				break;
144
			case DOUBLE:
145
				v = Double.valueOf(value.toString());
146
				break;
147
			case FLOAT:
148
				v = Float.valueOf(value.toString());
149
				break;
150
			case INT32:
151
			case INT64:
152
			case SINT32:
153
			case SINT64:
154
				v = Integer.valueOf(value.toString());
155
				break;
156
			case MESSAGE:
157
				final Message.Builder q = builder.newBuilderForField(descriptor);
158

  
159
				if (value instanceof Message.Builder) {
160
					v = ((Message.Builder) value).build();
161
					final byte[] b = ((Message) v).toByteArray();
162
					try {
163
						q.mergeFrom(b);
164
					} catch (final InvalidProtocolBufferException e) {
165
						throw new IllegalArgumentException("Unable to merge value: " + v + " with builder: " + q.getDescriptorForType().getName());
166
					}
167
				} else if (Qualifier.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
168
					if (value instanceof Qualifier) {
169
						q.mergeFrom((Qualifier) v);
170
					}
171
				} else if (StructuredProperty.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
172
					if (value instanceof StructuredProperty) {
173
						q.mergeFrom((StructuredProperty) v);
174
					}
175
				} else if (KeyValue.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
176
					if (value instanceof KeyValue) {
177
						q.mergeFrom((KeyValue) v);
178
					}
179
				} else if (Journal.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
180
					if (value instanceof Journal) {
181
						q.mergeFrom((Journal) v);
182
					}
183
				} else if (Context.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
184
					if (value instanceof Context) {
185
						q.mergeFrom((Context) v);
186
					}
187
				} else if (Author.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
188
					if (value instanceof Author) {
189
						q.mergeFrom((Author) v);
190
					}
191
				} else if (ExternalReference.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
192
					if (value instanceof ExternalReference) {
193
						q.mergeFrom((ExternalReference) v);
194
					}
195
				} else if (OafRel.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
196
					if (value instanceof OafRel) {
197
						q.mergeFrom((OafRel) v);
198
					}
199
				} else if (StringField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
200
					if (value instanceof StringField) {
201
						q.mergeFrom((StringField) v);
202
					} else {
203
						q.setField(StringField.getDescriptor().findFieldByName("value"), v);
204
					}
205
				} else if (BoolField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
206
					if (value instanceof BoolField) {
207
						q.mergeFrom((BoolField) v);
208
					} else if (value instanceof String) {
209
						q.setField(BoolField.getDescriptor().findFieldByName("value"), Boolean.valueOf((String) v));
210
					} else {
211
						q.setField(BoolField.getDescriptor().findFieldByName("value"), v);
212
					}
213
				} else if (IntField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
214
					if (value instanceof IntField) {
215
						q.mergeFrom((IntField) v);
216
					} else if (value instanceof String) {
217
						q.setField(IntField.getDescriptor().findFieldByName("value"), NumberUtils.toInt((String) v));
218
					} else {
219
						q.setField(IntField.getDescriptor().findFieldByName("value"), v);
220
					}
221
				}
222

  
223
				v = q.buildPartial();
224
				break;
225
			default:
226
				break;
227
			}
228

  
229
			doAddField(builder, descriptor, v);
230
		}
231
	}
232

  
233
	private static void doAddField(final Message.Builder builder, final Descriptors.FieldDescriptor fd, final Object value) {
234
		if (value != null) {
235
			if (fd.isRepeated()) {
236
				builder.addRepeatedField(fd, value);
237
			} else if (fd.isOptional() || fd.isRequired()) {
238
				builder.setField(fd, value);
239
			}
240
		}
241
	}
242

  
243
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/test/resources/eu/dnetlib/pace/result.pace.conf
1
{ 
2
	"wf" : { 
3
        "threshold" : "0.99", 
4
        "run" : "001", 
5
        "entityType" : "result", 
6
        "orderField" : "title", 
7
        "queueMaxSize" : "2000",
8
        "groupMaxSize" : "10",
9
        "slidingWindowSize" : "200",
10
        "rootBuilder" : [ "result" ],
11
        "includeChildren" : "true" 
12
    },
13
	"pace" : {
14
		"strictConditions" : [
15
  			{ "name" : "pidMatch", "fields" : [ "pid" ] }
16
  		],	
17
		"conditions" : [
18
  			{ "name" : "yearMatch", "fields" : [ "dateofacceptance" ] },
19
  			{ "name" : "titleVersionMatch", "fields" : [ "title" ] }		
20
  		 ],		
21
		"model" : [
22
			{ "name" : "pid", "algo" : "Null", "type" : "JSON", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid", "overrideMatch" : "true" },
23
			{ "name" : "title", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" },
24
			{ "name" : "dateofacceptance", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/metadata/dateofacceptance/value" }
25
		],
26
		"blacklists" : { } 		
27
	}
28

  
29
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/main/java/eu/dnetlib/data/transform/xml2/PublicationToProto.java
1
package eu.dnetlib.data.transform.xml2;
2

  
3
import java.util.Map;
4

  
5
import com.google.common.collect.Maps;
6

  
7
import static eu.dnetlib.data.transform.xml2.Utils.*;
8
import static eu.dnetlib.data.transform.xml2.VtdUtilityParser.xpath;
9

  
10
public class PublicationToProto extends AbstractResultDom4jParser {
11

  
12
    public PublicationToProto() {
13
        super(getFields());
14
    }
15

  
16
    public PublicationToProto(final boolean invisible, final String provenance, final String trust) {
17
        super(invisible, provenance, trust, getFields());
18
    }
19

  
20
    @Override
21
    protected String getResulttype(final String cobjcategory) {
22
        switch (cobjcategory) {
23
        case "0029":
24
            return "software";
25
        default:
26
            return "publication";
27
        }
28
    }
29

  
30
    protected static Map<String, String> getFields() {
31
        final Map<String, String> fields = Maps.newHashMap();
32

  
33
        fields.put("originalId", xpath("record", "header", "recordIdentifier"));
34
        fields.put("dateofcollection", xpath("record", "header", "dateOfCollection"));
35
        fields.put("dateoftransformation", xpath("record", "header", "dateOfTransformation"));
36
        fields.put("collectedfrom", metadataXpath("collectedFrom"));
37
        fields.put("pid", metadataXpath("identifier"));
38
        fields.put("license", metadataXpath("license"));
39
        fields.put("accessright", metadataXpath("accessrights"));
40
        fields.put("instancetype", metadataXpath("CobjCategory"));
41
        fields.put("hostedby", metadataXpath("hostedBy"));
42
        fields.put("url", metadataXpath("identifier"));
43
        fields.put("title", metadataXpath("title"));
44
        fields.put("description", metadataXpath("description"));
45
        fields.put("dateofacceptance", metadataXpath("dateAccepted"));
46
        fields.put("embargoenddate", metadataXpath("embargoenddate"));
47
        fields.put("storagedate", metadataXpath("storagedate"));
48
        fields.put("author", metadataXpath("creator"));
49
        fields.put("contributor", metadataXpath("contributor"));
50
        fields.put("subject", metadataXpath("subject"));
51
        fields.put("format", metadataXpath("format"));
52
        fields.put("source", metadataXpath("source"));
53
        fields.put("publisher", metadataXpath("publisher"));
54
        fields.put("language", metadataXpath("language"));
55
        fields.put("resulttype", metadataXpath("CobjCategory"));
56
        fields.put("concept", metadataXpath("concept"));
57
        fields.put("externalReference", metadataXpath("reference"));
58

  
59
        fields.put("cachedRel", String.format("%s | %s | %s",
60
                metadataXpath("projectid"),
61
                metadataXpath("relatedDataSet"),
62
                xpath("record", "metadata") + "//*[local-name()='relatedIdentifier']"));
63

  
64
        return fields;
65
    }
66

  
67
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/main/java/eu/dnetlib/data/transform/xml2/VtdException.java
1
package eu.dnetlib.data.transform.xml2;
2

  
3
/**
4
 * Created by claudio on 18/01/2017.
5
 */
6
public class VtdException extends Exception {
7

  
8
	public VtdException(final Exception e) {
9
		super(e);
10
	}
11

  
12
	public VtdException(final Throwable e) {
13
		super(e);
14
	}
15

  
16
	public VtdException(final String msg, final Exception e) {
17
		super(msg, e);
18
	}
19

  
20
	public VtdException(final String msg, final Throwable e) {
21
		super(msg, e);
22
	}
23
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.17/src/test/java/eu/dnetlib/data/transform/xml/OpenTrialsXsltFunctionsTest.java
1
package eu.dnetlib.data.transform.xml;
2

  
3
import java.util.List;
4

  
5
import eu.dnetlib.data.transform.xml.OpenTrialsXsltFunctions.JsonProv;
6
import org.junit.After;
7
import org.junit.Before;
8
import org.junit.Test;
9

  
10
import static org.junit.Assert.assertEquals;
11

  
12

  
13
/**
14
 * OpenTrialsXsltFunctions Tester.
15
 *
16
 */
17
public class OpenTrialsXsltFunctionsTest {
18

  
19
	private String jsonProv = "[{\"url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]";
20
	private String jsonProvWithNull = "[{\"url\" : \"\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]";
21
	private String jidentifiers = "{112683,NCT00920439}";
22

  
23

  
24
	private String jsonRecord = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\", \"is_primary\" : true},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]";
25
	private String jsonRecordNull = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\"},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]";
26
	private String jsonRecordVoid = "[{\"source_id\" : \"\", \"source_url\" : \"\", \"is_primary\" : \"\"}]";
27
	private String jsonRecondPrimary = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\", \"is_primary\" : false},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]";
28

  
29
	private String jsonPeopleVoid ="[{\"person_name\" : null, \"person_id\" : null, \"person_role\" : null}]";
30
	private String jsonPeopleOne = "[{\"person_name\" : \"Henk Verheul, M.D., PhD\", \"person_id\" : \"116438e9-f8b1-46e5-a1f8-20f851cab73c\", \"person_role\" : \"principal_investigator\"}]";
31
	private String jsonPeopleMore = "[{\"person_name\" : \"Henk Verheul, M.D., PhD\", \"person_id\" : \"116438e9-f8b1-46e5-a1f8-20f851cab73c\", \"person_role\" : \"principal_investigator\"},{\"person_name\" : \"Miriam Pippolippo Baglioni, PhD\", \"person_id\" : \"fake\", \"person_role\" : \"principal_investigator\"}]";
32

  
33
	private String jsonOrganizationVoid = "[{\"organization_name\" : null, \"organization_id\" : null, \"organization_role\" : null}]";
34
	private String jsonOrganizationOne = "[{\"organization_name\" : \"Södertälje sjukhus AB\", \"organization_id\" : \"15f0d004-b82b-408c-8605-38a57352468d\", \"organization_role\" : \"sponsor\"}]";
35
	private String jsonOrganizationMore = "[{\"organization_name\" : \"Södertälje sjukhus AB\", \"organization_id\" : \"15f0d004-b82b-408c-8605-38a57352468d\", \"organization_role\" : \"sponsor\"},{\"organization_name\" : \"Miriam Baglioni AB\", \"organization_id\" : \"fake\", \"organization_role\" : \"primary_sponsor\"}]";
36

  
37
	private String jsonLocationVoid = "[{\"location_name\" : null}]";
38
	private String jsonLocationOne = "[{\"location_name\" : \"China\"}]";
39
	private String jsonLocationMore = "[{\"location_name\" : \"China\"},{\"location_name\" : \"North Korea\"}]";
40

  
41
	@Before
42
	public void before() throws Exception {
43
	}
44

  
45
	@After
46
	public void after() throws Exception {
47
	}
48

  
49
	/**
50
	 * Method: getProvs(String jsonProvList)
51
	 */
52
	@Test
53
	public void testGetProvs() throws Exception {
54
		List<JsonProv> list = OpenTrialsXsltFunctions.getProvs(jsonProv);
55
		assertEquals(2, list.size());
56
	}
57

  
58
	/**
59
	 * Method: getMainIdentifierURL(String jsonProvList)
60
	 */
61
	@Test
62
	public void testGetMainIdentifierURL() throws Exception {
63
		String url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProv);
64
		assertEquals( "http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508", url );
65
		url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProvWithNull);
66
		assertEquals("https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true", url);
67
	}
68

  
69
	@Test
70
	public void testGetPrimaryRecordUrl(){
71
		String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecord);
72
		assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url);
73
	}
74

  
75
	@Test
76
	public void testGetPrimaryRecordID(){
77
		String id = OpenTrialsXsltFunctions.getPrimaryRecordIdentifier(jsonRecord);
78
		assertEquals("nct", id);
79
	}
80

  
81
	@Test
82
	public void testGetPrimaryRecordUrlNull(){
83
		String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecordNull);
84
		assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url);
85
	}
86

  
87
	@Test
88
	public void testGetPrimaryRecordUrlVoid(){
89
		String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecordVoid);
90
		assertEquals("", url);
91
	}
92

  
93
	@Test
94
	public void testGetPrimaryRecordUrlNoPrimary(){
95
		String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecondPrimary);
96
		assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url);
97
	}
98
	@Test
99
	public void testGetPrimaryRecordIDNoPrimary(){
100
		String id = OpenTrialsXsltFunctions.getPrimaryRecordIdentifier(jsonRecondPrimary);
101
		assertEquals("nct", id);
102
	}
103
	@Test
104
	public void testGetPrincipalInvestigatorsVoid(){
105
		String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleVoid);
106
		assertEquals("",url);
107
	}
108

  
109

  
110
	@Test
111
	public void testGetPrincipalInvestigatorsOne(){
112
		String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleOne);
113
		assertEquals("Verheul, Henk", url);
114
	}
115

  
116
	@Test
117
	public void testGetPrincipalInvestigatorsMore(){
118
		String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleMore);
119
		assertEquals("Verheul, Henk@@Baglioni, Miriam Pippolippo", url);
120
	}
121

  
122

  
123

  
124
	@Test
125
	public void testgGetTrialOrganizationsVoid(){
126
		String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationVoid);
127
		assertEquals("",url);
128
	}
129

  
130

  
131
	@Test
132
	public void testgGetTrialOrganizationsOne(){
133
		String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationOne);
134
		assertEquals("Södertälje sjukhus AB@sponsor", url);
135
	}
136

  
137
	@Test
138
	public void testgGetTrialOrganizationsMore(){
139
		String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationMore);
140
		assertEquals("Södertälje sjukhus AB@sponsor@@Miriam Baglioni AB@sponsor", url);
141
	}
142

  
143
	@Test
144
	public void testgGetTrialLocationsVoid(){
145
		String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationVoid);
146
		assertEquals("",url);
147
	}
148

  
149

  
150
	@Test
151
	public void testgGetTrialLocationsOne(){
152
		String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationOne);
153
		assertEquals("China", url);
154
	}
155

  
156
	@Test
157
	public void testgGetTrialLocationsMore(){
158
		String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationMore);
159
		assertEquals("China@@North Korea", url);
160
	}
161

  
162
	@Test
163
	public void testGetNotPrimaryRecordUrlPrimary(){
164
		String url = OpenTrialsXsltFunctions.getNotPrimaryRecordUrl(jsonRecondPrimary);
165
		assertEquals("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059", url);
166
	}
167

  
168
	@Test
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff