Revision 52669
Added by Claudio Atzori over 6 years ago
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.5/pom.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
|
3 |
<parent> |
|
4 |
<groupId>eu.dnetlib</groupId> |
|
5 |
<artifactId>dnet45-parent</artifactId> |
|
6 |
<version>1.0.0</version> |
|
7 |
<relativePath /> |
|
8 |
</parent> |
|
9 |
<modelVersion>4.0.0</modelVersion> |
|
10 |
<groupId>eu.dnetlib</groupId> |
|
11 |
<artifactId>dnet-openaireplus-mapping-utils</artifactId> |
|
12 |
<packaging>jar</packaging> |
|
13 |
<version>6.2.5</version> |
|
14 |
<scm> |
|
15 |
<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.5</developerConnection> |
|
16 |
</scm> |
|
17 |
|
|
18 |
<properties> |
|
19 |
<skipITs>true</skipITs> |
|
20 |
</properties> |
|
21 |
<build> |
|
22 |
<plugins> |
|
23 |
<plugin> |
|
24 |
<groupId>org.apache.maven.plugins</groupId> |
|
25 |
<artifactId>maven-failsafe-plugin</artifactId> |
|
26 |
<version>2.19.1</version> |
|
27 |
<executions> |
|
28 |
<execution> |
|
29 |
<id>integration-test</id> |
|
30 |
<goals> |
|
31 |
<goal>integration-test</goal> |
|
32 |
</goals> |
|
33 |
</execution> |
|
34 |
<execution> |
|
35 |
<id>verify</id> |
|
36 |
<goals> |
|
37 |
<goal>verify</goal> |
|
38 |
</goals> |
|
39 |
</execution> |
|
40 |
</executions> |
|
41 |
<configuration> |
|
42 |
<skipITs>${skipITs}</skipITs> |
|
43 |
</configuration> |
|
44 |
</plugin> |
|
45 |
</plugins> |
|
46 |
</build> |
|
47 |
|
|
48 |
<dependencies> |
|
49 |
<dependency> |
|
50 |
<groupId>com.google.guava</groupId> |
|
51 |
<artifactId>guava</artifactId> |
|
52 |
<version>${google.guava.version}</version> |
|
53 |
</dependency> |
|
54 |
<dependency> |
|
55 |
<groupId>junit</groupId> |
|
56 |
<artifactId>junit</artifactId> |
|
57 |
<version>${junit.version}</version> |
|
58 |
<scope>test</scope> |
|
59 |
</dependency> |
|
60 |
<dependency> |
|
61 |
<groupId>com.ximpleware</groupId> |
|
62 |
<artifactId>vtd-xml</artifactId> |
|
63 |
<version>[2.12, 3.0.0)</version> |
|
64 |
</dependency> |
|
65 |
<dependency> |
|
66 |
<groupId>commons-codec</groupId> |
|
67 |
<artifactId>commons-codec</artifactId> |
|
68 |
<version>${commons.codec.version}</version> |
|
69 |
</dependency> |
|
70 |
<dependency> |
|
71 |
<groupId>dom4j</groupId> |
|
72 |
<artifactId>dom4j</artifactId> |
|
73 |
<version>${dom4j.version}</version> |
|
74 |
<exclusions> |
|
75 |
<exclusion> |
|
76 |
<artifactId>xml-apis</artifactId> |
|
77 |
<groupId>xml-apis</groupId> |
|
78 |
</exclusion> |
|
79 |
</exclusions> |
|
80 |
</dependency> |
|
81 |
<dependency> |
|
82 |
<groupId>net.sf.supercsv</groupId> |
|
83 |
<artifactId>super-csv</artifactId> |
|
84 |
<version>2.4.0</version> |
|
85 |
</dependency> |
|
86 |
<dependency> |
|
87 |
<groupId>eu.dnetlib</groupId> |
|
88 |
<artifactId>dnet-openaire-data-protos</artifactId> |
|
89 |
<version>[3.9.2]</version> |
|
90 |
</dependency> |
|
91 |
<dependency> |
|
92 |
<groupId>eu.dnetlib</groupId> |
|
93 |
<artifactId>dnet-pace-core</artifactId> |
|
94 |
<version>[2.0.0,3.0.0)</version> |
|
95 |
</dependency> |
|
96 |
<dependency> |
|
97 |
<groupId>eu.dnetlib</groupId> |
|
98 |
<artifactId>cnr-misc-utils</artifactId> |
|
99 |
<version>[1.0.0,2.0.0)</version> |
|
100 |
</dependency> |
|
101 |
<dependency> |
|
102 |
<groupId>eu.dnetlib</groupId> |
|
103 |
<artifactId>dnet-hadoop-commons</artifactId> |
|
104 |
<version>[2.0.0,3.0.0)</version> |
|
105 |
</dependency> |
|
106 |
<dependency> |
|
107 |
<groupId>eu.dnetlib</groupId> |
|
108 |
<artifactId>dnet-index-solr-common</artifactId> |
|
109 |
<version>[1.0.0,1.3.1]</version> |
|
110 |
<!-- uncomment to include solrj 7.2.0 --> |
|
111 |
<!--<version>[1.0.0,2.0.0]</version>--> |
|
112 |
</dependency> |
|
113 |
<dependency> |
|
114 |
<groupId>com.googlecode.protobuf-java-format</groupId> |
|
115 |
<artifactId>protobuf-java-format</artifactId> |
|
116 |
<version>1.2</version> |
|
117 |
</dependency> |
|
118 |
<dependency> |
|
119 |
<groupId>org.apache.commons</groupId> |
|
120 |
<artifactId>commons-lang3</artifactId> |
|
121 |
<version>3.5</version> |
|
122 |
</dependency> |
|
123 |
|
|
124 |
<!-- test deps --> |
|
125 |
<dependency> |
|
126 |
<groupId>eu.dnetlib</groupId> |
|
127 |
<artifactId>dnet-openaireplus-profiles</artifactId> |
|
128 |
<version>[1.0.0,2.0.0)</version> |
|
129 |
<scope>test</scope> |
|
130 |
</dependency> |
|
131 |
<dependency> |
|
132 |
<groupId>org.mongodb</groupId> |
|
133 |
<artifactId>mongo-java-driver</artifactId> |
|
134 |
<version>${mongodb.driver.version}</version> |
|
135 |
<scope>test</scope> |
|
136 |
</dependency> |
|
137 |
<dependency> |
|
138 |
<groupId>org.springframework</groupId> |
|
139 |
<artifactId>spring-context</artifactId> |
|
140 |
<version>${spring.version}</version> |
|
141 |
<scope>test</scope> |
|
142 |
</dependency> |
|
143 |
<dependency> |
|
144 |
<groupId>org.springframework</groupId> |
|
145 |
<artifactId>spring-core</artifactId> |
|
146 |
<version>${spring.version}</version> |
|
147 |
<scope>test</scope> |
|
148 |
</dependency> |
|
149 |
<dependency> |
|
150 |
<groupId>org.springframework</groupId> |
|
151 |
<artifactId>spring-test</artifactId> |
|
152 |
<version>${spring.version}</version> |
|
153 |
<scope>test</scope> |
|
154 |
</dependency> |
|
155 |
|
|
156 |
</dependencies> |
|
157 |
</project> |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.5/src/main/java/eu/dnetlib/data/transform/SolrProtoMapper.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
|
|
5 |
import org.apache.commons.codec.binary.Base64; |
|
6 |
import org.apache.commons.lang.StringUtils; |
|
7 |
import org.apache.solr.common.SolrInputDocument; |
|
8 |
import org.dom4j.Document; |
|
9 |
import org.dom4j.DocumentException; |
|
10 |
import org.dom4j.Element; |
|
11 |
import org.dom4j.io.SAXReader; |
|
12 |
|
|
13 |
import com.google.common.base.Splitter; |
|
14 |
import com.google.common.collect.Lists; |
|
15 |
import com.google.protobuf.GeneratedMessage; |
|
16 |
|
|
17 |
import eu.dnetlib.pace.config.Type; |
|
18 |
|
|
19 |
/** |
|
20 |
* The Class ProtoDocumentMapper. |
|
21 |
*/ |
|
22 |
public class SolrProtoMapper extends AbstractProtoMapper { |
|
23 |
|
|
24 |
private static final String ID_SEPARATOR = "::"; |
|
25 |
|
|
26 |
/** The fields. */ |
|
27 |
private Document fields; |
|
28 |
|
|
29 |
/** |
|
30 |
* Instantiates a new proto document mapper. |
|
31 |
* |
|
32 |
* @param fields |
|
33 |
* the fields |
|
34 |
* @throws DocumentException |
|
35 |
* the document exception |
|
36 |
*/ |
|
37 |
public SolrProtoMapper(final String fields) throws DocumentException { |
|
38 |
this.fields = parse(fields); |
|
39 |
|
|
40 |
if (StringUtils.isBlank(this.fields.valueOf("//FIELD[@name = 'objIdentifier']/@name"))) |
|
41 |
throw new IllegalArgumentException("field objIdentifier is mandatory"); |
|
42 |
} |
|
43 |
|
|
44 |
/** |
|
45 |
* Map. |
|
46 |
* |
|
47 |
* @param proto |
|
48 |
* the proto |
|
49 |
* @param version |
|
50 |
* the version |
|
51 |
* @param dsId |
|
52 |
* the ds id |
|
53 |
* @return the solr input document |
|
54 |
* @throws DocumentException |
|
55 |
* the document exception |
|
56 |
*/ |
|
57 |
public SolrInputDocument map(final GeneratedMessage proto, final String version, final String dsId, final String actionSetId) throws DocumentException { |
|
58 |
|
|
59 |
final SolrInputDocument doc = new SolrInputDocument(); |
|
60 |
|
|
61 |
for (final Object o : fields.selectNodes("//FIELD[string(@path)]")) { |
|
62 |
final Element e = (Element) o; |
|
63 |
|
|
64 |
final String name = e.attribute("name").getValue().toLowerCase().trim(); |
|
65 |
final String path = e.attribute("path").getValue(); |
|
66 |
|
|
67 |
doc.setField(name, processMultiPath(proto, Lists.newLinkedList(Splitter.on("|").trimResults().split(path)), Type.String)); |
|
68 |
} |
|
69 |
|
|
70 |
final String objIdentifier = patchId((String) doc.getFieldValue("objidentifier")); |
|
71 |
doc.setField("objidentifier", objIdentifier); |
|
72 |
doc.setField("__indexrecordidentifier", getRecordId(objIdentifier, actionSetId)); |
|
73 |
doc.setField("__dsid", dsId); |
|
74 |
doc.setField("__dsversion", version); |
|
75 |
doc.setField("__result", Base64.encodeBase64String(proto.toByteArray())); |
|
76 |
doc.setField("actionset", actionSetId); |
|
77 |
|
|
78 |
return doc; |
|
79 |
} |
|
80 |
|
|
81 |
public String getRecordId(final String objIdentifier, final String actionSetId) { |
|
82 |
return objIdentifier + ID_SEPARATOR + actionSetId; |
|
83 |
} |
|
84 |
|
|
85 |
/** |
|
86 |
* Patch the objidentifier: when it comes from HBase, i.e. contains the separator '|' returns the string that follows. |
|
87 |
* |
|
88 |
* @param objidentifier |
|
89 |
* the objidentifier |
|
90 |
* @return the string |
|
91 |
*/ |
|
92 |
private String patchId(final String objidentifier) { |
|
93 |
return objidentifier.contains("|") ? StringUtils.substringAfter(objidentifier, "|") : objidentifier; |
|
94 |
} |
|
95 |
|
|
96 |
/** |
|
97 |
* Parses the. |
|
98 |
* |
|
99 |
* @param s |
|
100 |
* the s |
|
101 |
* @return the document |
|
102 |
* @throws DocumentException |
|
103 |
* the document exception |
|
104 |
*/ |
|
105 |
private Document parse(final String s) throws DocumentException { |
|
106 |
return new SAXReader().read(new StringReader(s)); |
|
107 |
} |
|
108 |
|
|
109 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.5/src/main/java/eu/dnetlib/data/transform/TrustOrdering.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import com.google.common.collect.ImmutableList; |
|
4 |
import com.google.common.collect.Ordering; |
|
5 |
|
|
6 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
7 |
import eu.dnetlib.data.proto.SpecialTrustProtos.SpecialTrust; |
|
8 |
|
|
9 |
public class TrustOrdering extends Ordering<Oaf> { |
|
10 |
|
|
11 |
@Override |
|
12 |
public int compare(Oaf left, Oaf right) { |
|
13 |
String lTrust = left.getDataInfo().getTrust(); |
|
14 |
String rTrust = right.getDataInfo().getTrust(); |
|
15 |
|
|
16 |
if (lTrust.equals(rTrust)) return 0; |
|
17 |
|
|
18 |
if (lTrust.equals(SpecialTrust.INFINITE.toString())) return 1; |
|
19 |
if (rTrust.equals(SpecialTrust.INFINITE.toString())) return -1; |
|
20 |
|
|
21 |
if (lTrust.equals(SpecialTrust.NEUTRAL.toString())) return 1; |
|
22 |
if (rTrust.equals(SpecialTrust.NEUTRAL.toString())) return -1; |
|
23 |
|
|
24 |
return Float.compare(Float.parseFloat(lTrust), Float.parseFloat(rTrust)); |
|
25 |
} |
|
26 |
|
|
27 |
public static ImmutableList<Oaf> sort(Iterable<Oaf> entities) { |
|
28 |
return new TrustOrdering().immutableSortedCopy(entities); |
|
29 |
} |
|
30 |
|
|
31 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.5/src/main/java/eu/dnetlib/data/index/CloudIndexClient.java | ||
---|---|---|
1 |
package eu.dnetlib.data.index; |
|
2 |
|
|
3 |
import java.io.Closeable; |
|
4 |
import java.io.IOException; |
|
5 |
import java.text.SimpleDateFormat; |
|
6 |
import java.util.Date; |
|
7 |
import java.util.List; |
|
8 |
|
|
9 |
import eu.dnetlib.functionality.index.solr.feed.StreamingInputDocumentFactory; |
|
10 |
import eu.dnetlib.miscutils.datetime.HumanTime; |
|
11 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
|
12 |
import org.apache.commons.logging.Log; |
|
13 |
import org.apache.commons.logging.LogFactory; |
|
14 |
import org.apache.solr.client.solrj.SolrQuery; |
|
15 |
import org.apache.solr.client.solrj.impl.CloudSolrServer; |
|
16 |
import org.apache.solr.client.solrj.response.QueryResponse; |
|
17 |
import org.apache.solr.client.solrj.response.UpdateResponse; |
|
18 |
import org.apache.solr.common.SolrInputDocument; |
|
19 |
|
|
20 |
/** |
|
21 |
* Created by michele on 11/11/15. |
|
22 |
*/ |
|
23 |
public class CloudIndexClient implements Closeable { |
|
24 |
|
|
25 |
private static final Log log = LogFactory.getLog(CloudIndexClient.class); |
|
26 |
private static final String INDEX_RECORD_RESULT_FIELD = "dnetResult"; |
|
27 |
|
|
28 |
private final CloudSolrServer solrClient; |
|
29 |
|
|
30 |
protected CloudIndexClient(final CloudSolrServer solrServer) { |
|
31 |
this.solrClient = solrServer; |
|
32 |
} |
|
33 |
|
|
34 |
public int feed(final String record, final String indexDsId, final UnaryFunction<String, String> toIndexRecord) throws CloudIndexClientException { |
|
35 |
return feed(record, indexDsId, toIndexRecord, true); |
|
36 |
} |
|
37 |
|
|
38 |
public int feed(final String record, final String indexDsId, final UnaryFunction<String, String> toIndexRecord, final boolean commit) |
|
39 |
throws CloudIndexClientException { |
|
40 |
try { |
|
41 |
final SolrInputDocument doc = prepareSolrDocument(record, indexDsId, toIndexRecord); |
|
42 |
if ((doc == null) || doc.isEmpty()) throw new CloudIndexClientException("Invalid solr document"); |
|
43 |
return feed(doc, commit); |
|
44 |
} catch (final Throwable e) { |
|
45 |
throw new CloudIndexClientException("Error feeding document", e); |
|
46 |
} |
|
47 |
} |
|
48 |
|
|
49 |
public int feed(final SolrInputDocument document) throws CloudIndexClientException { |
|
50 |
return feed(document, true); |
|
51 |
} |
|
52 |
|
|
53 |
public int feed(final SolrInputDocument document, final boolean commit) throws CloudIndexClientException { |
|
54 |
try { |
|
55 |
final UpdateResponse res = solrClient.add(document); |
|
56 |
log.debug("feed time for single records, elapsed time: " + HumanTime.exactly(res.getElapsedTime())); |
|
57 |
if (res.getStatus() != 0) { throw new CloudIndexClientException("bad status: " + res.getStatus()); } |
|
58 |
if (commit) { |
|
59 |
solrClient.commit(); |
|
60 |
} |
|
61 |
return res.getStatus(); |
|
62 |
} catch (final Throwable e) { |
|
63 |
throw new CloudIndexClientException("Error feeding document", e); |
|
64 |
} |
|
65 |
} |
|
66 |
|
|
67 |
public void feed(final List<SolrInputDocument> docs, final AfterFeedingCallback callback) throws CloudIndexClientException { |
|
68 |
feed(docs, callback, true); |
|
69 |
} |
|
70 |
|
|
71 |
public void feed(final List<SolrInputDocument> docs, final AfterFeedingCallback callback, final boolean commit) throws CloudIndexClientException { |
|
72 |
try { |
|
73 |
if (docs.isEmpty()) { |
|
74 |
log.debug("Empty list of documents. Calling callback, if needed."); |
|
75 |
if (callback != null) { |
|
76 |
callback.doAfterFeeding(null); |
|
77 |
} |
|
78 |
return; |
|
79 |
} |
|
80 |
final UpdateResponse res = solrClient.add(docs); |
|
81 |
|
|
82 |
log.debug("feed time for " + docs.size() + " records, elapsed tipe: : " + HumanTime.exactly(res.getElapsedTime())); |
|
83 |
|
|
84 |
if (commit) { |
|
85 |
solrClient.commit(); |
|
86 |
} |
|
87 |
if (callback != null) { |
|
88 |
callback.doAfterFeeding(res); |
|
89 |
} |
|
90 |
if (res.getStatus() != 0) throw new CloudIndexClientException("bad status: " + res.getStatus()); |
|
91 |
} catch (final Throwable e) { |
|
92 |
throw new CloudIndexClientException("Error feeding documents", e); |
|
93 |
} |
|
94 |
} |
|
95 |
|
|
96 |
public SolrInputDocument prepareSolrDocument(final String record, final String indexDsId, final UnaryFunction<String, String> toIndexRecord) |
|
97 |
throws CloudIndexClientException { |
|
98 |
try { |
|
99 |
final StreamingInputDocumentFactory documentFactory = new StreamingInputDocumentFactory(); |
|
100 |
|
|
101 |
final String version = (new SimpleDateFormat("yyyy-MM-dd\'T\'hh:mm:ss\'Z\'")).format(new Date()); |
|
102 |
final String indexRecord = toIndexRecord.evaluate(record); |
|
103 |
|
|
104 |
if (log.isDebugEnabled()) { |
|
105 |
log.debug("***************************************\nSubmitting index record:\n" + indexRecord + "\n***************************************\n"); |
|
106 |
} |
|
107 |
|
|
108 |
return documentFactory.parseDocument(version, indexRecord, indexDsId, INDEX_RECORD_RESULT_FIELD); |
|
109 |
} catch (final Throwable e) { |
|
110 |
throw new CloudIndexClientException("Error creating solr document", e); |
|
111 |
} |
|
112 |
} |
|
113 |
|
|
114 |
public boolean isRecordIndexed(final String id) throws CloudIndexClientException { |
|
115 |
final QueryResponse res = query("objidentifier:\"" + id + "\"", null); |
|
116 |
return res.getResults().size() > 0; |
|
117 |
} |
|
118 |
|
|
119 |
public int remove(final String id) throws CloudIndexClientException { |
|
120 |
return remove(id, true); |
|
121 |
} |
|
122 |
|
|
123 |
public int remove(final String id, final boolean commit) throws CloudIndexClientException { |
|
124 |
try { |
|
125 |
final UpdateResponse res = solrClient.deleteByQuery("objidentifier:\"" + id + "\""); |
|
126 |
if (commit) { |
|
127 |
solrClient.commit(); |
|
128 |
} |
|
129 |
return res.getResponse().size(); |
|
130 |
} catch (final Throwable e) { |
|
131 |
throw new CloudIndexClientException("Error removing documents", e); |
|
132 |
} |
|
133 |
} |
|
134 |
|
|
135 |
public int count(final String query) throws CloudIndexClientException { |
|
136 |
final QueryResponse res = query(query, 0); |
|
137 |
return res.getResults().size(); |
|
138 |
} |
|
139 |
|
|
140 |
public QueryResponse query(final String query, Integer rows) throws CloudIndexClientException { |
|
141 |
try { |
|
142 |
final SolrQuery solrQuery = new SolrQuery(); |
|
143 |
solrQuery.setQuery(query); |
|
144 |
if(rows != null && rows >= 0) { |
|
145 |
solrQuery.setRows(rows); |
|
146 |
} |
|
147 |
return solrClient.query(solrQuery); |
|
148 |
} catch (final Throwable e) { |
|
149 |
throw new CloudIndexClientException("Error searching documents", e); |
|
150 |
} |
|
151 |
} |
|
152 |
|
|
153 |
public void close() throws IOException { |
|
154 |
if (solrClient != null) { |
|
155 |
solrClient.shutdown(); |
|
156 |
} |
|
157 |
} |
|
158 |
|
|
159 |
public interface AfterFeedingCallback { |
|
160 |
|
|
161 |
void doAfterFeeding(final UpdateResponse response); |
|
162 |
} |
|
163 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.5/src/main/resources/eu/dnetlib/actionmanager/xslt/dmf2insertActions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
|
3 |
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr" |
|
5 |
xmlns:dnet="eu.dnetlib.data.transform.xml.OafToHbaseXsltFunctions" |
|
6 |
xmlns:exslt="http://exslt.org/common" |
|
7 |
version="1.0" extension-element-prefixes="exslt" |
|
8 |
exclude-result-prefixes="xsl oaf dr dri dnet exslt"> |
|
9 |
|
|
10 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
11 |
|
|
12 |
<xsl:param name="invisible" select="false()"/> |
|
13 |
<xsl:param name="trust" select="string('0.9')"/> |
|
14 |
<xsl:param name="provenance" select="string('sysimport:crosswalk:repository')"/> |
|
15 |
|
|
16 |
<!--
	Root template: turns one record into a ROWS document.
	* no metadata children, or CobjCategory equal to '0021': emits an empty ROWS element;
	* empty result id: emits nothing;
	* otherwise: emits one ROW holding the result body, plus two ROWs (one per direction) for every
	  projectid element and for every relatedDataset element whose computed id is not empty.
	All payloads are produced by the dnet extension functions and written as base64 QUALIFIER values.
-->
<xsl:template match="/*">
	<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/>
	<xsl:variable name="dateofcollection" select="/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name()='dateOfCollection' and text()][1]"/>
	<xsl:variable name="dateoftransformation" select="/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name()='dateOfTransformation' and text()][1]"/>
	<xsl:variable name="metadata" select="exslt:node-set(/*[local-name() = 'record']/*[local-name()='metadata']/*)"/>
	<!-- Not referenced by the active code of this template: only the commented-out test below mentions it. -->
	<xsl:variable name="namespaceprefix">
		<xsl:choose>

			<!-- TODO check namespaceprefix length is 12 -->
			<xsl:when test="string-length(//oaf:datasourceprefix) > 0">
				<xsl:value-of select="//oaf:datasourceprefix"/>
			</xsl:when>
			<xsl:otherwise>
				<!-- quoted: an unquoted unknown_ is an XPath step selecting a child element, which yields an empty string -->
				<xsl:value-of select="'unknown_'"/>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:variable>

	<xsl:choose>
		<!-- <xsl:when test="count($metadata) = 0 or string-length($namespaceprefix) = 0"> -->
		<xsl:when test="count($metadata) = 0">
			<ROWS/>
		</xsl:when>
		<xsl:otherwise>

			<xsl:variable name="objIdentifier" select="//dri:objIdentifier"/>
			<xsl:variable name="resultId" select="dnet:oafSimpleId('result', //dri:objIdentifier)"/>

			<xsl:if test="string-length($resultId) > 0">

				<xsl:variable name="cobjcategory" select="//dr:CobjCategory"/>
				<xsl:choose>
					<!-- check if we need to exclude datasets from an OAF MDStore-->
					<xsl:when test="$cobjcategory = '0021'">
						<ROWS/>
					</xsl:when>
					<xsl:otherwise>
						<!-- original id: the record identifier from the header, without the part up to the first '::' when present -->
						<xsl:variable name="originalidTest" select="/*[local-name()='record']/*[local-name() = 'header']/*[local-name() = 'recordIdentifier']"/>
						<xsl:variable name="originalid">
							<xsl:choose>
								<xsl:when test="contains($originalidTest, '::')">
									<xsl:value-of select="substring-after($originalidTest, '::')"/>
								</xsl:when>
								<xsl:otherwise>
									<xsl:value-of select="$originalidTest"/>
								</xsl:otherwise>
							</xsl:choose>
						</xsl:variable>

						<xsl:variable name="result"
							select="dnet:oafResult($resultId, $invisible, $provenance, $trust, $about, $originalid, $dateofcollection, $dateoftransformation, $metadata)"/>

						<ROWS>
							<ROW key="{$resultId}" columnFamily="result">
								<QUALIFIER name="body" type="base64">
									<xsl:value-of select="$result"/>
								</QUALIFIER>
							</ROW>

							<!-- result / project relations, one ROW per direction -->
							<xsl:for-each select="//*[local-name()='projectid']">

								<xsl:variable name="projectId" select="dnet:oafSplitId('project', normalize-space(.))"/>

								<xsl:if test="string-length($projectId) > 0">

									<xsl:variable name="resultproject"
										select="dnet:rel($resultId, $projectId, 'resultProject', 'outcome', 'isProducedBy',
													$metadata, $provenance, $trust, $about)"/>
									<xsl:variable name="projectresult"
										select="dnet:rel($projectId, $resultId, 'resultProject', 'outcome', 'produces',
													$metadata, $provenance, $trust, $about)"/>

									<ROW key="{$resultId}" columnFamily="resultProject_outcome_isProducedBy">
										<QUALIFIER name="{$projectId}" type="base64">
											<xsl:value-of select="$resultproject"/>
										</QUALIFIER>
									</ROW>
									<ROW key="{$projectId}" columnFamily="resultProject_outcome_produces">
										<QUALIFIER name="{$resultId}" type="base64">
											<xsl:value-of select="$projectresult"/>
										</QUALIFIER>
									</ROW>
								</xsl:if>
							</xsl:for-each>

							<!-- result / dataset relations, one ROW per direction -->
							<xsl:for-each select="//*[local-name()='relatedDataset']">

								<!-- relatedDataset ids must be in the openaire format -->
								<xsl:variable name="datasetId" select="dnet:oafSimpleId('result', normalize-space(./@id))"/>

								<xsl:if test="string-length($datasetId) > 0">

									<xsl:variable name="resultDataset"
										select="dnet:rel($resultId, $datasetId, 'resultResult', 'publicationDataset', 'isRelatedTo',
													$metadata, $provenance, $trust, $about)"/>
									<xsl:variable name="datasetResult"
										select="dnet:rel($datasetId, $resultId, 'resultResult', 'publicationDataset', 'isRelatedTo',
													$metadata, $provenance, $trust, $about)"/>

									<ROW key="{$resultId}" columnFamily="resultResult_publicationDataset_isRelatedTo">
										<QUALIFIER name="{$datasetId}" type="base64">
											<xsl:value-of select="$resultDataset"/>
										</QUALIFIER>
									</ROW>
									<ROW key="{$datasetId}" columnFamily="resultResult_publicationDataset_isRelatedTo">
										<QUALIFIER name="{$resultId}" type="base64">
											<xsl:value-of select="$datasetResult"/>
										</QUALIFIER>
									</ROW>
								</xsl:if>
							</xsl:for-each>
						</ROWS>
					</xsl:otherwise>
				</xsl:choose>
			</xsl:if>
		</xsl:otherwise>
	</xsl:choose>
</xsl:template>
|
134 |
</xsl:stylesheet> |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.5/src/main/java/eu/dnetlib/data/mapreduce/util/OafTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import com.google.protobuf.GeneratedMessage; |
|
4 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
5 |
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization; |
|
6 |
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision; |
|
7 |
import eu.dnetlib.data.proto.DatasourceProtos.Datasource; |
|
8 |
import eu.dnetlib.data.proto.DedupProtos.Dedup; |
|
9 |
import eu.dnetlib.data.proto.FieldTypeProtos.*; |
|
10 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder; |
|
11 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
12 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
13 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
14 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
15 |
import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization; |
|
16 |
import eu.dnetlib.data.proto.OrganizationProtos.Organization; |
|
17 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization; |
|
18 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation; |
|
19 |
import eu.dnetlib.data.proto.ProjectProtos.Project; |
|
20 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
|
21 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
22 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
23 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject; |
|
24 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome; |
|
25 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
26 |
import eu.dnetlib.data.proto.ResultProtos.Result.Context; |
|
27 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
|
28 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult; |
|
29 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity; |
|
30 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
31 |
|
|
32 |
public class OafTest { |
|
33 |
|
|
34 |
public static final String CITATION_JSON = |
|
35 |
"<citations>\n <citation>\n <rawText>[10] M. Foret et al., Phys. Rev. B 66, 024204 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[11] B. Ru\175404\264e et al., Phys. Rev. Lett. 90, 095502 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[12] U. Buchenau et al., Phys. Rev. B 34, 5665 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[13] S.N. Taraskin and S.R. Elliott, J. Phys.: Condens. Mat- ter 11, A219 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[14] B. Hehlen et al., Phys. Rev. Lett. 84, 5355 (2000).</rawText>\n </citation>\n <citation>\n <rawText>[15] N.V. Surotsev et al., J. Phys.: Condens. Matter 10, L113 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[16] D.A. Parshin and C. Laermans, Phys. Rev. B 63, 132203 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[17] V.L. Gurevich et al., Phys. Rev. B 67, 094203 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[18] A. Matic et al., Phys. Rev. Lett. 86, 3803 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[19] E. Rat et al., arXiv:cond-mat/0505558, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[1] R.C. Zeller and R.O. Pohl, Phys. Rev. B 4, 2029 (1971).</rawText>\n </citation>\n <citation>\n <rawText>[20] C.A. Angell, J. Non-Cryst. Solids 131\20023133, 13 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[21] A.P. Sokolov et al., Phys. Rev. Lett. 71, 2062 (1993).</rawText>\n </citation>\n <citation>\n <rawText>[22] T. Matsuo et al., Solid State Ionics 154-155, 759 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[23] V.K. Malinovsky et al., Europhys. Lett. 11, 43 (1990).</rawText>\n </citation>\n <citation>\n <rawText>[24] J. Lor\250osch et al., J. Non-Cryst. Solids 69, 1 (1984).</rawText>\n </citation>\n <citation>\n <rawText>[25] U. Buchenau, Z. Phys. B 58, 181 (1985).</rawText>\n </citation>\n <citation>\n <rawText>[26] A.F. Io\175400e and A.R. Regel, Prog. Semicond. 
4, 237 (1960).</rawText>\n </citation>\n <citation>\n <rawText>[27] R. Dell\20031Anna et al., Phys. Rev. Lett. 80, 1236 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[28] D. Fioretto et al., Phys. Rev. E 59, 4470 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[29] U. Buchenau et al., Phys. Rev. Lett. 77, 4035 (1996).</rawText>\n </citation>\n <citation>\n <rawText>[2] M. Rothenfusser et al., Phys. Rev. B 27, 5196 (1983).</rawText>\n </citation>\n <citation>\n <rawText>[30] J. Mattsson et al., J. Phys.: Condens. Matter 15, S1259 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[31] T. Scopigno et al., Phys. Rev. Lett. 92, 025503 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[32] M. Foret et al., Phys. Rev. Lett. 81, 2100 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[33] F. Sette et al., Science 280, 1550 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[34] J. Wuttke et al., Phys. Rev. E 52, 4026 (1995).</rawText>\n </citation>\n <citation>\n <rawText>[35] M.A. Ramos et al., Phys. Rev. Lett. 78, 82 (1997).</rawText>\n </citation>\n <citation>\n <rawText>[36] G. Monaco et al., Phys. Rev. Lett. 80, 2161 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[37] A. T\250olle, Rep. Prog. Phys. 64, 1473 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[38] As the straight lines do not cross the origin, this does not 2 imply \1623 \21035 \1651 .</rawText>\n </citation>\n <citation>\n <rawText>[39] A. Matic et al., Europhys. Lett. 54, 77 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[3] S. Hunklinger and W. Arnold, in Physical Acoustics, Vol. XII, W.P. Mason and R.N. Thurston Eds. (Academic Press, N.Y. 1976), p. 155.</rawText>\n </citation>\n <citation>\n <rawText>[40] IXS data are usually not available below \1651co, mostly for experimental reasons. 
E.g., that the rapid onset was not evidenced in vitreous silica [27], is not indicative of its absence but rather of a low qco \21074 1 nm\210221.</rawText>\n </citation>\n <citation>\n <rawText>[41] G. Ruocco et al., Phys. Rev. Lett. 83, 5583 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[42] D. C\1307 iplys et al., J. Physique (Paris) 42, C6-184 (1981).</rawText>\n </citation>\n <citation>\n <rawText>[43] R. Vacher et al., Rev. Sci. Instrum. 51, 288 (1980).</rawText>\n </citation>\n <citation>\n <rawText>[44] R. Vacher et al., arXiv:cond-mat/0505560, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[45] T.N. Claytor et al., Phys. Rev. B 18, 5842 (1978).</rawText>\n </citation>\n <citation>\n <rawText>[46] M. Arai et al., Physica B 263-264, 268 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[4] R. Vacher et al., J. Non-Cryst. Solids 45, 397 (1981); T.C. Zhu et al., Phys. Rev. B 44, 4281 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[5] J.E. Graebner et al., Phys. Rev. B 34, 5696 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[6] E. Duval and A. Mermet, Phys. Rev. B 58, 8159 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[7] A. Matic et al., Phys. Rev. Lett. 93, 145502 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[8] Often alluded to, e.g. in the Encyclopedia of Materials: Science and Technology, K.H.J. Buschow et al., Eds., Vol. 1 (Elsevier, Oxford, 2001), articles by S.R. Elliott on pp. 171-174 and U. Buchenau on pp. 212-215.</rawText>\n </citation>\n <citation>\n <rawText>[9] E. Rat et al., Phys. Rev. Lett. 83, 1355 (1999).</rawText>\n </citation>\n</citations>"; |
|
36 |
|
|
37 |
public static final String STATISTICS_JSON = |
|
38 |
"[{ \"citationsPerYear\": \"many\", \"anotherCoolStatistic\": \"WoW\", \"nestedStat\": { \"firstNestedStat\" : \"value 1\", \"secondNestedStat\" : \"value 2\"}, \"listingStat\" : [ \"one\", \"two\" ] }]"; |
|
39 |
|
|
40 |
public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename) { |
|
41 |
return getStructuredproperty(value, classname, schemename, null); |
|
42 |
} |
|
43 |
|
|
44 |
public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename, final DataInfo dataInfo) { |
|
45 |
final Builder sp = StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classname, schemename)); |
|
46 |
if (dataInfo != null) { |
|
47 |
sp.setDataInfo(dataInfo); |
|
48 |
} |
|
49 |
return sp; |
|
50 |
} |
|
51 |
|
|
52 |
public static Qualifier.Builder getQualifier(final String classname, final String schemename) { |
|
53 |
return Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename); |
|
54 |
} |
|
55 |
|
|
56 |
public static KeyValue getKV(final String id, final String name) { |
|
57 |
return KeyValue.newBuilder().setKey(id).setValue(name).build(); |
|
58 |
} |
|
59 |
|
|
60 |
public static OafEntity getDatasource(final String datasourceId) { |
|
61 |
return OafEntity |
|
62 |
.newBuilder() |
|
63 |
.setType(Type.datasource) |
|
64 |
.setId(datasourceId) |
|
65 |
.setDatasource( |
|
66 |
Datasource.newBuilder().setMetadata( |
|
67 |
Datasource.Metadata.newBuilder().setOfficialname(sf("officialname")).setEnglishname(sf("englishname")) |
|
68 |
.setWebsiteurl(sf("websiteurl")).setContactemail(sf("contactemail")).addAccessinfopackage(sf("accessinforpackage")) |
|
69 |
.setNamespaceprefix(sf("namespaceprofix")).setDescription(sf("description")).setOdnumberofitems(sf("numberofitems")) |
|
70 |
.setOdnumberofitemsdate(sf("numberofitems date")) |
|
71 |
// .addOdsubjects("subjects") |
|
72 |
.setOdpolicies(sf("policies")).addOdlanguages(sf("languages")).addOdcontenttypes(sf("contenttypes")) |
|
73 |
.setDatasourcetype(getQualifier("type class", "type scheme")))).build(); |
|
74 |
} |
|
75 |
|
|
76 |
public static OafEntity getResult(final String id) { |
|
77 |
return getResultBuilder(id).build(); |
|
78 |
} |
|
79 |
|
|
80 |
public static OafEntity.Builder getResultBuilder(final String id) { |
|
81 |
return OafEntity |
|
82 |
.newBuilder() |
|
83 |
.setType(Type.result) |
|
84 |
.setId(id) |
|
85 |
.setResult( |
|
86 |
Result.newBuilder() |
|
87 |
.setMetadata( |
|
88 |
Result.Metadata |
|
89 |
.newBuilder() |
|
90 |
.addTitle( |
|
91 |
getStructuredproperty( |
|
92 |
"Analysis of cell viability in intervertebral disc: Effect of endplate permeability on cell population", |
|
93 |
"main title", "dnet:result_titles", getDataInfo())) |
|
94 |
.addTitle(getStructuredproperty("Another title", "alternative title", "dnet:result_titles", getDataInfo())) |
|
95 |
.addSubject(getStructuredproperty("Biophysics", "subject", "dnet:result_sujects")) |
|
96 |
.setDateofacceptance(sf("2010-01-01")).addSource(sf("sourceA")).addSource(sf("sourceB")) |
|
97 |
.addContext(Context.newBuilder().setId("egi::virtual::970")) |
|
98 |
.addContext(Context.newBuilder().setId("egi::classification::natsc::math::applied")) |
|
99 |
.addContext(Context.newBuilder().setId("egi::classification::natsc::math")) |
|
100 |
.addContext(Context.newBuilder().setId("egi::classification::natsc")) |
|
101 |
.addContext(Context.newBuilder().setId("egi::classification")).addContext(Context.newBuilder().setId("egi")) |
|
102 |
.addDescription(sf("Responsible for making and maintaining the extracellular matrix ...")) |
|
103 |
.addDescription(sf("Another description ...")).setPublisher(sf("ELSEVIER SCI LTD")) |
|
104 |
.setResulttype(getQualifier("publication", "dnet:result_types")) |
|
105 |
.setLanguage(getQualifier("eng", "dnet:languages"))).addInstance(getInstance("10|od__10", "Uk pubmed")) |
|
106 |
.addInstance(getInstance("10|od__10", "arxiv"))) |
|
107 |
.addCollectedfrom(getKV("opendoar____::1064", "Oxford University Research Archive")) |
|
108 |
.addPid(getStructuredproperty("doi:74293", "doi", "dnet:pids")).addPid(getStructuredproperty("oai:74295", "oai", "dnet:pids")) |
|
109 |
.setDateofcollection(""); |
|
110 |
} |
|
111 |
|
|
112 |
public static DataInfo getDataInfo() { |
|
113 |
return getDataInfo("0.4"); |
|
114 |
} |
|
115 |
|
|
116 |
public static DataInfo getDataInfo(final String trust) { |
|
117 |
return DataInfo.newBuilder().setDeletedbyinference(false).setTrust("0.4").setInferenceprovenance("algo").setProvenanceaction(getQualifier("xx", "yy")) |
|
118 |
.build(); |
|
119 |
} |
|
120 |
|
|
121 |
public static Instance.Builder getInstance(final String hostedbyId, final String hostedbyName) { |
|
122 |
return Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName)).setAccessright(getQualifier("OpenAccess", "dnet:access_modes")) |
|
123 |
.setInstancetype(getQualifier("publication", "dnet:result_typologies")).addUrl("webresource url"); |
|
124 |
|
|
125 |
} |
|
126 |
|
|
127 |
public static OafRel getDedupRel(final String source, final String target, final RelType relType, final String relClass) { |
|
128 |
return OafRel.newBuilder().setSource(source).setTarget(target).setRelType(relType).setSubRelType(SubRelType.dedup).setRelClass(relClass) |
|
129 |
.setChild(false).setCachedTarget(getResult(target)) |
|
130 |
.setResultResult(ResultResult.newBuilder().setDedup(Dedup.newBuilder().setRelMetadata(RelMetadata.getDefaultInstance()))) |
|
131 |
.build(); |
|
132 |
} |
|
133 |
|
|
134 |
public static OafRel getProjectOrganization(final String source, final String target, final String relClass) throws InvalidProtocolBufferException { |
|
135 |
final OafRel.Builder oafRel = OafRel |
|
136 |
.newBuilder() |
|
137 |
.setSource(source) |
|
138 |
.setTarget(target) |
|
139 |
.setRelType(RelType.projectOrganization) |
|
140 |
.setSubRelType(SubRelType.participation) |
|
141 |
.setRelClass(relClass) |
|
142 |
.setChild(false) |
|
143 |
.setProjectOrganization( |
|
144 |
ProjectOrganization.newBuilder().setParticipation( |
|
145 |
Participation.newBuilder().setParticipantnumber("" + 1) |
|
146 |
.setRelMetadata(relMetadata(relClass, "dnet:project_organization_relations")))); |
|
147 |
switch (Participation.RelName.valueOf(relClass)) { |
|
148 |
case hasParticipant: |
|
149 |
oafRel.setCachedTarget(getProjectFP7(target, "SP3")); |
|
150 |
break; |
|
151 |
case isParticipant: |
|
152 |
oafRel.setCachedTarget(getOrganization(target)); |
|
153 |
break; |
|
154 |
default: |
|
155 |
break; |
|
156 |
} |
|
157 |
return oafRel.build(); |
|
158 |
} |
|
159 |
|
|
160 |
public static GeneratedMessage getOrganizationOrganization(final String source, final String target, final String relClass) { |
|
161 |
final OafRel.Builder oafRel = OafRel |
|
162 |
.newBuilder() |
|
163 |
.setSource(source) |
|
164 |
.setTarget(target) |
|
165 |
.setRelType(RelType.organizationOrganization) |
|
166 |
.setSubRelType(SubRelType.dedup) |
|
167 |
.setRelClass(relClass) |
|
168 |
.setChild(true) |
|
169 |
.setOrganizationOrganization( |
|
170 |
OrganizationOrganization.newBuilder().setDedup( |
|
171 |
Dedup.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:organization_organization_relations")))); |
|
172 |
|
|
173 |
switch (Dedup.RelName.valueOf(relClass)) { |
|
174 |
case isMergedIn: |
|
175 |
oafRel.setCachedTarget(getOrganization(source)); |
|
176 |
break; |
|
177 |
case merges: |
|
178 |
oafRel.setCachedTarget(getOrganization(target)); |
|
179 |
break; |
|
180 |
default: |
|
181 |
break; |
|
182 |
} |
|
183 |
return oafRel.build(); |
|
184 |
} |
|
185 |
|
|
186 |
public static OafRel getDatasourceOrganization(final String source, final String target, final String relClass) throws InvalidProtocolBufferException { |
|
187 |
final OafRel.Builder oafRel = OafRel |
|
188 |
.newBuilder() |
|
189 |
.setSource(source) |
|
190 |
.setTarget(target) |
|
191 |
.setRelType(RelType.datasourceOrganization) |
|
192 |
.setSubRelType(SubRelType.provision) |
|
193 |
.setRelClass(relClass) |
|
194 |
.setChild(false) |
|
195 |
.setDatasourceOrganization( |
|
196 |
DatasourceOrganization.newBuilder().setProvision( |
|
197 |
Provision.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:datasource_organization_relations")))); |
|
198 |
switch (Provision.RelName.valueOf(relClass)) { |
|
199 |
case isProvidedBy: |
|
200 |
oafRel.setCachedTarget(getOrganization(target)); |
|
201 |
break; |
|
202 |
case provides: |
|
203 |
oafRel.setCachedTarget(getDatasource(target)); |
|
204 |
break; |
|
205 |
default: |
|
206 |
break; |
|
207 |
} |
|
208 |
return oafRel.build(); |
|
209 |
} |
|
210 |
|
|
211 |
public static OafRel getSimilarityRel(final String sourceId, final String targetId, final OafEntity result, final String relClass) { |
|
212 |
return OafRel |
|
213 |
.newBuilder() |
|
214 |
.setSource(sourceId) |
|
215 |
.setTarget(targetId) |
|
216 |
.setRelType(RelType.resultResult) |
|
217 |
.setSubRelType(SubRelType.similarity) |
|
218 |
.setRelClass(relClass) |
|
219 |
.setChild(false) |
|
220 |
.setCachedTarget(result) |
|
221 |
.setResultResult( |
|
222 |
ResultResult.newBuilder().setSimilarity( |
|
223 |
Similarity.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:resultResult_relations")).setSimilarity(.4f) |
|
224 |
.setType(Similarity.Type.STANDARD))).build(); |
|
225 |
} |
|
226 |
|
|
227 |
public static RelMetadata.Builder relMetadata(final String classname, final String schemename) { |
|
228 |
return RelMetadata.newBuilder().setSemantics(getQualifier(classname, schemename)); |
|
229 |
} |
|
230 |
|
|
231 |
public static OafEntity getOrganization(final String orgId) { |
|
232 |
return OafEntity |
|
233 |
.newBuilder() |
|
234 |
.setType(Type.organization) |
|
235 |
.setId(orgId) |
|
236 |
.addCollectedfrom(getKV("opendoar_1234", "UK pubmed")) |
|
237 |
.setOrganization( |
|
238 |
Organization.newBuilder().setMetadata( |
|
239 |
Organization.Metadata.newBuilder().setLegalname(sf("CENTRE D'APPUI A LA RECHERCHE ET A LA FORMATION GIE")) |
|
240 |
.setLegalshortname(sf("CAREF")).setWebsiteurl(sf("www.caref-mali.org")) |
|
241 |
.setCountry(getQualifier("ML", "dnet:countries")))).build(); |
|
242 |
} |
|
243 |
|
|
244 |
public static OafRel getResultProject(final String from, final String to, final OafEntity project, final String relClass) |
|
245 |
throws InvalidProtocolBufferException { |
|
246 |
return OafRel |
|
247 |
.newBuilder() |
|
248 |
.setSource(from) |
|
249 |
.setTarget(to) |
|
250 |
.setRelType(RelType.resultProject) |
|
251 |
.setSubRelType(SubRelType.outcome) |
|
252 |
.setRelClass(relClass) |
|
253 |
.setChild(false) |
|
254 |
.setResultProject( |
|
255 |
ResultProject.newBuilder().setOutcome(Outcome.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:result_project_relations")))) |
|
256 |
.setCachedTarget(project).build(); |
|
257 |
} |
|
258 |
|
|
259 |
public static OafEntity getProjectFP7(final String projectId, final String fundingProgram) throws InvalidProtocolBufferException { |
|
260 |
return OafEntity |
|
261 |
.newBuilder() |
|
262 |
.setType(Type.project) |
|
263 |
.setId(projectId) |
|
264 |
.addCollectedfrom(getKV("opendoar_1234", "UK pubmed")) |
|
265 |
.setProject( |
|
266 |
Project.newBuilder() |
|
267 |
.setMetadata( |
|
268 |
Project.Metadata |
|
269 |
.newBuilder() |
|
270 |
.setAcronym(sf("5CYRQOL")) |
|
271 |
.setTitle(sf("Cypriot Researchers Contribute to our Quality of Life")) |
|
272 |
.setStartdate(sf("2007-05-01")) |
|
273 |
.setEnddate(sf("2007-10-31")) |
|
274 |
.setEcsc39(sf("false")) |
|
275 |
.setContracttype(getQualifier("CSA", "ec:FP7contractTypes")) |
|
276 |
.addFundingtree( |
|
277 |
sf("<fundingtree><funder><id>ec__________::EC</id><shortname>EC</shortname><name>European Commission</name></funder><funding_level_2><id>ec__________::EC::FP7::" |
|
278 |
+ fundingProgram |
|
279 |
+ "::PEOPLE</id><description>Marie-Curie Actions</description><name>PEOPLE</name><class>ec:program</class><parent><funding_level_1><id>ec__________::EC::FP7::" |
|
280 |
+ fundingProgram |
|
281 |
+ "</id><description>" |
|
282 |
+ fundingProgram |
|
283 |
+ "-People</description><name>" |
|
284 |
+ fundingProgram |
|
285 |
+ "</name><class>ec:specificprogram</class><parent><funding_level_0><id>ec__________::EC::FP7</id><description>SEVENTH FRAMEWORK PROGRAMME</description><name>FP7</name><parent/><class>ec:frameworkprogram</class></funding_level_0></parent></funding_level_1></parent></funding_level_2></fundingtree>")))) |
|
286 |
.build(); |
|
287 |
} |
|
288 |
|
|
289 |
public static OafEntity getProjectWT() throws InvalidProtocolBufferException { |
|
290 |
return OafEntity |
|
291 |
.newBuilder() |
|
292 |
.setType(Type.project) |
|
293 |
.setId("project|wt::087536") |
|
294 |
.addCollectedfrom(getKV("wellcomeTrust", "wellcome trust")) |
|
295 |
.setProject( |
|
296 |
Project.newBuilder() |
|
297 |
.setMetadata( |
|
298 |
Project.Metadata |
|
299 |
.newBuilder() |
|
300 |
.setAcronym(sf("UNKNOWN")) |
|
301 |
.setTitle(sf("Research Institute for Infectious Diseases of Poverty (IIDP).")) |
|
302 |
.setStartdate(sf("2007-05-01")) |
|
303 |
.setEnddate(sf("2007-10-31")) |
|
304 |
.setEcsc39(sf("false")) |
|
305 |
.setContracttype(getQualifier("UNKNOWN", "wt:contractTypes")) |
|
306 |
.addFundingtree( |
|
307 |
sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::UNKNOWN</id><description>UNKNOWN</description><name>UNKNOWN</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>")) |
|
308 |
.addFundingtree( |
|
309 |
sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::Technology Transfer</id><description>Technology Transfer</description><name>Technology Transfer</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>")))) |
|
310 |
.build(); |
|
311 |
} |
|
312 |
|
|
313 |
public static ExtraInfo extraInfo(final String name, final String provenance, final String trust, final String typology, final String value) { |
|
314 |
final ExtraInfo.Builder e = ExtraInfo.newBuilder().setName(name).setProvenance(provenance).setTrust(trust).setTypology(typology).setValue(value); |
|
315 |
return e.build(); |
|
316 |
} |
|
317 |
|
|
318 |
// public static DocumentClasses documentClasses() { |
|
319 |
// DocumentClasses.Builder builder = DocumentClasses.newBuilder(); |
|
320 |
// for (int i = 0; i < RandomUtils.nextInt(N_DOCUMENT_CLASSES) + 1; i++) { |
|
321 |
// builder.addArXivClasses(getDocumentClass()).addDdcClasses(getDocumentClass()).addWosClasses(getDocumentClass()) |
|
322 |
// .addMeshEuroPMCClasses(getDocumentClass()); |
|
323 |
// } |
|
324 |
// return builder.build(); |
|
325 |
// } |
|
326 |
// |
|
327 |
// private static DocumentClass getDocumentClass() { |
|
328 |
// DocumentClass.Builder builder = DocumentClass.newBuilder(); |
|
329 |
// for (int i = 0; i < RandomUtils.nextInt(N_DOCUMENT_CLASS_LABELS) + 1; i++) { |
|
330 |
// builder.addClassLabels("test_class_" + i); |
|
331 |
// } |
|
332 |
// return builder.setConfidenceLevel(0.5F).build(); |
|
333 |
// } |
|
334 |
// |
|
335 |
// public static DocumentStatistics documentStatistics() { |
|
336 |
// return |
|
337 |
// DocumentStatistics.newBuilder().setCitationsFromAllPapers(basicCitationStatistics()).setCitationsFromPublishedPapers(basicCitationStatistics()) |
|
338 |
// .build(); |
|
339 |
// } |
|
340 |
// |
|
341 |
// private static BasicCitationStatistics basicCitationStatistics() { |
|
342 |
// BasicCitationStatistics.Builder builder = BasicCitationStatistics.newBuilder(); |
|
343 |
// for (int i = 0; i < N_CITATION_STATS; i++) { |
|
344 |
// builder.addNumberOfCitationsPerYear(statisticsKeyValue()); |
|
345 |
// builder.setNumberOfCitations(RandomUtils.nextInt(5) + 1); |
|
346 |
// } |
|
347 |
// return builder.build(); |
|
348 |
// } |
|
349 |
// |
|
350 |
// private static StatisticsKeyValue statisticsKeyValue() { |
|
351 |
// return StatisticsKeyValue.newBuilder().setKey((RandomUtils.nextInt(30) + 1980) + "").setValue(RandomUtils.nextInt(5) + 1).build(); |
|
352 |
// } |
|
353 |
// |
|
354 |
// public static AuthorStatistics authorStatistics() { |
|
355 |
// AuthorStatistics.Builder builder = AuthorStatistics.newBuilder(); |
|
356 |
// builder.setCore(commonCoreStatistics()); |
|
357 |
// for (int i = 0; i < N_COAUTHORS; i++) { |
|
358 |
// builder.addCoAuthors(coAuthor()); |
|
359 |
// } |
|
360 |
// return builder.build(); |
|
361 |
// } |
|
362 |
// |
|
363 |
// private static CoAuthor coAuthor() { |
|
364 |
// CoAuthor.Builder builder = CoAuthor.newBuilder(); |
|
365 |
// builder.setId("30|od______2345::" + Hashing.md5(RandomStringUtils.random(10))); |
|
366 |
// builder.setCoauthoredPapersCount(RandomUtils.nextInt(5) + 1); |
|
367 |
// return builder.build(); |
|
368 |
// } |
|
369 |
// |
|
370 |
// public static CommonCoreStatistics commonCoreStatistics() { |
|
371 |
// CommonCoreStatistics.Builder builder = CommonCoreStatistics.newBuilder(); |
|
372 |
// |
|
373 |
// builder.setAllPapers(coreStatistics()); |
|
374 |
// builder.setPublishedPapers(coreStatistics()); |
|
375 |
// |
|
376 |
// return builder.build(); |
|
377 |
// } |
|
378 |
// |
|
379 |
// private static CoreStatistics coreStatistics() { |
|
380 |
// CoreStatistics.Builder builder = CoreStatistics.newBuilder(); |
|
381 |
// |
|
382 |
// builder.setNumberOfPapers(RandomUtils.nextInt(10)); |
|
383 |
// builder.setCitationsFromAllPapers(extendedStatistics()); |
|
384 |
// builder.setCitationsFromPublishedPapers(extendedStatistics()); |
|
385 |
// |
|
386 |
// return builder.build(); |
|
387 |
// } |
|
388 |
// |
|
389 |
// private static ExtendedStatistics extendedStatistics() { |
|
390 |
// ExtendedStatistics.Builder builder = ExtendedStatistics.newBuilder(); |
|
391 |
// |
|
392 |
// builder.setBasic(basicCitationStatistics()); |
|
393 |
// builder.setAverageNumberOfCitationsPerPaper(RandomUtils.nextFloat()); |
|
394 |
// for (int i = 0; i < N_CITATION_STATS; i++) { |
|
395 |
// builder.addNumberOfPapersCitedAtLeastXTimes(statisticsKeyValue()); |
|
396 |
// } |
|
397 |
// |
|
398 |
// return builder.build(); |
|
399 |
// } |
|
400 |
|
|
401 |
public static StringField sf(final String s) { |
|
402 |
return sf(s, null); |
|
403 |
} |
|
404 |
|
|
405 |
public static StringField sf(final String s, final DataInfo dataInfo) { |
|
406 |
final StringField.Builder sf = StringField.newBuilder().setValue(s); |
|
407 |
if (dataInfo != null) { |
|
408 |
sf.setDataInfo(dataInfo); |
|
409 |
} |
|
410 |
return sf.build(); |
|
411 |
} |
|
412 |
|
|
413 |
public static OafDecoder embed(final GeneratedMessage msg, |
|
414 |
final Kind kind, |
|
415 |
final boolean deletedByInference, |
|
416 |
final boolean inferred, |
|
417 |
final String provenance, |
|
418 |
final String action) { |
|
419 |
|
|
420 |
final Oaf.Builder oaf = Oaf |
|
421 |
.newBuilder() |
|
422 |
.setKind(kind) |
|
423 |
.setLastupdatetimestamp(System.currentTimeMillis()) |
|
424 |
.setDataInfo( |
|
425 |
DataInfo.newBuilder().setDeletedbyinference(deletedByInference).setInferred(inferred).setTrust("0.5") |
|
426 |
.setInferenceprovenance(provenance).setProvenanceaction(getQualifier(action, action))); |
|
427 |
switch (kind) { |
|
428 |
case entity: |
|
429 |
oaf.setEntity((OafEntity) msg); |
|
430 |
break; |
|
431 |
case relation: |
|
432 |
oaf.setRel((OafRel) msg); |
|
433 |
break; |
|
434 |
default: |
|
435 |
break; |
|
436 |
} |
|
437 |
|
|
438 |
return OafDecoder.decode(oaf.build()); |
|
439 |
} |
|
440 |
|
|
441 |
public static OafDecoder embed(final GeneratedMessage msg, final Kind kind) { |
|
442 |
return embed(msg, kind, false, false, "inference_provenance", "provenance_action"); |
|
443 |
} |
|
444 |
|
|
445 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.5/src/main/java/eu/dnetlib/data/mapreduce/util/OafEntityDecoder.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import java.util.LinkedList; |
|
4 |
import java.util.List; |
|
5 |
import java.util.stream.Collectors; |
|
6 |
|
|
7 |
import com.google.protobuf.Descriptors.FieldDescriptor; |
|
8 |
import com.google.protobuf.GeneratedMessage; |
|
9 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
10 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
11 |
import eu.dnetlib.data.transform.AbstractProtoMapper; |
|
12 |
|
|
13 |
public class OafEntityDecoder extends AbstractProtoMapper { |
|
14 |
|
|
15 |
private final OafEntity oafEntity; |
|
16 |
|
|
17 |
private OafEntityDecoder(final OafEntity oafEntity) { |
|
18 |
this.oafEntity = oafEntity; |
|
19 |
} |
|
20 |
|
|
21 |
public static OafEntityDecoder decode(final OafEntity oafEntity) { |
|
22 |
return new OafEntityDecoder(oafEntity); |
|
23 |
} |
|
24 |
|
|
25 |
public Type getType() { |
|
26 |
return oafEntity.getType(); |
|
27 |
} |
|
28 |
|
|
29 |
public String getId() { |
|
30 |
return oafEntity.getId(); |
|
31 |
} |
|
32 |
|
|
33 |
public GeneratedMessage getOafEntity() { |
|
34 |
return oafEntity; |
|
35 |
} |
|
36 |
|
|
37 |
public GeneratedMessage getEntity() { |
|
38 |
final FieldDescriptor fd = oafEntity.getDescriptorForType().findFieldByName(typeName()); |
|
39 |
return (GeneratedMessage) oafEntity.getField(fd); |
|
40 |
} |
|
41 |
|
|
42 |
public String typeName() { |
|
43 |
return oafEntity.getType().toString(); |
|
44 |
} |
|
45 |
|
|
46 |
public GeneratedMessage getMetadata() { |
|
47 |
final GeneratedMessage entity = getEntity(); |
|
48 |
final FieldDescriptor fd = entity.getDescriptorForType().findFieldByName("metadata"); |
|
49 |
final GeneratedMessage field = (GeneratedMessage) entity.getField(fd); |
|
50 |
return field; |
|
51 |
} |
|
52 |
|
|
53 |
public List<String> getFieldValues(final String path) { |
|
54 |
return processPath(getOafEntity(), path, eu.dnetlib.pace.config.Type.String).stream() |
|
55 |
.map(o -> o.toString()) |
|
56 |
.collect(Collectors.toCollection(LinkedList::new)); |
|
57 |
} |
|
58 |
|
|
59 |
public String getDateOfCollection() { |
|
60 |
return oafEntity.getDateofcollection(); |
|
61 |
} |
|
62 |
|
|
63 |
public String getDateOfTransformation() { |
|
64 |
return oafEntity.getDateoftransformation(); |
|
65 |
} |
|
66 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.5/src/main/java/eu/dnetlib/data/mapreduce/util/OafRowKeyDecoder.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import org.apache.commons.lang.StringUtils; |
|
4 |
import org.apache.commons.logging.Log; |
|
5 |
import org.apache.commons.logging.LogFactory; |
|
6 |
import org.apache.hadoop.io.Text; |
|
7 |
|
|
8 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
9 |
|
|
10 |
public class OafRowKeyDecoder { |
|
11 |
|
|
12 |
/** |
|
13 |
* logger. |
|
14 |
*/ |
|
15 |
private static final Log log = LogFactory.getLog(OafRowKeyDecoder.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
16 |
|
|
17 |
private static final String SEPARATOR = "|"; |
|
18 |
|
|
19 |
public final static String ID_REGEX = "^[0-9][0-9]\\|.{12}::[a-zA-Z0-9]{32}$"; |
|
20 |
|
|
21 |
private String key; |
|
22 |
|
|
23 |
private Type type = null; |
|
24 |
|
|
25 |
private String id = null; |
|
26 |
|
|
27 |
public static OafRowKeyDecoder decode(final byte[] key) throws IllegalArgumentException { |
|
28 |
return new OafRowKeyDecoder(new String(key)); |
|
29 |
} |
|
30 |
|
|
31 |
public static OafRowKeyDecoder decode(final String key) throws IllegalArgumentException { |
|
32 |
return new OafRowKeyDecoder(key); |
|
33 |
} |
|
34 |
|
|
35 |
private OafRowKeyDecoder(final String key) throws IllegalArgumentException { |
|
36 |
this.key = key; |
|
37 |
|
|
38 |
if (!key.matches(ID_REGEX)) { |
|
39 |
String msg = "invalid key: '" + key + "'"; |
|
40 |
log.error(msg); |
|
41 |
throw new IllegalArgumentException(msg); |
|
42 |
} |
|
43 |
|
|
44 |
int tag = Integer.parseInt(StringUtils.substringBefore(key, SEPARATOR)); |
|
45 |
this.type = Type.valueOf(tag); |
|
46 |
this.id = StringUtils.substringAfter(key, SEPARATOR); |
|
47 |
|
|
48 |
// System.out.println(OafRowTypeDecoder.class.getName() +" decoded key: " + split); |
|
49 |
} |
|
50 |
|
|
51 |
public String getKey() { |
|
52 |
return key; |
|
53 |
} |
|
54 |
|
|
55 |
public Text getKeyAsText() { |
|
56 |
return new Text(key); |
|
57 |
} |
|
58 |
|
|
59 |
public Type getType() { |
|
60 |
return type; |
|
61 |
} |
|
62 |
|
|
63 |
public String getId() { |
|
64 |
return id; |
|
65 |
} |
|
66 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.2.5/src/main/java/eu/dnetlib/data/transform/xml/AbstractDNetXsltFunctions.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import java.nio.charset.Charset; |
|
4 |
import java.security.MessageDigest; |
|
5 |
import java.util.*; |
|
6 |
import java.util.function.Function; |
|
7 |
import java.util.stream.Collectors; |
|
8 |
|
|
9 |
import com.google.common.base.Predicate; |
|
10 |
import com.google.common.base.Splitter; |
|
11 |
import com.google.common.collect.Lists; |
|
12 |
import com.google.common.collect.Maps; |
|
13 |
import com.google.common.collect.Sets; |
|
14 |
import com.google.protobuf.Descriptors.Descriptor; |
|
15 |
import com.google.protobuf.Descriptors.FieldDescriptor; |
|
16 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
17 |
import com.google.protobuf.Message; |
|
18 |
import com.google.protobuf.Message.Builder; |
|
19 |
import com.google.protobuf.ProtocolMessageEnum; |
|
20 |
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization; |
|
21 |
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision; |
|
22 |
import eu.dnetlib.data.proto.DedupProtos.Dedup; |
|
23 |
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity; |
|
24 |
import eu.dnetlib.data.proto.FieldTypeProtos.*; |
|
25 |
import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance.OriginDescription; |
|
26 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
27 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
28 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
29 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
30 |
import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization; |
|
31 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization; |
|
32 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation; |
|
33 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
|
34 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
35 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
36 |
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization; |
|
37 |
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation; |
|
38 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject; |
|
39 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome; |
|
40 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult; |
|
41 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Part; |
|
42 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset; |
|
43 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity; |
|
44 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Supplement; |
|
45 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
46 |
import eu.dnetlib.miscutils.collections.Pair; |
|
47 |
import eu.dnetlib.miscutils.iterators.IterablePair; |
|
48 |
import org.apache.commons.codec.binary.Base64; |
|
49 |
import org.apache.commons.codec.binary.Hex; |
|
50 |
import org.apache.commons.lang3.StringUtils; |
|
51 |
import org.apache.commons.lang.math.NumberUtils; |
|
52 |
import org.w3c.dom.NamedNodeMap; |
|
53 |
import org.w3c.dom.Node; |
|
54 |
import org.w3c.dom.NodeList; |
|
55 |
|
|
56 |
public abstract class AbstractDNetXsltFunctions { |
|
57 |
|
|
58 |
public static final String URL_REGEX = "^(http|https|ftp)\\://.*"; |
|
59 |
private static final int MAX_NSPREFIX_LEN = 12; |
|
60 |
public static Predicate<String> urlFilter = s -> s.trim().matches(URL_REGEX); |
|
61 |
public static Map<String, String> code2name = Maps.newHashMap(); |
|
62 |
|
|
63 |
/* |
|
64 |
* Obtained via COPY (select code, name from class) TO '/tmp/class_scheme.csv' (FORMAT csv, delimiter ',', FORCE_QUOTE *); on the |
|
65 |
* relational db |
|
66 |
*/ |
|
67 |
//code2name.put("openaire2.0_data","OpenAIRE Data (funded, referenced datasets)"); |
|
68 |
static { |
|
69 |
code2name.put("MH","Marshall Islands"); |
|
70 |
code2name.put("CF","Central African Republic"); |
|
71 |
code2name.put("TD","Chad"); |
|
72 |
code2name.put("CN","China (People's Republic of)"); |
|
73 |
code2name.put("NG","Nigeria"); |
|
74 |
code2name.put("NF","Norfolk Island"); |
|
75 |
code2name.put("MP","Northern Mariana Islands"); |
|
76 |
code2name.put("PS","Palestinian-administered areas"); |
|
77 |
code2name.put("SZ","Swaziland"); |
|
78 |
code2name.put("max","Manx"); |
|
79 |
code2name.put("TW","Taiwan"); |
|
80 |
code2name.put("TJ","Tajikistan"); |
|
81 |
code2name.put("BSG","Research for the benefit of specific groups"); |
|
82 |
code2name.put("CP","Collaborative project"); |
|
83 |
code2name.put("12MONTHS","12 Months Embargo"); |
|
84 |
code2name.put("ace","Achinese"); |
|
85 |
code2name.put("egy","Ancient Egyptian"); |
|
86 |
code2name.put("ara","Arabic"); |
|
87 |
code2name.put("arc","Aramaic"); |
|
88 |
code2name.put("arp","Arapaho"); |
|
89 |
code2name.put("gon","Gondi"); |
|
90 |
code2name.put("ine","Indo-European"); |
|
91 |
code2name.put("ipk","Inupiaq"); |
|
92 |
code2name.put("ira","Iranian"); |
|
93 |
code2name.put("lim","Limburgan; Limburger; Limburgish"); |
|
94 |
code2name.put("mni","Manipuri"); |
|
95 |
code2name.put("mno","Manobo"); |
|
96 |
code2name.put("men","Mende"); |
|
97 |
code2name.put("CX","Christmas Island"); |
|
98 |
code2name.put("CC","Cocos (Keeling) Islands"); |
|
99 |
code2name.put("KM","Comoros"); |
|
100 |
code2name.put("CG","Congo"); |
|
101 |
code2name.put("CK","Cook Islands"); |
|
102 |
code2name.put("HR","Croatia"); |
|
103 |
code2name.put("arn","Araucanian"); |
|
104 |
code2name.put("art","Artificial"); |
|
105 |
code2name.put("nah","Aztec"); |
|
106 |
code2name.put("bug","Buginese"); |
|
107 |
code2name.put("chn","Chinook jargon"); |
|
108 |
code2name.put("chv","Chuvash"); |
|
109 |
code2name.put("mus","Creek"); |
|
110 |
code2name.put("mic","Micmac"); |
|
111 |
code2name.put("min","Minangkabau"); |
|
112 |
code2name.put("fro","Old French"); |
|
113 |
code2name.put("cpp","Portuguese-based Creoles and Pidgins"); |
|
114 |
code2name.put("som","Somali"); |
|
115 |
code2name.put("wen","Sorbian"); |
|
116 |
code2name.put("hrv","Croatian"); |
|
117 |
code2name.put("cus","Cushitic"); |
|
118 |
code2name.put("sot","Sotho, Southern"); |
|
119 |
code2name.put("sai","South American Indian"); |
|
120 |
code2name.put("esl/spa","Spanish"); |
|
121 |
code2name.put("CU","Cuba"); |
|
122 |
code2name.put("CW","Curaçao"); |
|
123 |
code2name.put("CZ","Czech Republic"); |
|
124 |
code2name.put("DK","Denmark"); |
|
125 |
code2name.put("ER","Eritrea"); |
|
126 |
code2name.put("TF","French Southern Territories"); |
|
127 |
code2name.put("GW","Guinea-Bissau"); |
|
128 |
code2name.put("VA","Holy See (Vatican City State)"); |
|
129 |
code2name.put("BO","Bolivia"); |
|
130 |
code2name.put("KY","Cayman Islands"); |
|
131 |
code2name.put("dra","Dravidian"); |
|
132 |
code2name.put("cpe","English-based Creoles and Pidgins"); |
|
133 |
code2name.put("oji","Ojibwa"); |
|
134 |
code2name.put("CIP-EIP-TN","CIP-Eco-Innovation - CIP-Thematic Network"); |
|
135 |
code2name.put("jav/jaw","Javanese"); |
|
136 |
code2name.put("ach","Acoli"); |
|
137 |
code2name.put("ada","Adangme"); |
|
138 |
code2name.put("afh","Afrihili"); |
|
139 |
code2name.put("afr","Afrikaans"); |
|
140 |
code2name.put("afa","Afro-Asiatic"); |
|
141 |
code2name.put("ale","Aleut"); |
|
142 |
code2name.put("alg","Algonquian languages"); |
|
143 |
code2name.put("arw","Arawak"); |
|
144 |
code2name.put("asm","Assamese"); |
|
145 |
code2name.put("ava","Avaric"); |
|
146 |
code2name.put("ave","Avestan"); |
|
147 |
code2name.put("bra","Braj"); |
|
148 |
code2name.put("bua","Buriat"); |
|
149 |
code2name.put("chr","Cherokee"); |
|
150 |
code2name.put("chy","Cheyenne"); |
|
151 |
code2name.put("jrb","Judeo-Arabic"); |
|
152 |
code2name.put("jpr","Judeo-Persian"); |
|
153 |
code2name.put("kab","Kabyle"); |
|
154 |
code2name.put("kac","Kachin"); |
|
155 |
code2name.put("kaa","Kara-Kalpak"); |
|
156 |
code2name.put("loz","Lozi"); |
|
157 |
code2name.put("mwr","Marwari"); |
|
158 |
code2name.put("DJ","Djibouti"); |
|
159 |
code2name.put("JM","Jamaica"); |
|
160 |
code2name.put("JP","Japan"); |
|
161 |
code2name.put("JE","Jersey"); |
|
162 |
code2name.put("JO","Jordan"); |
|
163 |
code2name.put("KZ","Kazakhstan"); |
|
164 |
code2name.put("KE","Kenya"); |
|
165 |
code2name.put("KI","Kiribati"); |
|
166 |
code2name.put("KR","Korea (Republic of)"); |
|
167 |
code2name.put("KP","Korea, Democatric People's Republic of"); |
|
168 |
code2name.put("XK","Kosovo * UN resolution"); |
|
169 |
code2name.put("KW","Kuwait"); |
|
170 |
code2name.put("NL","Netherlands"); |
|
171 |
code2name.put("PE","Peru"); |
|
172 |
code2name.put("PH","Philippines"); |
|
173 |
code2name.put("fre/fra","French"); |
|
174 |
code2name.put("PL","Poland"); |
|
175 |
code2name.put("PT","Portugal"); |
|
176 |
code2name.put("PR","Puerto Rico"); |
|
177 |
code2name.put("QA","Qatar"); |
|
178 |
code2name.put("RO","Romania"); |
|
179 |
code2name.put("RU","Russian Federation"); |
|
180 |
code2name.put("RW","Rwanda"); |
|
181 |
code2name.put("RE","Réunion"); |
|
182 |
code2name.put("sve/swe","Swedish"); |
|
183 |
code2name.put("myn","Mayan"); |
|
184 |
code2name.put("dum","Middle Dutch"); |
|
185 |
code2name.put("mun","Munda"); |
|
186 |
code2name.put("nde","Ndebele, North"); |
|
187 |
code2name.put("ndo","Ndonga"); |
|
188 |
code2name.put("nyn","Nyankole"); |
|
189 |
code2name.put("nzi","Nzima"); |
|
190 |
code2name.put("oci","Occitan (post 1500); Provençal"); |
|
191 |
code2name.put("GU","Guam"); |
|
192 |
code2name.put("tut","Altaic"); |
|
193 |
code2name.put("awa","Awadhi"); |
|
194 |
code2name.put("ban","Balinese"); |
|
195 |
code2name.put("bal","Baluchi"); |
|
196 |
code2name.put("bai","Bamileke"); |
|
197 |
code2name.put("bad","Banda"); |
|
198 |
code2name.put("UK","United Kingdom"); |
|
199 |
code2name.put("bas","Basa"); |
|
200 |
code2name.put("tib/bod","Tibetan"); |
|
201 |
code2name.put("ben","Bengali"); |
|
202 |
code2name.put("ber","Berber"); |
|
203 |
code2name.put("cho","Choctaw"); |
|
204 |
code2name.put("cop","Coptic"); |
|
205 |
code2name.put("crp","Creoles and Pidgins"); |
|
206 |
code2name.put("dak","Dakota"); |
|
207 |
code2name.put("del","Delaware"); |
|
208 |
code2name.put("div","Divehi"); |
|
209 |
code2name.put("kha","Khasi"); |
|
210 |
code2name.put("khi","Khoisan"); |
|
211 |
code2name.put("kho","Khotanese"); |
|
212 |
code2name.put("osa","Osage"); |
|
213 |
code2name.put("oss","Ossetian; Ossetic"); |
|
214 |
code2name.put("oto","Otomian"); |
|
215 |
code2name.put("GT","Guatemala"); |
|
216 |
code2name.put("ota","Ottoman"); |
|
217 |
code2name.put("GG","Guernsey"); |
|
218 |
code2name.put("GY","Guyana"); |
|
219 |
code2name.put("LA","Lao (People's Democratic Republic)"); |
|
220 |
code2name.put("LB","Lebanon"); |
|
221 |
code2name.put("LY","Libyan Arab Jamahiriya"); |
|
222 |
code2name.put("LI","Liechtenstein"); |
|
223 |
code2name.put("LT","Lithuania"); |
|
224 |
code2name.put("LU","Luxembourg"); |
|
225 |
code2name.put("PW","Palau"); |
|
226 |
code2name.put("BL","Saint-Barthélemy"); |
|
227 |
code2name.put("SM","San Marino"); |
|
228 |
code2name.put("SX","Sint Maarten (Dutch Part)"); |
|
229 |
code2name.put("TL","Timor-Leste"); |
|
230 |
code2name.put("TK","Tokelau"); |
|
231 |
code2name.put("TO","Tonga"); |
|
232 |
code2name.put("TN","Tunisia"); |
|
233 |
code2name.put("TC","Turks and Caicos Islands"); |
|
234 |
code2name.put("TV","Tuvalu"); |
|
235 |
code2name.put("GB","United Kingdom"); |
|
236 |
code2name.put("VU","Vanuatu"); |
|
237 |
code2name.put("pal","Pahlavi"); |
|
238 |
code2name.put("pau","Palauan"); |
|
239 |
code2name.put("pam","Pampanga"); |
|
240 |
code2name.put("pag","Pangasinan"); |
|
241 |
code2name.put("pap","Papiamento"); |
|
242 |
code2name.put("fas/per","Persian"); |
|
243 |
code2name.put("phn","Phoenician"); |
|
244 |
code2name.put("sid","Sidamo"); |
|
245 |
code2name.put("GA","Gabon"); |
|
246 |
code2name.put("GL","Greenland"); |
|
247 |
code2name.put("GD","Grenada"); |
|
248 |
code2name.put("GP","Guadeloupe"); |
|
249 |
code2name.put("IE","Ireland"); |
|
250 |
code2name.put("spa","Spanish; Castilian"); |
|
251 |
code2name.put("IM","Isle of Man"); |
|
252 |
code2name.put("IT","Italy"); |
|
253 |
code2name.put("ES","Spain"); |
|
254 |
code2name.put("SR","Suriname"); |
|
255 |
code2name.put("TZ","Tanzania (United Republic of)"); |
|
256 |
code2name.put("TH","Thailand"); |
|
257 |
code2name.put("TG","Togo"); |
|
258 |
code2name.put("UG","Uganda"); |
|
259 |
code2name.put("UZ","Uzbekistan"); |
|
260 |
code2name.put("VE","Venezuela"); |
|
261 |
code2name.put("VI","Virgin Islands, U.S."); |
|
262 |
code2name.put("WF","Wallis and Futuna"); |
|
263 |
code2name.put("COFUND-PCP","COFUND (PCP)"); |
|
264 |
code2name.put("amh","Amharic"); |
|
265 |
code2name.put("map","Austronesian"); |
|
266 |
code2name.put("aym","Aymara"); |
|
267 |
code2name.put("bnt","Bantu"); |
|
268 |
code2name.put("bak","Bashkir"); |
|
269 |
code2name.put("bho","Bhojpuri"); |
|
270 |
code2name.put("bik","Bikol"); |
|
271 |
code2name.put("bul","Bulgarian"); |
|
272 |
code2name.put("cor","Cornish"); |
|
273 |
code2name.put("dua","Duala"); |
|
274 |
code2name.put("dut/nld","Dutch; Flemish"); |
|
275 |
code2name.put("isRelatedTo","isRelatedTo"); |
|
276 |
code2name.put("coauthor","coauthor"); |
|
277 |
code2name.put("dyu","Dyula"); |
|
278 |
code2name.put("eka","Ekajuk"); |
|
279 |
code2name.put("gil","Gilbertese"); |
|
280 |
code2name.put("suk","Sukuma"); |
|
281 |
code2name.put("sux","Sumerian"); |
|
282 |
code2name.put("sun","Sundanese"); |
|
283 |
code2name.put("sus","Susu"); |
|
284 |
code2name.put("swa","Swahili"); |
|
285 |
code2name.put("0010","Lecture"); |
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-openaireplus-mapping-utils-6.2.5