Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/pom.xml =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/pom.xml (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/pom.xml (revision 58513) @@ -0,0 +1,155 @@ + + + + eu.dnetlib + dnet45-parent + 1.0.0 + + + 4.0.0 + eu.dnetlib + dnet-openaireplus-mapping-utils + jar + 6.3.43 + + scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43 + + + + true + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 2.19.1 + + + integration-test + + integration-test + + + + verify + + verify + + + + + ${skipITs} + + + + + + + + com.google.guava + guava + ${google.guava.version} + + + junit + junit + ${junit.version} + test + + + com.ximpleware + vtd-xml + [2.12, 3.0.0) + + + commons-codec + commons-codec + ${commons.codec.version} + + + dom4j + dom4j + ${dom4j.version} + + + xml-apis + xml-apis + + + + + net.sf.supercsv + super-csv + 2.4.0 + + + eu.dnetlib + dnet-openaire-data-protos + [3.9.8] + + + eu.dnetlib + dnet-pace-core + [3.0.0,4.0.0) + + + eu.dnetlib + cnr-misc-utils + [1.0.0,2.0.0) + + + eu.dnetlib + dnet-hadoop-commons + [2.0.0,3.0.0) + + + eu.dnetlib + dnet-index-solr-common + [2.3.4,3.0.0) + + + com.googlecode.protobuf-java-format + protobuf-java-format + 1.2 + + + org.apache.commons + commons-lang3 + 3.5 + + + + + eu.dnetlib + dnet-openaireplus-profiles + [1.0.0,2.0.0) + test + + + org.mongodb + mongo-java-driver + ${mongodb.driver.version} + test + + + org.springframework + spring-context + ${spring.version} + test + + + org.springframework + spring-core + ${spring.version} + test + + + org.springframework + spring-test + ${spring.version} + test + + + + Index: 
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/index/CloudIndexClient.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/index/CloudIndexClient.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/index/CloudIndexClient.java (revision 58513) @@ -0,0 +1,175 @@ +package eu.dnetlib.data.index; + +import eu.dnetlib.functionality.index.solr.feed.StreamingInputDocumentFactory; +import eu.dnetlib.miscutils.datetime.HumanTime; +import eu.dnetlib.miscutils.functional.UnaryFunction; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; + +import java.io.Closeable; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.List; + +/** + * Created by michele on 11/11/15. 
+ */ +public class CloudIndexClient implements Closeable { + + private static final Log log = LogFactory.getLog(CloudIndexClient.class); + private static final String INDEX_RECORD_RESULT_FIELD = "dnetResult"; + + private final CloudSolrClient solrClient; + + protected CloudIndexClient(final CloudSolrClient solrServer) { + this.solrClient = solrServer; + } + + public int feed(final String record, final String indexDsId, final UnaryFunction toIndexRecord) throws CloudIndexClientException { + return feed(record, indexDsId, toIndexRecord, true); + } + + public int feed(final String record, final String indexDsId, final UnaryFunction toIndexRecord, final boolean commit) + throws CloudIndexClientException { + try { + final SolrInputDocument doc = prepareSolrDocument(record, indexDsId, toIndexRecord); + if ((doc == null) || doc.isEmpty()) throw new CloudIndexClientException("Invalid solr document"); + return feed(doc, commit); + } catch (final Throwable e) { + throw new CloudIndexClientException("Error feeding document", e); + } + } + + public int feed(final SolrInputDocument document) throws CloudIndexClientException { + return feed(document, true); + } + + public int feed(final SolrInputDocument document, final boolean commit) throws CloudIndexClientException { + try { + final UpdateResponse res = solrClient.add(document); + log.debug("feed time for single records, elapsed time: " + HumanTime.exactly(res.getElapsedTime())); + if (res.getStatus() != 0) { throw new CloudIndexClientException("bad status: " + res.getStatus()); } + if (commit) { + solrClient.commit(); + } + return res.getStatus(); + } catch (final Throwable e) { + throw new CloudIndexClientException("Error feeding document", e); + } + } + + public void feed(final List docs, final AfterFeedingCallback callback) throws CloudIndexClientException { + feed(docs, callback, true); + } + + public void feed(final List docs, final AfterFeedingCallback callback, final boolean commit) throws CloudIndexClientException { 
+ try { + if (docs.isEmpty()) { + log.debug("Empty list of documents. Calling callback, if needed."); + if (callback != null) { + callback.doAfterFeeding(null); + } + return; + } + final UpdateResponse res = solrClient.add(docs); + + log.debug("feed time for " + docs.size() + " records, elapsed tipe: : " + HumanTime.exactly(res.getElapsedTime())); + + if (commit) { + solrClient.commit(); + } + if (callback != null) { + callback.doAfterFeeding(res); + } + if (res.getStatus() != 0) throw new CloudIndexClientException("bad status: " + res.getStatus()); + } catch (final Throwable e) { + throw new CloudIndexClientException("Error feeding documents", e); + } + } + + public SolrInputDocument prepareSolrDocument(final String record, final String indexDsId, final UnaryFunction toIndexRecord) + throws CloudIndexClientException { + try { + final StreamingInputDocumentFactory documentFactory = new StreamingInputDocumentFactory(); + + final String version = (new SimpleDateFormat("yyyy-MM-dd\'T\'hh:mm:ss\'Z\'")).format(new Date()); + final String indexRecord = toIndexRecord.evaluate(record); + + if (log.isDebugEnabled()) { + log.debug("***************************************\nSubmitting index record:\n" + indexRecord + "\n***************************************\n"); + } + + return documentFactory.parseDocument(version, indexRecord, indexDsId, INDEX_RECORD_RESULT_FIELD); + } catch (final Throwable e) { + throw new CloudIndexClientException("Error creating solr document", e); + } + } + + public boolean isRecordIndexed(final String id) throws CloudIndexClientException { + final QueryResponse res = query("objidentifier:\"" + id + "\"", null); + return res.getResults().size() > 0; + } + + public int remove(final String id) throws CloudIndexClientException { + return remove(id, true); + } + + public int remove(final String id, final boolean commit) throws CloudIndexClientException { + String q = String.format("objidentifier:\"%s\" OR resultdupid:\"%s\"", id, id); + try { + final 
UpdateResponse res = solrClient.deleteByQuery(q); + if (commit) { + solrClient.commit(); + } + return res.getResponse().size(); + } catch (final Throwable e) { + throw new CloudIndexClientException("Error removing documents", e); + } + } + + public int count(final String query) throws CloudIndexClientException { + final QueryResponse res = query(query, 0); + return res.getResults().size(); + } + + public QueryResponse query(final String query, Integer rows) throws CloudIndexClientException { + try { + final SolrQuery solrQuery = new SolrQuery(); + solrQuery.setQuery(query); + if(rows != null && rows >= 0) { + solrQuery.setRows(rows); + } + return solrClient.query(solrQuery); + } catch (final Throwable e) { + throw new CloudIndexClientException("Error searching documents", e); + } + } + + public void close() throws IOException { + if (solrClient != null) { + solrClient.close(); + } + } + + public void commit() throws CloudIndexClientException { + if(solrClient != null) { + try { + solrClient.commit(); + } catch (SolrServerException | IOException e) { + throw new CloudIndexClientException(e.getMessage()); + } + } + } + + public interface AfterFeedingCallback { + + void doAfterFeeding(final UpdateResponse response); + } +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/index/CloudIndexClientFactory.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/index/CloudIndexClientFactory.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/index/CloudIndexClientFactory.java (revision 58513) @@ -0,0 +1,41 @@ +package eu.dnetlib.data.index; + +import eu.dnetlib.functionality.index.utils.ZkServers; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; 
+import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.response.SolrPingResponse; + +/** + * Created by michele on 11/11/15. + */ +public class CloudIndexClientFactory { + + private static final Log log = LogFactory.getLog(CloudIndexClientFactory.class); + + public static CloudIndexClient newIndexClient(final String baseURL, final String collection, final boolean parallelUpdates) + throws CloudIndexClientException { + try { + log.info(String.format("Initializing solr server (%s) ...", baseURL)); + + final ZkServers zk = ZkServers.newInstance(baseURL); + final CloudSolrClient client = new CloudSolrClient.Builder(zk.getHosts(), zk.getChroot()) + .withParallelUpdates(parallelUpdates) + .build(); + + client.connect(); + client.setDefaultCollection(collection); + + final SolrPingResponse rsp = client.ping(); + if (rsp.getStatus() != 0) { + log.error("Invalid connection to solr Server (status = 0)"); + throw new CloudIndexClientException("Invalid connection to solr Server (status = 0)"); + } + return new CloudIndexClient(client); + } catch (Throwable e) { + log.error("The initialization of indexClient is FAILED", e); + throw new CloudIndexClientException("The initialization of indexClient is FAILED", e); + } + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/index/CloudIndexClientException.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/index/CloudIndexClientException.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/index/CloudIndexClientException.java (revision 58513) @@ -0,0 +1,15 @@ +package eu.dnetlib.data.index; + +/** + * Created by michele on 23/11/15. 
+ */
+public class CloudIndexClientException extends Exception {
+
+    /** Creates an exception carrying only a message. */
+    public CloudIndexClientException(final String message) {
+        super(message);
+    }
+
+    /** Creates an exception carrying a message and the underlying cause. */
+    public CloudIndexClientException(final String message, final Throwable cause) {
+        super(message, cause);
+    }
+}
Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/OafToHbaseXsltFunctions.java
===================================================================
--- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/OafToHbaseXsltFunctions.java (nonexistent)
+++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/OafToHbaseXsltFunctions.java (revision 58513)
@@ -0,0 +1,256 @@
+package eu.dnetlib.data.transform.xml;
+
+import java.util.HashMap;
+import java.util.List;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.protobuf.Descriptors.Descriptor;
+import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
+import eu.dnetlib.data.proto.FieldTypeProtos.Author;
+import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
+import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
+import eu.dnetlib.data.proto.OafProtos.Oaf;
+import eu.dnetlib.data.proto.OafProtos.OafEntity;
+import eu.dnetlib.data.proto.ResultProtos.Result;
+import eu.dnetlib.data.proto.ResultProtos.Result.Context;
+import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
+import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
+import eu.dnetlib.data.proto.TypeProtos.Type;
+import org.apache.commons.lang3.StringUtils;
+import org.w3c.dom.NodeList;
+
+// Static helpers that turn the node lists and strings passed in by the caller into a
+// base64-encoded Oaf "result" protobuf.
+// NOTE(review): the List declarations below carry no type arguments although one of them is
+// iterated as Element - presumably the generics were lost when this dump was produced; confirm
+// against the repository before compiling.
+public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions {
+
+    /**
+     * Builds a complete Oaf result entity and returns it base64-encoded.
+     * The entity id is decoded from resultId, the field values are parsed from the metadata node list,
+     * and collection/transformation dates, OAI provenance (from about) and data info are attached.
+     * Any Throwable is handed to handleException, which rethrows it as a RuntimeException, so the
+     * trailing "return null" is not expected to be reached.
+     */
+    public static String oafResult(
+            final String resultId,
+            final boolean invisible,
+            final String provenance,
+            final String trust,
+            final NodeList about,
+            final String originalId,
+            final String dateOfCollection,
+            final String dateOfTransformation,
+            final NodeList metadata) {
+
+        ValueMap values = null;
+        try {
+            final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
+            values = ValueMap.parseNodeList(metadata);
+            final Descriptor mDesc = Result.Metadata.getDescriptor();
+
+            final List collectedFrom = getKeyValues(values, "collectedfrom", Type.datasource);
+            final List hostedBy = getKeyValues(values, "hostedby", Type.datasource);
+
+            final Result.Metadata.Builder metadataBuilder = buildMetadata(values, mDesc);
+            final Result.Builder result = buildResult(metadataBuilder, values, collectedFrom, hostedBy);
+            final OafEntity.Builder entity = buildOafEntity(result, entityId, metadata, collectedFrom, originalId);
+            entity.setDateofcollection(dateOfCollection)
+                    .setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about));
+
+            final Oaf oaf = getOaf(entity, getDataInfo(invisible, about, provenance, trust, false, false));
+            return base64(oaf.toByteArray());
+        } catch (final Throwable e) {
+            handleException(e, resultId, values);
+        }
+        return null;
+    }
+
+    /**
+     * Builds an Oaf result entity for an update and returns it base64-encoded.
+     * Compared with oafResult: no collectedFrom and no originalId are set, no dates or OAI provenance
+     * are attached, and getOaf receives null instead of a data info.
+     * The provenance and trust parameters are not used by this method.
+     */
+    public static String oafResultUpdate(final String resultId,
+            final String provenance,
+            final String trust,
+            final NodeList nodelist) {
+        ValueMap values = null;
+        try {
+            final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
+            values = ValueMap.parseNodeList(nodelist);
+            final List hostedBy = getKeyValues(values, "hostedby", Type.datasource);
+
+            final Descriptor mDesc = Result.Metadata.getDescriptor();
+
+            final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
+            final Result.Builder result = buildResult(metadata, values, null, hostedBy);
+
+            final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, null, null);
+            final Oaf oaf = getOaf(entity, null);
+            return base64(oaf.toByteArray());
+        } catch (final Throwable e) {
+            handleException(e, resultId, values);
+        }
+        return null;
+    }
+
+    /**
+     * Wraps the result builder into an OafEntity of type "result".
+     * The pids are parsed from the node list; a blank originalId is passed on as null,
+     * otherwise as a single-element list.
+     */
+    private static OafEntity.Builder buildOafEntity(
+            final Result.Builder result,
+            final String entityId,
+            final NodeList nodelist,
+            final List collectedFrom,
+            final String originalId) {
+
+        final List pids = Lists.newArrayList();
+        pids.addAll(parsePids(nodelist));
+
+        final OafEntity.Builder entity =
+                getEntity(Type.result, entityId, collectedFrom, StringUtils.isBlank(originalId) ? null : Lists.newArrayList(originalId), null, null, pids)
+                        .setResult(result);
+        return entity;
+    }
+
+    /**
+     * Fills a Result.Metadata builder from the parsed values: authors (from "creator"), subjects, titles,
+     * descriptions/sources/contributors, language, dates, publisher, result type, fulltext, format,
+     * contexts (from "concept") and journals.
+     * Only "creator", the description/source/contributor group, "concept" and "journal" are guarded
+     * against a null lookup; the other lookups are dereferenced directly.
+     */
+    private static Result.Metadata.Builder buildMetadata(final ValueMap values, final Descriptor mDesc) {
+        final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
+
+        if (values.get("creator") != null) {
+            for (final Element e : values.get("creator")) {
+
+                final Author.Builder author = Author.newBuilder();
+
+                final String fullname = e.getText();
+                author.setFullname(fullname);
+                // The rank is read from the element's index attribute.
+                author.setRank(Integer.valueOf(e.getAttributeValue(ValueMap.IDX_ATTRIBUTE)));
+
+                final String nameIdentifier = e.getAttributeValue("nameIdentifier");
+                final String nameIdentifierScheme = e.getAttributeValue("nameIdentifierScheme");
+
+                // A pid is added only when both the identifier and its scheme are present.
+                if (StringUtils.isNotBlank(nameIdentifier) && StringUtils.isNotBlank(nameIdentifierScheme)) {
+                    author.addPid(getKV(nameIdentifierScheme, nameIdentifier));
+                }
+
+                // Name and surname are set only when the Person parser reports the full name as accurate.
+                final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
+                if (p.isAccurate()) {
+                    author.setName(p.getNormalisedFirstName());
+                    author.setSurname(p.getNormalisedSurname());
+                }
+                metadata.addAuthor(author);
+            }
+        }
+
+        addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject"), "keyword", "dnet:subject_classification_typologies");
+        addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title"), "main title", "dnet:dataCite_title");
+        for (final String fieldname : Lists.newArrayList("description", "source", "contributor")) {
+            if (values.get(fieldname) != null) {
+                for (final String s : values.get(fieldname).listValues()) {
+                    addField(metadata, mDesc.findFieldByName(fieldname), s);
+                }
+            }
+        }
+        addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues()));
+        addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
+        addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues());
+        addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues());
+        addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues());
+
+        String resulttype = getResultType(values);
+        addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies"));
+
+        addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues());
+        addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues());
+        if (values.get("concept") != null) {
+            for (final Element e : values.get("concept")) {
+                final String id = e.getAttributes().get("id");
+                if (StringUtils.isNotBlank(id)) {
+                    metadata.addContext(Context.newBuilder().setId(id));
+                }
+            }
+        }
+        if (values.get("journal") != null) {
+            for (final Element e : values.get("journal")) {
+                addJournal(metadata, e);
+            }
+        }
+
+        return metadata;
+    }
+
+    /**
+     * Determines the result type from the first "cobjcategory" element.
+     * An element with blank text is replaced by one with text "0000" and the same attributes; when there is
+     * no element at all, an element "0000" without attributes is used. A non-blank "type" attribute wins,
+     * otherwise the default result type for that element is returned.
+     */
+    private static String getResultType(final ValueMap values) {
+
+        final Element cobjcategory = values.get("cobjcategory").stream()
+                .map(e -> StringUtils.isNotBlank(e.getText()) ? e : new Element("0000", e.getAttributes()))
+                .findFirst()
+                .orElse(new Element("0000", new HashMap<>()));
+
+        final String resulttype = cobjcategory.getAttributeValue("type");
+        if (StringUtils.isNotBlank(resulttype)) {
+            return resulttype;
+        }
+
+        return getDefaultResulttype(cobjcategory);
+    }
+
+    /**
+     * Builds the Result: one Instance (license, access right, instance type, hostedby, collectedfrom,
+     * date of acceptance, urls, refereed, processing charge) plus one ExternalReference per "reference"
+     * element, and finally attaches the given metadata.
+     */
+    private static Result.Builder buildResult(final Result.Metadata.Builder metadata,
+            final ValueMap values,
+            final List collectedFrom,
+            final List hostedBy) {
+        final Result.Builder result = Result.newBuilder();
+
+        final Instance.Builder instance = Instance.newBuilder();
+
+        addField(instance, Instance.getDescriptor().findFieldByName("license"), values.get("license").listValues());
+
+        addField(instance, Instance.getDescriptor().findFieldByName("accessright"),
+                setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues()));
+
+        addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
+                setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues()));
+
+        addField(instance, Instance.getDescriptor().findFieldByName("hostedby"), hostedBy);
+        addField(instance, Instance.getDescriptor().findFieldByName("collectedfrom"), collectedFrom);
+        addField(instance, Instance.getDescriptor().findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
+
+        // Only the identifiers accepted by urlFilter are stored as instance urls.
+        if (values.get("identifier") != null) {
+            addField(instance, Instance.getDescriptor().findFieldByName("url"),
+                    Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter)));
+        }
+        if (values.get("refereed") != null) {
+            addField(instance, Instance.getDescriptor().findFieldByName("refereed"), values.get("refereed").listValues());
+        }
+
+        // All amounts are added; the currency is taken from the first element only.
+        final ElementList pcs = values.get("processingchargeamount");
+        if (pcs != null && !pcs.isEmpty()) {
+            addField(instance, Instance.getDescriptor().findFieldByName("processingchargeamount"), pcs.listValues());
+            final String currency = pcs.get(0).getAttributeValue("currency");
+            if (StringUtils.isNotBlank(currency)) {
+                addField(instance, Instance.getDescriptor().findFieldByName("processingchargecurrency"), currency);
+            }
+        }
+
+        // #4468: processingchargeamount - implementation to clarify: result vs instance. Single value vs repeatable
+//        if (values.get("processingchargeamount") != null) {
+//            ElementList processingchargeamount = values.get("processingchargeamount");
+//            for(Element pc : processingchargeamount) {
+//                addField(instance, Instance.getDescriptor().findFieldByName("processingchargeamount"), pc.getText());
+//                addField(instance, Instance.getDescriptor().findFieldByName("processingchargecurrency"), pc.getAttributeValue("processingchargecurrency"));
+//            }
+//        }
+
+        result.addInstance(instance);
+
+        // NOTE(review): unlike "identifier" and "refereed" above, this lookup is dereferenced without a null check.
+        final List extrefs = values.get("reference");
+        if (!extrefs.isEmpty()) {
+            final Descriptor extDesc = ExternalReference.getDescriptor();
+            for (final Element element : extrefs) {
+                final ExternalReference.Builder extref = ExternalReference.newBuilder();
+                addField(extref, extDesc.findFieldByName("url"), element.getText());
+                addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source"));
+                addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier"));
+                addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title"));
+                addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query"));
+                addField(extref, extDesc.findFieldByName("qualifier"),
+                        setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type")))
+                                .build());
+
+                result.addExternalReference(extref);
+            }
+        }
+
+        return result.setMetadata(metadata);
+    }
+
+    // Prints the failing result id (and the parsed values, when available) to stderr before the error is rethrown.
+    private static void handleException(Throwable e, final String resultId, final ValueMap values) {
+        System.err.println("resultId: " + resultId);
+        if (values != null) {
+            System.err.println("values: " +
values); + } + e.printStackTrace(); + throw new RuntimeException(e); + } +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/AuthorMerger.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/AuthorMerger.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/AuthorMerger.java (revision 58513) @@ -0,0 +1,185 @@ +package eu.dnetlib.data.transform; + +import com.wcohen.ss.JaroWinkler; +import eu.dnetlib.data.bulktag.Pair; +import eu.dnetlib.data.proto.FieldTypeProtos.Author; +import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; +import eu.dnetlib.pace.model.Person; +import org.apache.commons.lang3.StringUtils; + +import java.text.Normalizer; +import java.util.*; +import java.util.function.Function; + +import static java.util.stream.Collectors.*; + +public class AuthorMerger { + + private static final Double THRESHOLD = 0.95; + private static final String ORCID = "orcid"; + private static final int MAX_AUTHORS = 200; + + public static List merge(final Collection> authors, final double threshold) { + return merge(authors, THRESHOLD); + } + + public static List merge(final Collection> authors) { + return doMerge( + authors.stream() + .map(group -> group.stream() + .map(AuthorMerger::fixORCID) + .collect(toList())) + .collect(toList())); + } + + private static List doMerge(final Collection> authors) { + final List res = new ArrayList<>(); + + if (authors.isEmpty()) { + return res; + } + + if (authors.size() == 1) { + return authors.iterator().next(); + } + + final TreeMap>> byOrcidCount = new TreeMap<>( + authors.stream() + .collect(groupingBy(AuthorMerger::countOrcid)) + .entrySet().stream() + .filter(e -> e.getKey() > 0) + .collect(toMap( + 
Map.Entry::getKey, + Map.Entry::getValue + ))); + + if (byOrcidCount == null || byOrcidCount.isEmpty()) { + return authors.iterator().next(); + } + final Map.Entry>> mostOrcid = byOrcidCount.lastEntry(); + + if (mostOrcid.getKey() > 0) { + + final List pivots = mostOrcid.getValue().iterator().next(); + + res.addAll(mostOrcid.getValue().iterator().next().stream() + .filter(a -> hasOrcid(a)) + .collect(toList())); + + if (pivots.size() == res.size()) { + return res; + } + + final Collection authorList = authors.stream() + .filter(g -> !g.equals(pivots)) + .flatMap(List::stream) + .filter(a -> hasOrcid(a)) + .limit(MAX_AUTHORS) + .map(a -> { + final String orcid = a.getPidList().stream() + .filter(p -> p.getKey().equalsIgnoreCase(ORCID)) + .findFirst() + .get().getValue(); + return new Pair(orcid, a); + }) + .collect(toMap( + p -> p.getFst(), + p -> p.getSnd(), + (p1, p2) -> p2)) + .values(); + + pivots.stream().filter(a -> !hasOrcid(a)).forEach(pivot -> { + final Author.Builder b = Author.newBuilder(pivot); + authorList.parallelStream() + .map(a -> { + return new Pair(sim(a, pivot), a); + }) + .filter(p -> p.getFst() >= THRESHOLD) + .forEach(p -> { + b.mergeFrom(p.getSnd()); + }); + + Collection pids = b.getPidList().stream() + .collect(toMap( + kv -> kv.getKey(), + Function.identity(), + (kv1, kv2) -> kv2 + )).values(); + b.clearPid(); + b.addAllPid(pids); + + res.add(b.build()); + }); + } + + return res; + } + + private static Author fixORCID(final Author author) { + final Author.Builder b = Author.newBuilder(author); + for(KeyValue.Builder pid : b.getPidBuilderList()) { + if (pid.getKey().toLowerCase().contains(ORCID)) { + pid.setKey("ORCID"); + if (pid.getValue().contains("orcid.org")) { + pid.setValue(StringUtils.substringAfterLast(pid.getValue(), "/")); + + } + } + } + return b.build(); + } + + private static int countOrcid(final List authors) { + return authors.stream() + .map(a -> { + return hasOrcid(a) ? 
1 : 0; + }) + .mapToInt(Integer::intValue) + .sum(); + } + + private static boolean hasOrcid(Author a) { + return a.getPidList().stream().anyMatch(p -> p.getKey().equalsIgnoreCase(ORCID)); + } + + private static Double sim(Author a, Author b) { + + final Person pa = parse(a); + final Person pb = parse(b); + + if (pa.isAccurate() & pb.isAccurate()) { + return new JaroWinkler().score( + normalize(pa.getSurnameString()), + normalize(pb.getSurnameString())); + } else { + return new JaroWinkler().score( + normalize(pa.getNormalisedFullname()), + normalize(pb.getNormalisedFullname())); + } + } + + private static Person parse(Author author) { + if (author.hasSurname()) { + return new Person(author.getSurname() + ", " + author.getName(), false); + } else { + return new Person(author.getFullname(), false); + } + } + + private static String normalize(final String s) { + return nfd(s).toLowerCase() + // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings + .replaceAll("(\\W)+", " ") + .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") + .replaceAll("(\\p{Punct})+", " ") + .replaceAll("(\\d)+", " ") + .replaceAll("(\\n)+", " ") + .trim(); + } + + private static String nfd(final String s) { + return Normalizer.normalize(s, Normalizer.Form.NFD); + } + + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/OdfToHbaseXsltFunctions.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/OdfToHbaseXsltFunctions.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/OdfToHbaseXsltFunctions.java (revision 58513) @@ -0,0 +1,451 @@ +package eu.dnetlib.data.transform.xml; + +import 
java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder; +import eu.dnetlib.data.proto.FieldTypeProtos.Author; +import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; +import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; +import eu.dnetlib.data.proto.OafProtos.Oaf; +import eu.dnetlib.data.proto.OafProtos.OafEntity; +import eu.dnetlib.data.proto.ResultProtos.Result; +import eu.dnetlib.data.proto.ResultProtos.Result.Context; +import eu.dnetlib.data.proto.ResultProtos.Result.Instance; +import eu.dnetlib.data.proto.ResultProtos.Result.Metadata.Builder; +import eu.dnetlib.data.proto.TypeProtos.Type; +import org.apache.commons.lang3.StringUtils; +import org.w3c.dom.Element; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +public class OdfToHbaseXsltFunctions extends CommonDNetXsltFunctions { + + private static Map mappingAccess = Maps.newHashMap(); + + static { + + mappingAccess.put("info:eu-repo/semantics/openAccess", "OPEN"); + mappingAccess.put("info:eu-repo/semantics/closedAccess", "CLOSED"); + mappingAccess.put("info:eu-repo/semantics/restrictedAccess", "RESTRICTED"); + mappingAccess.put("info:eu-repo/semantics/embargoedAccess", "EMBARGO"); + + // Transformator now maps the access rights into proper values, not sure if it does for all datasets. 
+ mappingAccess.put("OPEN", "OPEN"); + mappingAccess.put("CLOSED", "CLOSED"); + mappingAccess.put("RESTRICTED", "RESTRICTED"); + mappingAccess.put("EMBARGO", "EMBARGO"); + mappingAccess.put("OPEN SOURCE", "OPEN SOURCE"); + + } + + public static String odfResult( + final String resultId, + final boolean invisible, + final NodeList about, + final NodeList metadata, + final NodeList titles, + final NodeList creators, + final NodeList subjects, + final NodeList publisher, + final NodeList descriptions, + final NodeList dates, + final NodeList dateaccepted, + final NodeList resourceTypes, + final NodeList formats, + final NodeList sizes, + final NodeList languages, + final NodeList cobjcategory, + final NodeList contributors, + final NodeList rights, + final NodeList license, + final NodeList version, + final NodeList pidList, + final String provenance, + final String trust, + final NodeList hostedby, + final NodeList collectedfrom, + final NodeList originalIds, + final String instanceUri, + final String landingPage, + final NodeList distributionlocation, + final NodeList documentationUrl, + final String dateOfCollection, + final String dateOfTransformation) { + + try { + final String entityId = OafRowKeyDecoder.decode(resultId).getKey(); + + final Result.Builder result = Result.newBuilder(); + Result.Metadata.Builder metadataProto = Result.Metadata.newBuilder(); + + // subject + for (int i = 0; i < subjects.getLength(); i++) { + Node currentNode = subjects.item(i); + NodeList childNodes = currentNode.getChildNodes(); + if (childNodes.getLength() > 0) { + String subjectValue = childNodes.item(0).getNodeValue(); + String schemeName = "keyword"; + String schemeURI ="keyword"; + if (currentNode.hasAttributes()) { + NamedNodeMap attributes = currentNode.getAttributes(); + Node schemeNameNode = attributes.getNamedItem("subjectScheme"); + Node schemeURINode = attributes.getNamedItem("schemeURI"); + if(schemeNameNode != null) schemeName = schemeNameNode.getTextContent(); + 
if(schemeURINode != null) schemeURI = schemeURINode.getTextContent(); + if(schemeNameNode != null && schemeURINode == null) schemeURI = schemeName; + if(schemeURINode != null && schemeNameNode == null) schemeName = schemeURI; + } + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("subject"), + getStructuredProperty(subjectValue, schemeURI, schemeName, "dnet:subject_classification_typologies", "dnet:subject_classification_typologies")); + } + } + + // title + for (int i = 0; i < titles.getLength(); i++) { + Node currentNode = titles.item(i); + NodeList childNodes = currentNode.getChildNodes(); + if (childNodes.getLength() > 0) { + String titleValue = childNodes.item(0).getNodeValue(); + String classname = "main title"; + String classid = "main title"; + if (currentNode.hasAttributes()) { + NamedNodeMap attributes = currentNode.getAttributes(); + Node titleType = attributes.getNamedItem("titleType"); + + if (titleType != null && titleType.getNodeValue().equals("AlternativeTitle")) { + classname = "alternative title"; + classid = "alternative title"; + } + if (titleType != null && titleType.getNodeValue().equals("Subtitle")) { + classname = "subtitle"; + classid = "subtitle"; + } + if (titleType != null && titleType.getNodeValue().equals("TranslatedTitle")) { + classname = "translated title"; + classid = "translated title"; + } + } + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("title"), + getStructuredProperty(titleValue, classname, classid, "dnet:dataCite_title", "dnet:dataCite_title")); + } + } + + // creators + for (int i = 0; i < creators.getLength(); i++) { + final Element creator = (Element) creators.item(i); + if (creator != null && creator.hasChildNodes()) { + + final NodeList creatorNames = creator.getElementsByTagName("creatorName"); + if (creatorNames.getLength() > 0) { + createAuthor(metadataProto, i, creator, creatorNames); + } else{ + //handle authors with namespaceprefix + final NodeList creatorNamesNs 
= creator.getElementsByTagNameNS("http://datacite.org/schema/kernel-4", "creatorName"); + if (creatorNamesNs.getLength() > 0) { + createAuthor(metadataProto, i, creator, creatorNamesNs); + } + + } + } + } + + // description + for (int i = 0; i < descriptions.getLength(); i++) { + Element currentNode = (Element) descriptions.item(i); + if (currentNode != null && currentNode.hasChildNodes()) { + String descriptionValue = currentNode.getChildNodes().item(0).getNodeValue(); + + final String descriptionType = currentNode.getAttribute("descriptionType"); + if (StringUtils.isNotBlank(descriptionType)) { + switch (descriptionType) { + case "TechnicalInfo": + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("tool"), descriptionValue); + break; + case "Abstract": + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("description"), descriptionValue); + break; + case "DistributionForm": + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("format"), descriptionValue); + break; + } + } else { + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("description"), descriptionValue); + } + } + } + + // contributors + for (int i = 0; i < contributors.getLength(); i++) { + final Element contributor = (Element) contributors.item(i); + if (contributor != null && contributor.hasChildNodes()) { + + NodeList contributorNames = contributor.getElementsByTagName("contributorName"); + if (contributorNames != null) { + Element contributorName = (Element) contributorNames.item(0); + if (contributorName != null) { + final String contributorValue = contributorName.getTextContent(); + final String contributorType = contributor.getAttribute("contributorType"); + + if (StringUtils.isNotBlank(contributorType)) { + switch (contributorType) { + case "ContactPerson": + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("contactperson"), contributorValue); + break; + case "ContactGroup": + 
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("contactgroup"), contributorValue); + break; + } + } else { + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("contributor"), contributorValue); + } + } + } + } + } + + // publisher + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("publisher"), getFirstItem(publisher)); + + + + // dates + for (int i = 0; i < dates.getLength(); i++) { + Node currentNode = dates.item(i); + if (currentNode != null && currentNode.hasAttributes() && currentNode.hasChildNodes()) { + + final NamedNodeMap attributes = currentNode.getAttributes(); + final Node dateType = attributes.getNamedItem("dateType") == null ? attributes.getNamedItem("datetype") : null; + if (dateType != null) { + + String dateAttribute = dateType.getNodeValue(); + String dateValue = currentNode.getChildNodes().item(0).getNodeValue(); + String protoAttribute = "relevantdate"; + if ("Accepted".equals(dateAttribute)) { + protoAttribute = "dateofacceptance"; + } else if ("Issued".equals(dateAttribute)) { + protoAttribute = "storagedate"; + } else if ("Updated".equals(dateAttribute)) { + protoAttribute = "lastmetadataupdate"; + } else if ("Available".equals(dateAttribute)) { + protoAttribute = "embargoenddate"; + } + if (protoAttribute.equals("relevantdate") == false) { + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute), dateValue); + } else { + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute), + getStructuredProperty(dateValue, "UNKNOWN", "UNKNOWN", "dnet:dataCite_date", "dnet:dataCite_date")); + } + } + } + } + + //license + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("license"), getFirstItem(license)); + // dateofacceptance + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted)); + + // size + addField(metadataProto, 
Result.Metadata.getDescriptor().findFieldByName("size"), getFirstItem(sizes)); + + // version + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("version"), getFirstItem(version)); + + // language + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("language"), + setQualifier(getDefaultQualifier("dnet:languages"), Lists.newArrayList(getFirstItem(languages)))); + + // resource type + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resourcetype"), + setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(getFirstItem(resourceTypes)))); + + // resultType + final String cobjcategoryCode = getFirstItem(cobjcategory); + final String resulttype = getResultType(cobjcategory); + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies")); + + switch (resulttype) { + case "software" : + // format + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("programmingLanguage"), + getSimpleQualifier(getFirstItem(formats), "dnet:programming_languages")); + break; + case "dataset": + for (int i = 0; i < formats.getLength(); i++) { + Node currentNode = formats.item(i); + NodeList childNodes = currentNode.getChildNodes(); + if (childNodes.getLength() > 0) { + String formatValue = childNodes.item(0).getNodeValue(); + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("format"), formatValue); + } + } + break; + case "other": + + break; + } + + // documentationUrl + for (int i = 0; i < documentationUrl.getLength(); i++) { + final Element docUrl = (Element) documentationUrl.item(i); + if (docUrl != null && docUrl.hasChildNodes()) { + final String value = docUrl.getTextContent(); + if (StringUtils.isNotBlank(value)) { + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("documentationUrl"), value); + } + } + } + + ValueMap values = 
ValueMap.parseNodeList(metadata); + // contexts + if (values.get("concept") != null) { + for (final eu.dnetlib.data.transform.xml.Element e : values.get("concept")) { + final String id = e.getAttributes().get("id"); + if (StringUtils.isBlank(id)) throw new IllegalArgumentException("Context id cannot be blank"); + metadataProto.addContext(Context.newBuilder().setId(id)); + } + } + + //journal + if (values.containsKey("journal")) { + for (final eu.dnetlib.data.transform.xml.Element journal : values.get("journal")) { + addJournal(metadataProto, journal); + + } + } + + final List hostedBys = getKeyValues(ValueMap.parseNodeList(hostedby), "hostedby", Type.datasource); + final List collectedFroms = getKeyValues(ValueMap.parseNodeList(collectedfrom), "collectedfrom", Type.datasource); + + final Instance.Builder instance = Instance.newBuilder(); + + String tmpRigths = "UNKNOWN"; + final String firstRight = getFirstItem(rights); + if (mappingAccess.containsKey(firstRight)) { + tmpRigths = mappingAccess.get(firstRight); + } + + addField(instance, Instance.getDescriptor().findFieldByName("license"), getFirstItem(license)); + addField(instance, Instance.getDescriptor().findFieldByName("hostedby"), hostedBys); + + addField(instance, Instance.getDescriptor().findFieldByName("accessright"), + setQualifier(getDefaultQualifier("dnet:access_modes"), Lists.newArrayList(tmpRigths))); + + addField(instance, Instance.getDescriptor().findFieldByName("instancetype"), + setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(cobjcategoryCode))); + + if (StringUtils.isNotBlank(landingPage)) { + addField(instance, Instance.getDescriptor().findFieldByName("url"), landingPage); + } + //sometimes the instanceUri is blank... 
+ if (StringUtils.isNotBlank(instanceUri)) { + addField(instance, Instance.getDescriptor().findFieldByName("url"), instanceUri); + } + + addField(instance, Instance.getDescriptor().findFieldByName("distributionlocation"), getFirstItem(distributionlocation)); + + addField(instance, Instance.getDescriptor().findFieldByName("collectedfrom"), collectedFroms); + addField(instance, Instance.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted)); + if (values.get("refereed") != null) { + addField(instance, Instance.getDescriptor().findFieldByName("refereed"), values.get("refereed").listValues()); + } + + final ElementList pcs = values.get("processingchargeamount"); + if (pcs != null && !pcs.isEmpty()) { + addField(instance, Instance.getDescriptor().findFieldByName("processingchargeamount"), pcs.listValues()); + final String currency = pcs.get(0).getAttributeValue("currency"); + if (StringUtils.isNotBlank(currency)) { + addField(instance, Instance.getDescriptor().findFieldByName("processingchargecurrency"), currency); + } + } + + result.addInstance(instance); + + List pids = parsePids(pidList); + + // original ids + final Set originalIdList = Sets.newHashSet(); + for (int i = 0; i < originalIds.getLength(); i++) { + Node currentNode = originalIds.item(i); + if (currentNode != null && currentNode.hasChildNodes()) { + originalIdList.add(currentNode.getChildNodes().item(0).getNodeValue()); + } + } + + OafEntity.Builder entity = + getEntity(Type.result, entityId, collectedFroms, originalIdList, dateOfCollection, dateOfTransformation, pids).setResult( + result.setMetadata(metadataProto)); + + entity.setOaiprovenance(getOAIProvenance(about)); + + Oaf oaf = getOaf(entity, getDataInfo(invisible, about, provenance, trust, false, false)); + return base64(oaf.toByteArray()); + } catch (Exception e) { + e.printStackTrace(System.err); + throw new RuntimeException(e); + } + + } + + private static void createAuthor(final Builder metadataProto, final int i, final 
Element creator, final NodeList creatorNames) { + final Element creatorName = (Element) creatorNames.item(0); + + final Author.Builder author = Author.newBuilder(); + author.setRank(i+1); + final String fullname = StringUtils.trim(creatorName.getTextContent()); + + author.setFullname(fullname); + + final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false); + if (p.isAccurate()) { + author.setName(p.getNormalisedFirstName()); + author.setSurname(p.getNormalisedSurname()); + } + final NodeList nameIdentifiers = creator.getElementsByTagName("nameIdentifier"); + if (nameIdentifiers.getLength() > 0) { + final Element nameIdentifier = (Element) nameIdentifiers.item(0); + final String nameIdentifierScheme = nameIdentifier.getAttribute("nameIdentifierScheme"); + final String id = StringUtils.trim(nameIdentifier.getTextContent()); + if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(nameIdentifierScheme)) { + author.addPid(getKV(nameIdentifierScheme, id)); + } + } + + addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("author"), author); + } + + private static String getResultType(final NodeList cobjcategoryNode) { + + final ValueMap values = ValueMap.parseNodeList(cobjcategoryNode); + + final eu.dnetlib.data.transform.xml.Element cobjcategory = values.get("cobjcategory").stream() + .map(e -> StringUtils.isNotBlank(e.getText()) ? 
e : new eu.dnetlib.data.transform.xml.Element("0000", e.getAttributes())) + .findFirst() + .orElse(new eu.dnetlib.data.transform.xml.Element("0000", new HashMap<>())); + + final String resulttype = cobjcategory.getAttributeValue("type"); + if (StringUtils.isNotBlank(resulttype)) { + return resulttype; + } + + return getDefaultResulttype(cobjcategory); + } + + public static String getFirstItem(final NodeList list) { + String out = ""; + if (list != null) { + + if (list.getLength() > 0 && list.item(0).getChildNodes() != null && list.item(0).getChildNodes().getLength() > 0) { + out = list.item(0).getChildNodes().item(0).getNodeValue(); + } + } + return out; + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Community.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Community.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Community.java (revision 58513) @@ -0,0 +1,72 @@ +package eu.dnetlib.data.bulktag; + +import com.google.gson.Gson; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import java.util.ArrayList; +import java.util.List; + +/** + * Created by miriam on 01/08/2018. 
+ */ +public class Community { + + private static final Log log = LogFactory.getLog(Community.class); + + private String id; + private List subjects = new ArrayList<>(); + private List datasources = new ArrayList<>(); + private List zenodoCommunities = new ArrayList<>(); + private List organizationCommunity = new ArrayList<>(); + + public List getOrganizationCommunity() { + return organizationCommunity; + } + + public void setOrganizationCommunity(List organizationCommunity) { + this.organizationCommunity = organizationCommunity; + } + + public String toJson() { + final Gson g = new Gson(); + return g.toJson(this); + } + + public boolean isValid() { + return !getSubjects().isEmpty() || !getDatasources().isEmpty() || !getZenodoCommunities().isEmpty(); + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public List getSubjects() { + return subjects; + } + + public void setSubjects(List subjects) { + this.subjects = subjects; + } + + public List getDatasources() { + return datasources; + } + + public void setDatasources(List datasources) { + this.datasources = datasources; + } + + public List getZenodoCommunities() { + return zenodoCommunities; + } + + public void setZenodoCommunities(List zenodoCommunities) { + this.zenodoCommunities = zenodoCommunities; + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/CommunityConfiguration.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/CommunityConfiguration.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/CommunityConfiguration.java (revision 58513) @@ -0,0 +1,191 @@ +package eu.dnetlib.data.bulktag; + +import com.google.common.collect.Lists; 
+import com.google.common.collect.Maps; +import com.google.gson.Gson; + +import com.google.gson.GsonBuilder; +import eu.dnetlib.data.bulktag.selectioncriteria.InterfaceAdapter; +import eu.dnetlib.data.bulktag.selectioncriteria.Selection; +import eu.dnetlib.data.bulktag.selectioncriteria.VerbResolver; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Created by miriam on 02/08/2018. + */ +public class CommunityConfiguration { + + private static final Log log = LogFactory.getLog(CommunityConfiguration.class); + + + enum MapModes{ + SUBJECT_MAP, + DATASOURCE_MAP, + ZENODO_COMMUNITY_MAP + } + + private Map communities; + + + //map subject -> communityid + private transient Map>> subjectMap = new HashMap<>(); + //map datasourceid -> communityid + private transient Map>> datasourceMap = new HashMap<>(); + //map zenodocommunityid -> communityid + private transient Map>> zenodocommunityMap = new HashMap<>(); + //map organizationid -> communityid + private transient Map>> organizationcommunityMap = new HashMap<>(); + + CommunityConfiguration(final Map communities) { + this.communities = communities; + init(); + } + + void init() { + + if (subjectMap == null) { + subjectMap = Maps.newHashMap(); + } + if (datasourceMap == null) { + datasourceMap = Maps.newHashMap(); + } + if (zenodocommunityMap == null) { + zenodocommunityMap = Maps.newHashMap(); + } + if (organizationcommunityMap == null){ + organizationcommunityMap = Maps.newHashMap(); + } + + for(Community c : getCommunities().values()) { + //get subjects + final String id = c.getId(); + for(String sbj : c.getSubjects()){ + Pair p = new Pair<>(id,new SelectionConstraints()); + add(sbj.toLowerCase().trim() , p, subjectMap); + } + //get datasources + 
for(Datasource d: c.getDatasources()){ + + add(d.getOpenaireId(),new Pair<>(id,d.getSelectionConstraints()),datasourceMap); + } + //get zenodo communities + for(ZenodoCommunity zc : c.getZenodoCommunities()){ + add(zc.getZenodoCommunityId(),new Pair<>(id,zc.getSelCriteria()),zenodocommunityMap); + } + //get organizations + for(Organization org: c.getOrganizationCommunity()){ + add(org.getOrganizationId(), new Pair<>(id,org.getSelCriteria()),organizationcommunityMap); + } + + } + } + + private void add(String key, Pair value, Map>> map){ + List> values = map.get(key); + + if (values == null){ + values = new ArrayList<>(); + map.put(key,values); + } + values.add(value); + } + + public List> getCommunityForSubject(String sbj){ + return subjectMap.get(sbj); + } + + public List> getCommunityForDatasource(String dts){ + return datasourceMap.get(dts); + } + + + public List getCommunityForDatasource(final String dts, final Map> param) { + List> lp = datasourceMap.get(dts); + if (lp==null) + return Lists.newArrayList(); + + return lp.stream().map(p -> { + if (p.getSnd() == null) + return p.getFst(); + if (((SelectionConstraints) p.getSnd()).verifyCriteria(param)) + return p.getFst(); + else + return null; + }).filter(st->(st!=null)).collect(Collectors.toList()); + + + } + + public List getCommunityForOrganizationValue(String org){ + return getContextIds(organizationcommunityMap.get(org)); + } + + public List> getCommunityForOrganization(String org){ + return organizationcommunityMap.get(org); + } + + public List> getCommunityForZenodoCommunity(String zc){ + return zenodocommunityMap.get(zc); + } + + public List getCommunityForSubjectValue(String value) { + + return getContextIds(subjectMap.get(value)); + } + + public List getCommunityForDatasourceValue(String value) { + + return getContextIds(datasourceMap.get(value.toLowerCase())); + } + + public List getCommunityForZenodoCommunityValue(String value){ + + return getContextIds(zenodocommunityMap.get(value.toLowerCase())); + 
} + + private List getContextIds(List> list) { + if (list != null) { + return list.stream().map(p -> p.getFst()).collect(Collectors.toList()); + } + return Lists.newArrayList(); + } + + + public Map getCommunities() { + return communities; + } + + public void setCommunities(Map communities) { + this.communities = communities; + } + + public String toJson() { + GsonBuilder builder = new GsonBuilder(); + builder.registerTypeAdapter(Selection.class, new InterfaceAdapter()); + Gson gson = builder.create(); + + return gson.toJson(this); + } + + public int size() { + return communities.keySet().size(); + } + + public Community getCommunityById(String id){ + return communities.get(id); + } + + public List getCommunityList() { + return Lists.newLinkedList(communities.values()); + } +} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/InterfaceAdapter.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/InterfaceAdapter.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/InterfaceAdapter.java (revision 58513) @@ -0,0 +1,37 @@ +package eu.dnetlib.data.bulktag.selectioncriteria; + +import com.google.gson.*; + +import java.lang.reflect.Type; + +public class InterfaceAdapter implements JsonSerializer, JsonDeserializer { + + private static final String CLASSNAME = "CLASSNAME"; + private static final String DATA = "DATA"; + + public Object deserialize(JsonElement jsonElement, Type type, + JsonDeserializationContext jsonDeserializationContext) throws JsonParseException { + + JsonObject jsonObject = jsonElement.getAsJsonObject(); + JsonPrimitive prim = (JsonPrimitive) 
jsonObject.get(CLASSNAME); + String className = prim.getAsString(); + Class klass = getObjectClass(className); + return jsonDeserializationContext.deserialize(jsonObject.get(DATA), klass); + } + public JsonElement serialize(Object jsonElement, Type type, JsonSerializationContext jsonSerializationContext) { + JsonObject jsonObject = new JsonObject(); + jsonObject.addProperty(CLASSNAME, jsonElement.getClass().getName()); + jsonObject.add(DATA, jsonSerializationContext.serialize(jsonElement)); + return jsonObject; + } + /****** Helper method to get the className of the object to be deserialized *****/ + public Class getObjectClass(String className) { + try { + return Class.forName(className); + } catch (ClassNotFoundException e) { + //e.printStackTrace(); + throw new JsonParseException(e.getMessage()); + } + } +} + Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/ContainsVerb.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/ContainsVerb.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/ContainsVerb.java (revision 58513) @@ -0,0 +1,27 @@ +package eu.dnetlib.data.bulktag.selectioncriteria; + +@VerbClass("contains") +public class ContainsVerb implements Selection { + + private String param; + + public ContainsVerb() { + } + + public ContainsVerb(final String param) { + this.param = param; + } + + @Override + public boolean apply(String value) { + return value.contains(param); + } + + public String getParam() { + return param; + } + + public void setParam(String param) { + this.param = param; + } +} \ No newline at end of file Index: 
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/VerbClass.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/VerbClass.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/VerbClass.java (revision 58513) @@ -0,0 +1,13 @@ +package eu.dnetlib.data.bulktag.selectioncriteria; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Type-level annotation, retained at runtime, that attaches a string value to a class; + * for example ContainsVerb is annotated with the value "contains". + */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface VerbClass { + + /** The string associated with the annotated class. */ + public String value(); +} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Pair.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Pair.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Pair.java (revision 58513) @@ -0,0 +1,38 @@ +package eu.dnetlib.data.bulktag; + +import com.google.gson.Gson; + +/** + * Created by miriam on 03/08/2018. 
+ */ +public class Pair { + private A fst; + private B snd; + + public A getFst() { + return fst; + } + + public Pair setFst(A fst) { + this.fst = fst; + return this; + } + + public B getSnd() { + return snd; + } + + public Pair setSnd(B snd) { + this.snd = snd; + return this; + } + + public Pair(A a, B b){ + fst = a; + snd = b; + } + + public String toJson(){ + return new Gson().toJson(this); + } +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/Selection.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/Selection.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/Selection.java (revision 58513) @@ -0,0 +1,6 @@ +package eu.dnetlib.data.bulktag.selectioncriteria; + +public interface Selection { + + boolean apply(String value); +} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/ZenodoCommunity.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/ZenodoCommunity.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/ZenodoCommunity.java (revision 58513) @@ -0,0 +1,46 @@ +package eu.dnetlib.data.bulktag; + +import com.google.gson.Gson; +import org.dom4j.Node; + + +/** + * Created by miriam on 01/08/2018. 
+ */ +public class ZenodoCommunity { + + private String zenodoCommunityId; + + private SelectionConstraints selCriteria; + + public String getZenodoCommunityId() { + return zenodoCommunityId; + } + + public void setZenodoCommunityId(String zenodoCommunityId) { + this.zenodoCommunityId = zenodoCommunityId; + } + + public SelectionConstraints getSelCriteria() { + return selCriteria; + } + + public void setSelCriteria(SelectionConstraints selCriteria) { + this.selCriteria = selCriteria; + } + + private void setSelCriteria(String json){ + //Type collectionType = new TypeToken>(){}.getType(); + selCriteria = new Gson().fromJson(json, SelectionConstraints.class); + + } + + public void setSelCriteria(Node n){ + if (n==null){ + selCriteria = null; + }else{ + setSelCriteria(n.getText()); + } + } + +} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/AbstractDNetXsltFunctions.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/AbstractDNetXsltFunctions.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/AbstractDNetXsltFunctions.java (revision 58513) @@ -0,0 +1,1762 @@ +package eu.dnetlib.data.transform.xml; + +import java.nio.charset.Charset; +import java.security.MessageDigest; +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; + +import com.google.common.base.Predicate; +import com.google.common.base.Splitter; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.google.protobuf.Descriptors.Descriptor; +import com.google.protobuf.Descriptors.FieldDescriptor; +import 
com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Message; +import com.google.protobuf.Message.Builder; +import com.google.protobuf.ProtocolMessageEnum; +import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization; +import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision; +import eu.dnetlib.data.proto.DedupProtos.Dedup; +import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity; +import eu.dnetlib.data.proto.FieldTypeProtos.*; +import eu.dnetlib.data.proto.FieldTypeProtos.Journal; +import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance.OriginDescription; +import eu.dnetlib.data.proto.KindProtos.Kind; +import eu.dnetlib.data.proto.OafProtos.Oaf; +import eu.dnetlib.data.proto.OafProtos.OafEntity; +import eu.dnetlib.data.proto.OafProtos.OafRel; +import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization; +import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization; +import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation; +import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; +import eu.dnetlib.data.proto.RelTypeProtos.RelType; +import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; +import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization; +import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation; +import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject; +import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome; +import eu.dnetlib.data.proto.ResultProtos.Result.Metadata; +import eu.dnetlib.data.proto.ResultResultProtos.ResultResult; +import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Part; +import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset; +import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity; +import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Supplement; 
+import eu.dnetlib.data.proto.TypeProtos.Type; +import eu.dnetlib.miscutils.collections.Pair; +import eu.dnetlib.miscutils.iterators.IterablePair; +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.lang.math.NumberUtils; +import org.apache.commons.lang3.StringUtils; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +public abstract class AbstractDNetXsltFunctions { + + public static final String URL_REGEX = "^(http|https|ftp)\\://.*"; + private static final int MAX_NSPREFIX_LEN = 12; + public static Predicate urlFilter = s -> s.trim().matches(URL_REGEX); + public static Map code2name = Maps.newHashMap(); + + /* + * Obtained via COPY (select code, name from class) TO '/tmp/class_scheme.csv' (FORMAT csv, delimiter ',', FORCE_QUOTE *); on the + * relational db + */ + //code2name.put("openaire2.0_data","OpenAIRE Data (funded, referenced datasets)"); + static { + code2name.put("MH","Marshall Islands"); + code2name.put("CF","Central African Republic"); + code2name.put("TD","Chad"); + code2name.put("CN","China (People's Republic of)"); + code2name.put("NG","Nigeria"); + code2name.put("NF","Norfolk Island"); + code2name.put("MP","Northern Mariana Islands"); + code2name.put("PS","Palestinian-administered areas"); + code2name.put("SZ","Swaziland"); + code2name.put("max","Manx"); + code2name.put("TW","Taiwan"); + code2name.put("TJ","Tajikistan"); + code2name.put("BSG","Research for the benefit of specific groups"); + code2name.put("CP","Collaborative project"); + code2name.put("12MONTHS","12 Months Embargo"); + code2name.put("ace","Achinese"); + code2name.put("egy","Ancient Egyptian"); + code2name.put("ara","Arabic"); + code2name.put("arc","Aramaic"); + code2name.put("arp","Arapaho"); + code2name.put("gon","Gondi"); + code2name.put("ine","Indo-European"); + code2name.put("ipk","Inupiaq"); + code2name.put("ira","Iranian"); + code2name.put("lim","Limburgan; Limburger; 
Limburgish"); + code2name.put("mni","Manipuri"); + code2name.put("mno","Manobo"); + code2name.put("men","Mende"); + code2name.put("CX","Christmas Island"); + code2name.put("CC","Cocos (Keeling) Islands"); + code2name.put("KM","Comoros"); + code2name.put("CG","Congo"); + code2name.put("CK","Cook Islands"); + code2name.put("HR","Croatia"); + code2name.put("arn","Araucanian"); + code2name.put("art","Artificial"); + code2name.put("nah","Aztec"); + code2name.put("bug","Buginese"); + code2name.put("chn","Chinook jargon"); + code2name.put("chv","Chuvash"); + code2name.put("mus","Creek"); + code2name.put("mic","Micmac"); + code2name.put("min","Minangkabau"); + code2name.put("fro","Old French"); + code2name.put("cpp","Portuguese-based Creoles and Pidgins"); + code2name.put("som","Somali"); + code2name.put("wen","Sorbian"); + code2name.put("hrv","Croatian"); + code2name.put("cus","Cushitic"); + code2name.put("sot","Sotho, Southern"); + code2name.put("sai","South American Indian"); + code2name.put("esl/spa","Spanish"); + code2name.put("CU","Cuba"); + code2name.put("CW","Curaçao"); + code2name.put("CZ","Czech Republic"); + code2name.put("DK","Denmark"); + code2name.put("ER","Eritrea"); + code2name.put("TF","French Southern Territories"); + code2name.put("GW","Guinea-Bissau"); + code2name.put("VA","Holy See (Vatican City State)"); + code2name.put("BO","Bolivia"); + code2name.put("KY","Cayman Islands"); + code2name.put("dra","Dravidian"); + code2name.put("cpe","English-based Creoles and Pidgins"); + code2name.put("oji","Ojibwa"); + code2name.put("CIP-EIP-TN","CIP-Eco-Innovation - CIP-Thematic Network"); + code2name.put("jav/jaw","Javanese"); + code2name.put("ach","Acoli"); + code2name.put("ada","Adangme"); + code2name.put("afh","Afrihili"); + code2name.put("afr","Afrikaans"); + code2name.put("afa","Afro-Asiatic"); + code2name.put("ale","Aleut"); + code2name.put("alg","Algonquian languages"); + code2name.put("arw","Arawak"); + code2name.put("asm","Assamese"); + 
code2name.put("ava","Avaric"); + code2name.put("ave","Avestan"); + code2name.put("bra","Braj"); + code2name.put("bua","Buriat"); + code2name.put("chr","Cherokee"); + code2name.put("chy","Cheyenne"); + code2name.put("jrb","Judeo-Arabic"); + code2name.put("jpr","Judeo-Persian"); + code2name.put("kab","Kabyle"); + code2name.put("kac","Kachin"); + code2name.put("kaa","Kara-Kalpak"); + code2name.put("loz","Lozi"); + code2name.put("mwr","Marwari"); + code2name.put("DJ","Djibouti"); + code2name.put("JM","Jamaica"); + code2name.put("JP","Japan"); + code2name.put("JE","Jersey"); + code2name.put("JO","Jordan"); + code2name.put("KZ","Kazakhstan"); + code2name.put("KE","Kenya"); + code2name.put("KI","Kiribati"); + code2name.put("KR","Korea (Republic of)"); + code2name.put("KP","Korea, Democatric People's Republic of"); + code2name.put("XK","Kosovo * UN resolution"); + code2name.put("KW","Kuwait"); + code2name.put("NL","Netherlands"); + code2name.put("PE","Peru"); + code2name.put("PH","Philippines"); + code2name.put("fre/fra","French"); + code2name.put("PL","Poland"); + code2name.put("PT","Portugal"); + code2name.put("PR","Puerto Rico"); + code2name.put("QA","Qatar"); + code2name.put("RO","Romania"); + code2name.put("RU","Russian Federation"); + code2name.put("RW","Rwanda"); + code2name.put("RE","Réunion"); + code2name.put("sve/swe","Swedish"); + code2name.put("myn","Mayan"); + code2name.put("dum","Middle Dutch"); + code2name.put("mun","Munda"); + code2name.put("nde","Ndebele, North"); + code2name.put("ndo","Ndonga"); + code2name.put("nyn","Nyankole"); + code2name.put("nzi","Nzima"); + code2name.put("oci","Occitan (post 1500); Provençal"); + code2name.put("GU","Guam"); + code2name.put("tut","Altaic"); + code2name.put("awa","Awadhi"); + code2name.put("ban","Balinese"); + code2name.put("bal","Baluchi"); + code2name.put("bai","Bamileke"); + code2name.put("bad","Banda"); + code2name.put("UK","United Kingdom"); + code2name.put("bas","Basa"); + code2name.put("tib/bod","Tibetan"); + 
code2name.put("ben","Bengali"); + code2name.put("ber","Berber"); + code2name.put("cho","Choctaw"); + code2name.put("cop","Coptic"); + code2name.put("crp","Creoles and Pidgins"); + code2name.put("dak","Dakota"); + code2name.put("del","Delaware"); + code2name.put("div","Divehi"); + code2name.put("kha","Khasi"); + code2name.put("khi","Khoisan"); + code2name.put("kho","Khotanese"); + code2name.put("osa","Osage"); + code2name.put("oss","Ossetian; Ossetic"); + code2name.put("oto","Otomian"); + code2name.put("GT","Guatemala"); + code2name.put("ota","Ottoman"); + code2name.put("GG","Guernsey"); + code2name.put("GY","Guyana"); + code2name.put("LA","Lao (People's Democratic Republic)"); + code2name.put("LB","Lebanon"); + code2name.put("LY","Libyan Arab Jamahiriya"); + code2name.put("LI","Liechtenstein"); + code2name.put("LT","Lithuania"); + code2name.put("LU","Luxembourg"); + code2name.put("PW","Palau"); + code2name.put("BL","Saint-Barthélemy"); + code2name.put("SM","San Marino"); + code2name.put("SX","Sint Maarten (Dutch Part)"); + code2name.put("TL","Timor-Leste"); + code2name.put("TK","Tokelau"); + code2name.put("TO","Tonga"); + code2name.put("TN","Tunisia"); + code2name.put("TC","Turks and Caicos Islands"); + code2name.put("TV","Tuvalu"); + code2name.put("GB","United Kingdom"); + code2name.put("VU","Vanuatu"); + code2name.put("pal","Pahlavi"); + code2name.put("pau","Palauan"); + code2name.put("pam","Pampanga"); + code2name.put("pag","Pangasinan"); + code2name.put("pap","Papiamento"); + code2name.put("fas/per","Persian"); + code2name.put("phn","Phoenician"); + code2name.put("sid","Sidamo"); + code2name.put("GA","Gabon"); + code2name.put("GL","Greenland"); + code2name.put("GD","Grenada"); + code2name.put("GP","Guadeloupe"); + code2name.put("IE","Ireland"); + code2name.put("spa","Spanish; Castilian"); + code2name.put("IM","Isle of Man"); + code2name.put("IT","Italy"); + code2name.put("ES","Spain"); + code2name.put("SR","Suriname"); + code2name.put("TZ","Tanzania (United 
Republic of)"); + code2name.put("TH","Thailand"); + code2name.put("TG","Togo"); + code2name.put("UG","Uganda"); + code2name.put("UZ","Uzbekistan"); + code2name.put("VE","Venezuela"); + code2name.put("VI","Virgin Islands, U.S."); + code2name.put("WF","Wallis and Futuna"); + code2name.put("COFUND-PCP","COFUND (PCP)"); + code2name.put("amh","Amharic"); + code2name.put("map","Austronesian"); + code2name.put("aym","Aymara"); + code2name.put("bnt","Bantu"); + code2name.put("bak","Bashkir"); + code2name.put("bho","Bhojpuri"); + code2name.put("bik","Bikol"); + code2name.put("bul","Bulgarian"); + code2name.put("cor","Cornish"); + code2name.put("dua","Duala"); + code2name.put("dut/nld","Dutch; Flemish"); + code2name.put("isRelatedTo","isRelatedTo"); + code2name.put("coauthor","coauthor"); + code2name.put("dyu","Dyula"); + code2name.put("eka","Ekajuk"); + code2name.put("gil","Gilbertese"); + code2name.put("suk","Sukuma"); + code2name.put("sux","Sumerian"); + code2name.put("sun","Sundanese"); + code2name.put("sus","Susu"); + code2name.put("swa","Swahili"); + code2name.put("0010","Lecture"); + code2name.put("0007","Master thesis"); + code2name.put("0027","Model"); + code2name.put("0012","Newsletter"); + code2name.put("0020","Other ORP type"); + code2name.put("0038","Other literature type"); + code2name.put("0039","Other dataset type"); + code2name.put("0040","Other software type"); + code2name.put("0013","Part of book or chapter of book"); + code2name.put("0019","Patent"); + code2name.put("0028","PhysicalObject"); + code2name.put("0016","Preprint"); + code2name.put("DM","Dominica"); + code2name.put("DO","Dominican Republic"); + code2name.put("EC","Ecuador"); + code2name.put("EG","Egypt"); + code2name.put("GQ","Equatorial Guinea"); + code2name.put("EE","Estonia"); + code2name.put("ET","Ethiopia"); + code2name.put("GR","Greece"); + code2name.put("HM","Heard Island and McDonald Islands"); + code2name.put("got","Gothic"); + code2name.put("grb","Grebo"); + 
code2name.put("ell/gre","Greek"); + code2name.put("hat","Haitian; Haitian Creole"); + code2name.put("hau","Hausa"); + code2name.put("haw","Hawaiian"); + code2name.put("heb","Hebrew"); + code2name.put("gai/iri","Irish"); + code2name.put("kar","Karen"); + code2name.put("lui","Luiseno"); + code2name.put("goh","Old High German"); + code2name.put("abk","Abkhazian"); + code2name.put("aar","Afar"); + code2name.put("aggregator::pubsrepository::journals","Journal Aggregator/Publisher"); + code2name.put("pubsrepository::mock","Other"); + code2name.put("pubscatalogue::unknown","Publication Catalogue"); + code2name.put("BI","Burundi"); + code2name.put("CM","Cameroon"); + code2name.put("CD","Congo (Democratic Republic of)"); + code2name.put("CR","Costa Rica"); + code2name.put("CI","Cote d'Ivoire"); + code2name.put("arg","Aragonese"); + code2name.put("aze","Azerbaijani"); + code2name.put("EU","European Union"); + code2name.put("FK","Falkland Islands (Malvinas)"); + code2name.put("scr/hrv","Croatian"); + code2name.put("bam","Bambara"); + code2name.put("baq/eus","Basque"); + code2name.put("bih","Bihari"); + code2name.put("FO","Faroe Islands"); + code2name.put("FJ","Fiji"); + code2name.put("FI","Finland"); + code2name.put("ger/deu","German"); + code2name.put("MK","Former Yugoslav Republic of Macedonia"); + code2name.put("FR","France"); + code2name.put("bis","Bislama"); + code2name.put("cat","Catalan; Valencian"); + code2name.put("cha","Chamorro"); + code2name.put("che","Chechen"); + code2name.put("cos","Corsican"); + code2name.put("elx","Elamite"); + code2name.put("eng","English"); + code2name.put("est","Estonian"); + code2name.put("deu/ger","German"); + code2name.put("gle","Irish"); + code2name.put("gem","Germanic"); + code2name.put("GF","French Guiana"); + code2name.put("PF","French Polynesia"); + code2name.put("GM","Gambia"); + code2name.put("kik","Gikuyu; Kikuyu"); + code2name.put("gre/ell","Greek, Modern (1453-)"); + code2name.put("DE","Germany"); + 
code2name.put("mac/mkd","Macedonian"); + code2name.put("scc/srp","Serbian"); + code2name.put("grn","Guarani"); + code2name.put("ssw","Swati"); + code2name.put("swe","Swedish"); + code2name.put("syr","Syriac"); + code2name.put("tgl","Tagalog"); + code2name.put("tah","Tahitian"); + code2name.put("tgk","Tajik"); + code2name.put("tmh","Tamashek"); + code2name.put("tam","Tamil"); + code2name.put("tat","Tatar"); + code2name.put("aggregator::pubsrepository::institutional","Institutional Repository Aggregator"); + code2name.put("per/fas","Persian"); + code2name.put("FCT","Fundação para a Ciência e Tecnologia"); + code2name.put("user:claim:pid","user:claim:pid"); + code2name.put("entityregistry","Registry"); + code2name.put("hin","Hindi"); + code2name.put("NA","Namibia"); + code2name.put("ido","Ido"); + code2name.put("ibo","Igbo"); + code2name.put("orcid","Open Researcher and Contributor ID"); + code2name.put("TT","Trinidad and Tobago"); + code2name.put("TR","Turkey"); + code2name.put("TM","Turkmenistan"); + code2name.put("arXiv","arXiv"); + code2name.put("providedBy","provided by"); + code2name.put("EMBARGO","Embargo"); + code2name.put("dataset_dataset","dataset_dataset"); + code2name.put("publication_dataset","publication_dataset"); + code2name.put("publication_publication","publication_publication"); + code2name.put("coordinator","coordinator"); + code2name.put("participant","participant"); + code2name.put("subcontractor","subcontractor"); + code2name.put("principal investigating","principal investigating"); + code2name.put("exploitation","exploitation"); + code2name.put("OPEN","Open Access"); + code2name.put("OPEN SOURCE","Open Source"); + code2name.put("doi","doi"); + code2name.put("orcidworkid","orcid workid"); + code2name.put("MQ","Martinique"); + code2name.put("MR","Mauritania"); + code2name.put("jpn","Japanese"); + code2name.put("pubsrepository::unknown","Publication Repository"); + code2name.put("aggregator::pubsrepository::unknown","Publication Repository 
Aggregator"); + code2name.put("UA","Ukraine"); + code2name.put("YT","Mayotte"); + code2name.put("OTHER","Other"); + code2name.put("RESTRICTED","Restricted"); + code2name.put("AE","United Arab Emirates"); + code2name.put("aka","Akan"); + code2name.put("US","United States"); + code2name.put("author","author"); + code2name.put("isResultOf","isResultOf"); + code2name.put("kin","Kinyarwanda"); + code2name.put("kom","Komi"); + code2name.put("new","Newari"); + code2name.put("NR","Nauru"); + code2name.put("FM","Micronesia, Federated States of"); + code2name.put("NP","Nepal"); + code2name.put("MN","Mongolia"); + code2name.put("rum/ron","Romanian"); + code2name.put("submitted","submitted"); + code2name.put("driver-openaire2.0","OpenAIRE 2.0+ (DRIVER OA, EC funding)"); + code2name.put("result","result"); + code2name.put("roh","Raeto-Romance"); + code2name.put("run","Rundi"); + code2name.put("bin","Bini"); + code2name.put("bos","Bosnian"); + code2name.put("din","Dinka"); + code2name.put("tel","Telugu"); + code2name.put("MA","Morocco"); + code2name.put("MZ","Mozambique"); + code2name.put("ewo","Ewondo"); + code2name.put("ter","Tereno"); + code2name.put("fat","Fanti"); + code2name.put("fao","Faroese"); + code2name.put("hai","Haida"); + code2name.put("MM","Myanmar"); + code2name.put("NU","Niue"); + code2name.put("PK","Pakistan"); + code2name.put("PG","Papua New Guinea"); + code2name.put("file::WoS","file::WoS"); + code2name.put("metadata","metadata"); + code2name.put("file::hybrid","file::hybrid"); + code2name.put("nbl","Ndebele, South"); + code2name.put("akk","Akkadian"); + code2name.put("alb/sqi","Albanian"); + code2name.put("arm/hye","Armenian"); + code2name.put("ath","Athapascan"); + code2name.put("CA","Canada"); + code2name.put("CV","Cape Verde"); + code2name.put("CL","Chile"); + code2name.put("bat","Baltic"); + code2name.put("CO","Colombia"); + code2name.put("CY","Cyprus"); + code2name.put("SV","El Salvador"); + code2name.put("HT","Haiti"); + code2name.put("bej","Beja"); + 
code2name.put("HN","Honduras"); + code2name.put("HK","Hong Kong"); + code2name.put("HU","Hungary"); + code2name.put("bel","Belarusian"); + code2name.put("bem","Bemba"); + code2name.put("slo/slk","Slovak"); + code2name.put("bre","Breton"); + code2name.put("car","Carib"); + code2name.put("cau","Caucasian"); + code2name.put("ewe","Ewe"); + code2name.put("tha","Thai"); + code2name.put("fan","Fang"); + code2name.put("fij","Fijian"); + code2name.put("fin","Finnish"); + code2name.put("her","Herero"); + code2name.put("hil","Hiligaynon"); + code2name.put("bod/tib","Tibetan"); + code2name.put("tig","Tigre"); + code2name.put("tir","Tigrinya"); + code2name.put("tem","Timne"); + code2name.put("wel/cym","Welsh"); + code2name.put("KO","Kosovo * UN resolution"); + code2name.put("tiv","Tivi"); + code2name.put("tli","Tlingit"); + code2name.put("ton","Tonga (Tonga Islands)"); + code2name.put("tog","Tonga(Nyasa)"); + code2name.put("tru","Truk"); + code2name.put("tsi","Tsimshian"); + code2name.put("tso","Tsonga"); + code2name.put("tsn","Tswana"); + code2name.put("IsPreviousVersionOf","IsPreviousVersionOf"); + code2name.put("IsReferencedBy","IsReferencedBy"); + code2name.put("References","References"); + code2name.put("IS","Iceland"); + code2name.put("IN","India"); + code2name.put("ID","Indonesia"); + code2name.put("IL","Israel"); + code2name.put("NZ","New Zealand"); + code2name.put("NI","Nicaragua"); + code2name.put("NE","Niger"); + code2name.put("ARK","ARK"); + code2name.put("BW","Botswana"); + code2name.put("BR","Brazil"); + code2name.put("BF","Burkina Faso"); + code2name.put("KH","Cambodia"); + code2name.put("hmo","Hiri Motu"); + code2name.put("hun","Hungarian"); + code2name.put("ice/isl","Icelandic"); + code2name.put("ind","Indonesian"); + code2name.put("ile","Interlingue"); + code2name.put("kam","Kamba"); + code2name.put("lub","Luba-Katanga"); + code2name.put("nav","Navajo; Navaho"); + code2name.put("datasetsbyproject","datasetsbyproject"); + code2name.put("ISSN","ISSN"); + 
code2name.put("MC","Support for training and career development of researchers (Marie Curie)"); + code2name.put("nor","Norwegian"); + code2name.put("file","file"); + code2name.put("ISTC","ISTC"); + code2name.put("CSA-LS","CSA Lump sum"); + code2name.put("MX","Mexico"); + code2name.put("ME","Montenegro"); + code2name.put("ceb","Cebuano"); + code2name.put("nub","Nubian"); + code2name.put("nym","Nyamwezi"); + code2name.put("nyo","Nyoro"); + code2name.put("tum","Tumbuka"); + code2name.put("tur","Turkish"); + code2name.put("tuk","Turkmen"); + code2name.put("dnet:od_subjects","OpenDOAR subjects"); + code2name.put("wos","Web of Science Subject Areas"); + code2name.put("arxiv","arXiv"); + code2name.put("nsf:fieldOfApplication","Field of Application (NSF)"); + code2name.put("NetCDF","NetCDF"); + code2name.put("OpenDAP","OpenDAP"); + code2name.put("api","api"); + code2name.put("datasetsbyjournal","datasetsbyjournal"); + code2name.put("DOI","DOI"); + code2name.put("EAN13","EAN13"); + code2name.put("EISSN","EISSN"); + code2name.put("Handle","Handle"); + code2name.put("ISBN","ISBN"); + code2name.put("LISSN","LISSN"); + code2name.put("LSID","LSID"); + code2name.put("PURL","PURL"); + code2name.put("UPC","UPC"); + code2name.put("URL","URL"); + code2name.put("URN","URN"); + code2name.put("cel","Celtic"); + code2name.put("chg","Chagatai"); + code2name.put("chb","Chibcha"); + code2name.put("AF","Afghanistan"); + code2name.put("AL","Albania"); + code2name.put("PY","Paraguay"); + code2name.put("PN","Pitcairn"); + code2name.put("KN","Saint Kitts and Nevis"); + code2name.put("UY","Uruguay"); + code2name.put("VN","Viet Nam"); + code2name.put("VG","Virgin Islands (British)"); + code2name.put("EH","Western Sahara"); + code2name.put("YE","Yemen"); + code2name.put("YU","Yugoslavia"); + code2name.put("ZW","Zimbabwe"); + code2name.put("ec:hasprogram","hasprogram"); + code2name.put("ec:hasspecificprogram","hasspecificprogram"); + code2name.put("available","available"); + 
code2name.put("chi/zho","Chinese"); + code2name.put("ces/cze","Czech"); + code2name.put("guj","Gujarati"); + code2name.put("him","Himachali"); + code2name.put("hup","Hupa"); + code2name.put("iba","Iban"); + code2name.put("ijo","Ijo"); + code2name.put("ilo","Iloko"); + code2name.put("inc","Indic"); + code2name.put("kan","Kannada"); + code2name.put("DZ","Algeria"); + code2name.put("BT","Bhutan"); + code2name.put("kau","Kanuri"); + code2name.put("mul","Multiple languages"); + code2name.put("BA","Bosnia and Herzegovina"); + code2name.put("MU","Mauritius"); + code2name.put("CSA","Coordination and support action"); + code2name.put("fileCSV","fileCSV"); + code2name.put("AS","American Samoa"); + code2name.put("ERC","Support for frontier research (ERC)"); + code2name.put("IA","Innovation action"); + code2name.put("AD","Andorra"); + code2name.put("AO","Angola"); + code2name.put("AI","Anguilla"); + code2name.put("AQ","Antarctica"); + code2name.put("AG","Antigua and Barbuda"); + code2name.put("AR","Argentina"); + code2name.put("AM","Armenia"); + code2name.put("AW","Aruba"); + code2name.put("AU","Australia"); + code2name.put("AT","Austria"); + code2name.put("AZ","Azerbaijan"); + code2name.put("BS","Bahamas"); + code2name.put("BH","Bahrain"); + code2name.put("BE","Belgium"); + code2name.put("BZ","Belize"); + code2name.put("BJ","Benin"); + code2name.put("BM","Bermuda"); + code2name.put("GE","Georgia"); + code2name.put("GH","Ghana"); + code2name.put("GI","Gibraltar"); + code2name.put("GN","Guinea"); + code2name.put("IR","Iran (Islamic Republic of)"); + code2name.put("IQ","Iraq"); + code2name.put("6MONTHS","6 Months Embargo"); + code2name.put("CLOSED","Closed Access"); + code2name.put("ina","Auxiliary Language Association)"); + code2name.put("bur/mya","Burmese"); + code2name.put("cad","Caddo"); + code2name.put("cai","Central American Indian"); + code2name.put("chu","Church Slavic; Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic"); + code2name.put("kal","Greenlandic; 
Kalaallisut"); + code2name.put("iku","Inuktitut"); + code2name.put("iro","Iroquoian"); + code2name.put("ita","Italian"); + code2name.put("jav","Javanese"); + code2name.put("kua","Kuanyama; Kwanyama"); + code2name.put("kum","Kumyk"); + code2name.put("kru","Kurukh"); + code2name.put("kus","Kusaie"); + code2name.put("vie","Vietnamese"); + code2name.put("vol","Volapük"); + code2name.put("vot","Votic"); + code2name.put("wak","Wakashan"); + code2name.put("wal","Walamo"); + code2name.put("wln","Walloon"); + code2name.put("war","Waray"); + code2name.put("ST","São Tomé and PrÃncipe"); + code2name.put("endDate","endDate"); + code2name.put("issued","issued"); + code2name.put("startDate","startDate"); + code2name.put("FCH2-CSA","Coordination & support action"); + code2name.put("nic","Niger-Kordofanian"); + code2name.put("ssa","Nilo-Saharan"); + code2name.put("MSCA-RISE","RISE"); + code2name.put("RIA","Research and Innovation action"); + code2name.put("MSCA-IF-EF-ST","Standard EF"); + code2name.put("PendingRepositoryResources","Pending datasource"); + code2name.put("RepositoryServiceResources","Valid datasource"); + code2name.put("publication","publication"); + code2name.put("niu","Niuean"); + code2name.put("sysimport:crosswalk:aggregator","sysimport:crosswalk:aggregator"); + code2name.put("sysimport:crosswalk:cris","sysimport:crosswalk:cris"); + code2name.put("sysimport:crosswalk:datasetarchive","sysimport:crosswalk:datasetarchive"); + code2name.put("sysimport:crosswalk:entityregistry","sysimport:crosswalk:entityregistry"); + code2name.put("non","Norse"); + code2name.put("nai","North American Indian"); + code2name.put("sme","Northern Sami"); + code2name.put("nno","Norwegian Nynorsk; Nynorsk, Norwegian"); + code2name.put("yor","Yoruba"); + code2name.put("nob","BokmÃ¥l, Norwegian; Norwegian BokmÃ¥l"); + code2name.put("kaz","Kazakh"); + code2name.put("khm","Khmer"); + code2name.put("kor","Korean"); + code2name.put("ltz","Letzeburgesch; Luxembourgish"); + 
code2name.put("mar","Marathi"); + code2name.put("mas","Masai"); + code2name.put("enm","Middle English"); + code2name.put("frm","Middle French"); + code2name.put("mis","Miscellaneous"); + code2name.put("zap","Zapotec"); + code2name.put("zul","Zulu"); + code2name.put("KG","Kyrgyzstan"); + code2name.put("LV","Latvia"); + code2name.put("LS","Lesotho"); + code2name.put("LR","Liberia"); + code2name.put("MO","Macao"); + code2name.put("MG","Madagascar"); + code2name.put("MW","Malawi"); + code2name.put("MY","Malaysia"); + code2name.put("MD","Moldova (Republic of)"); + code2name.put("MS","Montserrat"); + code2name.put("AX","Ã…land Islands"); + code2name.put("moh","Mohawk"); + code2name.put("mol","Moldavian"); + code2name.put("mkh","Mon-Kmer"); + code2name.put("lol","Mongo"); + code2name.put("copyrighted","copyrighted"); + code2name.put("created","created"); + code2name.put("updated","updated"); + code2name.put("valid","valid"); + code2name.put("BBI-IA-DEMO","Bio-based Industries Innovation action - Demonstration"); + code2name.put("MSCA-IF-EF-CAR","CAR – Career Restart panel"); + code2name.put("MSCA-ITN-ETN","European Training Networks"); + code2name.put("interactiveResource","interactiveResource"); + code2name.put("model","model"); + code2name.put("ML","Mali"); + code2name.put("FCH2-RIA","FCH2 Research and Innovation action"); + code2name.put("MSCA-COFUND-FP","Fellowship programmes"); + code2name.put("physicalObject","physicalObject"); + code2name.put("MSCA-IF-GF","Global Fellowships"); + code2name.put("sysimport:crosswalk:infospace","sysimport:crosswalk:infospace"); + code2name.put("sysimport:crosswalk:repository","sysimport:crosswalk:repository"); + code2name.put("sysimport:mining:aggregator","sysimport:mining:aggregator"); + code2name.put("fry","Frisian"); + code2name.put("gaa","Ga"); + code2name.put("gae/gdh","Gaelic"); + code2name.put("service","service"); + code2name.put("software","software"); + code2name.put("sound","sound"); + code2name.put("glg","Galician"); + 
code2name.put("lug","Ganda"); + code2name.put("gay","Gayo"); + code2name.put("gez","Geez"); + code2name.put("MT","Malta"); + code2name.put("text","text"); + code2name.put("AN","Netherlands Antilles"); + code2name.put("NC","New Caledonia"); + code2name.put("NO","Norway"); + code2name.put("OC","Oceania"); + code2name.put("user:claim:search","user:claim:search"); + code2name.put("OM","Oman"); + code2name.put("PA","Panama"); + code2name.put("user:insert","user:insert"); + code2name.put("171","Article 171 of the Treaty"); + code2name.put("nya","Chewa; Chichewa; Nyanja"); + code2name.put("cre","Cree"); + code2name.put("geo/kat","Georgian"); + code2name.put("dan","Danish"); + code2name.put("MV","Maldives"); + code2name.put("dzo","Dzongkha"); + code2name.put("efi","Efik"); + code2name.put("LC","Saint Lucia"); + code2name.put("zun","Zuni"); + code2name.put("sga","old Irish"); + code2name.put("file::EuropePMC","file::EuropePMC"); + code2name.put("MF","Saint Martin (French Part)"); + code2name.put("openaire2.0_data","OpenAIRE Data (funded, referenced datasets)"); + code2name.put("file::PDF","file::PDF"); + code2name.put("esk","Eskimo"); + code2name.put("ec:program","program"); + code2name.put("epo","Esperanto"); + code2name.put("fct:program","fct:program"); + code2name.put("ec:specificprogram","specificprogram"); + code2name.put("collection","collection"); + code2name.put("ERC-ADG","Advanced Grant"); + code2name.put("ERA-NET-Cofund","ERA-NET Cofund"); + code2name.put("dataset","dataset"); + code2name.put("event","event"); + code2name.put("ERC-LVG","ERC low value grant"); + code2name.put("film","film"); + code2name.put("image","image"); + code2name.put("SL","Sierra Leone"); + code2name.put("ec:hasframeworkprogram","hasframeworkprogram"); + code2name.put("ERC-POC","Proof of Concept Grant"); + code2name.put("sysimport:mining:cris","sysimport:mining:cris"); + code2name.put("sysimport:mining:datasetarchive","sysimport:mining:datasetarchive"); + code2name.put("CP-CSA","Combination 
of CP & CSA"); + code2name.put("NoE","Network of Excellence"); + code2name.put("grc","Ancient Greek"); + code2name.put("lat","Latin"); + code2name.put("ori","Oriya"); + code2name.put("orm","Oromo"); + code2name.put("nso","Sotho"); + code2name.put("ddc","Dewey Decimal Classification"); + code2name.put("zen","Zenaga"); + code2name.put("ec:h2020topics","Horizon 2020 Topics"); + code2name.put("alternative title","alternative title"); + code2name.put("mesheuropmc","Medical Subject Headings"); + code2name.put("apa","Apache"); + code2name.put("SH","Saint Helena, Ascension and Tristan da Cunha"); + code2name.put("PM","Saint Pierre and Miquelon"); + code2name.put("MSCA-COFUND-DP","Doctoral programmes"); + code2name.put("VC","Saint Vincent and the Grenadines"); + code2name.put("ECSEL-IA","ECSEL Innovation Action"); + code2name.put("kpe","Kpelle"); + code2name.put("ECSEL-RIA","ECSEL Research and Innovation Actions"); + code2name.put("MSCA-ITN-EID","European Industrial Doctorates"); + code2name.put("sysimport:mining:entityregistry","sysimport:mining:entityregistry"); + code2name.put("sysimport:mining:infospace","sysimport:mining:infospace"); + code2name.put("sysimport:mining:repository","sysimport:mining:repository"); + code2name.put("main title","main title"); + code2name.put("subtitle","subtitle"); + code2name.put("translated title","translated title"); + code2name.put("lav","Latvian"); + code2name.put("kro","Kru"); + code2name.put("kur","Kurdish"); + code2name.put("kut","Kutenai"); + code2name.put("pli","Pali"); + code2name.put("pan","Panjabi; Punjabi"); + code2name.put("paa","Papuan-Australian"); + code2name.put("peo","Persian, Old (ca 600 - 400 B.C.)"); + code2name.put("zha","Zhuang; Chuang"); + code2name.put("pmc","pmc"); + code2name.put("pmid","pmid"); + code2name.put("urn","urn"); + code2name.put("IO","British Indian Ocean Territory"); + code2name.put("WS","Samoa"); + code2name.put("SA","Saudi Arabia"); + code2name.put("SN","Senegal"); + code2name.put("RS","Serbia"); + 
code2name.put("MSCA-ITN-EJD","European Joint Doctorates"); + code2name.put("wt:hasParentFunding","wt:hasParentFunding"); + code2name.put("lad","Ladino"); + code2name.put("bla","Siksika"); + code2name.put("lah","Lahnda"); + code2name.put("lam","Lamba"); + code2name.put("lao","Lao"); + code2name.put("snd","Sindhi"); + code2name.put("son","Songhai"); + code2name.put("DFG","DFG Classification"); + code2name.put("SC","Seychelles"); + code2name.put("SG","Singapore"); + code2name.put("SK","Slovakia"); + code2name.put("SI","Slovenia"); + code2name.put("lez","Lezghian"); + code2name.put("SB","Solomon Islands"); + code2name.put("SO","Somalia"); + code2name.put("ZA","South Africa"); + code2name.put("GS","South Georgia and the South Sandwich Islands"); + code2name.put("fiu","Finno-Ugrian"); + code2name.put("fon","Fon"); + code2name.put("fra/fre","French"); + code2name.put("cpf","French-based Creoles and Pidgins"); + code2name.put("SS","South Sudan"); + code2name.put("ful","Fulah"); + code2name.put("gla","Gaelic; Scottish Gaelic"); + code2name.put("kas","Kashmiri"); + code2name.put("LK","Sri Lanka"); + code2name.put("SD","Sudan"); + code2name.put("SJ","Svalbard and Jan Mayen"); + code2name.put("SE","Sweden"); + code2name.put("CH","Switzerland"); + code2name.put("SY","Syrian Arab Republic"); + code2name.put("fct:hasParentFunding","fct:hasParentFunding"); + code2name.put("FCH2-IA","FCH2 Innovation action"); + code2name.put("MSCA-IF-EF-RI","RI – Reintegration panel"); + code2name.put("kaw","Kawi"); + code2name.put("kir","Kirghiz"); + code2name.put("kon","Kongo"); + code2name.put("kok","Konkani"); + code2name.put("lin","Lingala"); + code2name.put("lit","Lithuanian"); + code2name.put("lun","Lunda"); + code2name.put("luo","Luo"); + code2name.put("mac/mak","Macedonian"); + code2name.put("mak","Makasar"); + code2name.put("mlt","Maltese"); + code2name.put("pol","Polish"); + code2name.put("pon","Ponape"); + code2name.put("por","Portuguese"); + code2name.put("pra","Prakrit"); + 
code2name.put("pro","Provencal"); + code2name.put("pus","Pushto"); + code2name.put("que","Quechua"); + code2name.put("raj","Rajasthani"); + code2name.put("rar","Rarotongan"); + code2name.put("roa","Romance"); + code2name.put("ron/rum","Romanian"); + code2name.put("rom","Romany"); + code2name.put("rus","Russian"); + code2name.put("sit","Sino-Tibetan"); + code2name.put("sio","Siouan"); + code2name.put("fileGzip","fileGzip"); + code2name.put("files_by_rpc","files_by_rpc"); + code2name.put("files_from_mdstore","files_from_mdstore"); + code2name.put("files_from_metadata","files_from_metadata"); + code2name.put("scr","Serbo-Croatian"); + code2name.put("mad","Madurese"); + code2name.put("mag","Magahi"); + code2name.put("mai","Maithili"); + code2name.put("mlg","Malagasy"); + code2name.put("may/msa","Malay"); + code2name.put("mal","Malayalam"); + code2name.put("man","Mandingo"); + code2name.put("glv","Manx"); + code2name.put("mao/mri","Maori"); + code2name.put("chm","Mari"); + code2name.put("srr","Serer"); + code2name.put("shn","Shan"); + code2name.put("sna","Shona"); + code2name.put("iii","Sichuan Yi"); + code2name.put("sin","Sinhala; Sinhalese"); + code2name.put("sla","Slavic"); + code2name.put("slk/slo","Slovak"); + code2name.put("slv","Slovenian"); + code2name.put("sog","Sogdian"); + code2name.put("Contract","Contract"); + code2name.put("Contract Interagency Agreement","Contract Interagency Agreement"); + code2name.put("Cooperative Agreement","Cooperative Agreement"); + code2name.put("Fellowship","Fellowship"); + code2name.put("Fixed Price Award","Fixed Price Award"); + code2name.put("Interagency Agreement","Interagency Agreement"); + code2name.put("Intergovernmental Personnel Award","Intergovernmental Personnel Award"); + code2name.put("Personnel Agreement","Personnel Agreement"); + code2name.put("Standard Grant","Standard Grant"); + code2name.put("GAA","GAA"); + code2name.put("mah","Marshallese"); + code2name.put("gmh","Middle High German"); + 
code2name.put("mga","Middle Irish"); + code2name.put("filesystem","filesystem"); + code2name.put("ftp","ftp"); + code2name.put("http","http"); + code2name.put("SME-1","SME instrument phase 1"); + code2name.put("SME-2","SME instrument phase 2"); + code2name.put("SGA-CSA","Specific Grant agreement and Coordination and Support Action"); + code2name.put("mon","Mongolian"); + code2name.put("mos","Mossi"); + code2name.put("nau","Nauru"); + code2name.put("nep","Nepali"); + code2name.put("ang","Old English"); + code2name.put("sal","Salishan"); + code2name.put("sam","Samaritan"); + code2name.put("smi","Sami"); + code2name.put("smo","Samoan"); + code2name.put("sad","Sandawe"); + code2name.put("sag","Sango"); + code2name.put("san","Sanskrit"); + code2name.put("srd","Sardinian"); + code2name.put("sco","Scots"); + code2name.put("sel","Selkup"); + code2name.put("sem","Semitic"); + code2name.put("srp","Serbian"); + code2name.put("tyv","Tuvinian"); + code2name.put("twi","Twi"); + code2name.put("uga","Ugaritic"); + code2name.put("uig","Uighur; Uyghur"); + code2name.put("ukr","Ukrainian"); + code2name.put("umb","Umbundu"); + code2name.put("und","Undetermined"); + code2name.put("urd","Urdu"); + code2name.put("uzb","Uzbek"); + code2name.put("vai","Vai"); + code2name.put("ven","Venda"); + code2name.put("was","Washo"); + code2name.put("cym/wel","Welsh"); + code2name.put("wol","Wolof"); + code2name.put("xho","Xhosa"); + code2name.put("sah","Yakut"); + code2name.put("yao","Yao"); + code2name.put("yap","Yap"); + code2name.put("yid","Yiddish"); + code2name.put("httpCSV","httpCSV"); + code2name.put("httpList","httpList"); + code2name.put("jdbc","jdbc"); + code2name.put("oai","oai"); + code2name.put("oai_sets","oai_sets"); + code2name.put("other","other"); + code2name.put("re3data","re3data"); + code2name.put("rest","rest"); + code2name.put("sftp","sftp"); + code2name.put("soap","soap"); + code2name.put("sparql","sparql"); + code2name.put("sword","sword"); + code2name.put("targz","targz"); + 
code2name.put("ec:frameworkprogram","frameworkprogram"); + code2name.put("UNKNOWN","UNKNOWN"); + code2name.put("0021","Dataset"); + code2name.put("0006","Doctoral thesis"); + code2name.put("0023","Event"); + code2name.put("0009","External research report"); + code2name.put("0024","Film"); + code2name.put("0025","Image"); + code2name.put("0026","InteractiveResource"); + code2name.put("0011","Internal report"); + code2name.put("0017","Report"); + code2name.put("0014","Research"); + code2name.put("0015","Review"); + code2name.put("0029","Software"); + code2name.put("0032","Software Paper"); + code2name.put("0030","Sound"); + code2name.put("0000","Unknown"); + code2name.put("0034","Project deliverable"); + code2name.put("0035","Project proposal"); + code2name.put("0036","Project milestone"); + code2name.put("0037","Clinical Trial"); + code2name.put("crissystem","CRIS System"); + code2name.put("datarepository::unknown","Data Repository"); + code2name.put("aggregator::datarepository","Data Repository Aggregator"); + code2name.put("infospace","Information Space"); + code2name.put("pubsrepository::institutional","Institutional Repository"); + code2name.put("pubsrepository::journal","Journal"); + code2name.put("scholarcomminfra","Scholarly Comm. 
Infrastructure"); + code2name.put("pubsrepository::thematic","Thematic Repository"); + code2name.put("websource","Web Source"); + code2name.put("entityregistry::projects","Funder database"); + code2name.put("entityregistry::repositories","Registry of repositories"); + code2name.put("wt:fundingStream","Wellcome Trust: Funding Stream"); + code2name.put("IsCitedBy","IsCitedBy"); + code2name.put("IsNewVersionOf","IsNewVersionOf"); + code2name.put("IsPartOf","IsPartOf"); + code2name.put("COFUND-EJP","COFUND (European Joint Programme)"); + code2name.put("COFUND-PPI","COFUND (PPI)"); + code2name.put("CS2-IA","CS2 Innovation Action"); + code2name.put("CS2-RIA","CS2 Research and Innovation action"); + code2name.put("files","files"); + code2name.put("ERC-COG","Consolidator Grant"); + code2name.put("SESAR-RIA","SESAR: Research and Innovation action"); + code2name.put("SGA-RIA","SGA Research and Innovation action"); + code2name.put("ERC-STG","Starting Grant"); + code2name.put("BOA/Task Order","BOA/Task Order"); + code2name.put("0018","Annotation"); + code2name.put("0001","Article"); + code2name.put("0033","Audiovisual"); + code2name.put("0008","Bachelor thesis"); + code2name.put("Continuing grant","Continuing grant"); + code2name.put("0002","Book"); + code2name.put("0022","Collection"); + code2name.put("0004","Conference object"); + code2name.put("0005","Contribution for newspaper or weekly magazine"); + code2name.put("0031","Data Paper"); + code2name.put("BD","Bangladesh"); + code2name.put("BB","Barbados"); + code2name.put("BY","Belarus"); + code2name.put("BQ","Bonaire, Sint Eustatius and Saba"); + code2name.put("BV","Bouvet Island"); + code2name.put("BN","Brunei Darussalam"); + code2name.put("BG","Bulgaria"); + code2name.put("UM","United States Minor Outlying Islands"); + code2name.put("ZM","Zambia"); + code2name.put("openaire2.0","OpenAIRE 2.0 (EC funding)"); + code2name.put("openaire3.0","OpenAIRE 3.0 (OA, funding)"); + code2name.put("driver","OpenAIRE Basic (DRIVER OA)"); 
+ code2name.put("native","proprietary"); + code2name.put("hostedBy","collected from a compatible aggregator"); + code2name.put("notCompatible","under validation"); + code2name.put("BBI-IA-FLAG","Bio-based Industries Innovation action - Flagship"); + code2name.put("BBI-RIA","Bio-based Industries Research and Innovation action"); + } + + protected static String getDefaultResulttype(final Element cobjcategory) { + switch (cobjcategory.getText()) { + case "0029": + case "0040": + return "software"; + case "0021": + case "0024": + case "0025": + case "0030": + case "0039": + return "dataset"; + case "0000": + case "0010": + case "0018": + case "0020": + case "0022": + case "0023": + case "0026": + case "0027": + case "0028": + case "0037": + return "other"; + case "0001": + case "0002": + case "0004": + case "0005": + case "0006": + case "0007": + case "0008": + case "0009": + case "0011": + case "0012": + case "0013": + case "0014": + case "0015": + case "0016": + case "0017": + case "0019": + case "0031": + case "0032": + case "0034": + case "0035": + case "0036": + case "0038": + return "publication"; + default: + return "publication"; + } + } + + protected static OafRel.Builder getRelBuilder(final RelType rType, final SubRelType subRelType, OafRel.Builder rel, final Builder subRel) { + + switch(rType) { + + case datasourceOrganization: + return rel.setDatasourceOrganization(DatasourceOrganization.newBuilder().setProvision((Provision.Builder) subRel)); + case projectOrganization: + return rel.setProjectOrganization(ProjectOrganization.newBuilder().setParticipation((Participation.Builder) subRel)); + case resultOrganization: + return rel.setResultOrganization(ResultOrganization.newBuilder().setAffiliation((Affiliation.Builder) subRel)); + case resultProject: + return rel.setResultProject(ResultProject.newBuilder().setOutcome((Outcome.Builder) subRel)); + case resultResult: + final ResultResult.Builder rr = ResultResult.newBuilder(); + switch (subRelType) { + + case 
similarity: + return rel.setResultResult(rr.setSimilarity((Similarity.Builder) subRel)); + case publicationDataset: + return rel.setResultResult(rr.setPublicationDataset((PublicationDataset.Builder) subRel)); + case dedup: + return rel.setResultResult(rr.setDedup((Dedup.Builder) subRel)); + case dedupSimilarity: + return rel.setResultResult(rr.setDedupSimilarity((DedupSimilarity.Builder) subRel)); + case supplement: + return rel.setResultResult(rr.setSupplement((Supplement.Builder) subRel)); + case part: + return rel.setResultResult(rr.setPart((Part.Builder) subRel)); + default: + throw new IllegalArgumentException("invalid subRelType for result_result relations: " + subRelType.toString()); + } + case organizationOrganization: + final OrganizationOrganization.Builder oo = OrganizationOrganization.newBuilder(); + switch (subRelType) { + case dedup: + return rel.setOrganizationOrganization(oo.setDedup((Dedup.Builder) subRel)); + case dedupSimilarity: + return rel.setOrganizationOrganization(oo.setDedupSimilarity((DedupSimilarity.Builder) subRel)); + default: + throw new IllegalArgumentException("invalid subRelType for organization_organization relations: " + subRelType.toString()); + } + } + throw new IllegalArgumentException("invalid relation type " + rType.toString()); + } + + protected static Builder getSubRelBuilder(final RelMetadata.Builder metadata, final SubRelType subRelType, final Map params) { + + switch (subRelType) { + + case provision: + return Provision.newBuilder().setRelMetadata(metadata); + case outcome: + return Outcome.newBuilder().setRelMetadata(metadata); + case similarity: + return Similarity.newBuilder().setRelMetadata(metadata); + case publicationDataset: + return PublicationDataset.newBuilder().setRelMetadata(metadata); + case affiliation: + return Affiliation.newBuilder().setRelMetadata(metadata); + case dedup: + return Dedup.newBuilder().setRelMetadata(metadata); + case dedupSimilarity: + return 
DedupSimilarity.newBuilder().setRelMetadata(metadata); + case supplement: + return Supplement.newBuilder().setRelMetadata(metadata); + case part: + return Part.newBuilder().setRelMetadata(metadata); + } + throw new IllegalArgumentException("invalid relation type " + subRelType.toString()); + } + + protected static String getVocabularyName(final RelType relType) { + switch (relType) { + + case datasourceOrganization: + return "dnet:datasource_organization_relations"; + case projectOrganization: + return "dnet:project_organization_relations"; + case resultOrganization: + return "dnet:result_organization_relations"; + case resultProject: + return "dnet:result_project_relations"; + case resultResult: + return "dnet:result_result_relations"; + case organizationOrganization: + return "dnet:organization_organization_relations"; + } + throw new IllegalArgumentException("invalid relation type " + relType.toString()); + } + + + // Builder for Entities + protected static Oaf getOaf(final OafEntity.Builder entity, final DataInfo.Builder info) { + return _getOaf(Oaf.newBuilder(), info).setKind(Kind.entity).setEntity(entity).build(); + } + + // Builder for Rels + protected static Oaf getOaf(final OafRel.Builder rel, final DataInfo.Builder info) { + return _getOaf(Oaf.newBuilder(), info).setKind(Kind.relation).setRel(rel).build(); + } + + private static Oaf.Builder _getOaf(final Oaf.Builder oaf, final DataInfo.Builder info) { + if (info != null) { + return oaf.setDataInfo(ensureDataInfo(info)); + } else return oaf; + } + + protected static DataInfo.Builder ensureDataInfo(final DataInfo.Builder info) { + if (info.isInitialized()) return info; + return getDataInfo(false, null, "UNKNOWN", "0.9", false, false); + } + + protected static List getKeyValues(final ValueMap values, final String fieldName, final Type type) { + final ElementList collectedFroms = values.get(fieldName); + if (collectedFroms == null) { + throw new IllegalArgumentException("missing field " + fieldName); + } + 
return collectedFroms.stream() + .filter(e -> StringUtils.isNotBlank(e.getAttributeValue("id"))) + .filter(e -> StringUtils.isNotBlank(e.getAttributeValue("name"))) + .map(e -> getKV(oafSplitId(type.name(), e.getAttributeValue("id")), e.getAttributeValue("name"))) + .collect(Collectors.toList()); + } + + protected static KeyValue getKV(final String id, final String name) { + return KeyValue.newBuilder().setKey(id).setValue(name).build(); + } + + protected static OafRel.Builder getRel(final String sourceId, + final String targetId, + final RelType relType, + final SubRelType subRelType, + final String relClass, + final List collectedFrom, + final boolean isChild) { + final OafRel.Builder oafRel = OafRel.newBuilder().setSource(sourceId) + .setTarget(targetId) + .setRelType(relType) + .setSubRelType(subRelType) + .setRelClass(relClass) + .setChild(isChild); + + if (collectedFrom != null) { + oafRel.addAllCollectedfrom(collectedFrom); + } + return oafRel; + } + + protected static OafEntity.Builder getEntity(final Type type, + final String id, + final List collectedFrom, + final Collection originalIds, + final String dateOfCollection, + final String dateOfTransformation, + final List pids) { + final OafEntity.Builder builder = OafEntity.newBuilder().setType(type).setId(id); + if (collectedFrom != null) builder.addAllCollectedfrom(collectedFrom); + builder.setDateoftransformation(StringUtils.isBlank(dateOfTransformation) ? "" : dateOfTransformation); + builder.setDateofcollection(StringUtils.isBlank(dateOfCollection) ? 
"" : dateOfCollection); + + if ((originalIds != null) && !originalIds.isEmpty()) { + builder.addAllOriginalId(originalIds.stream() + .filter(StringUtils::isNotBlank) + .collect(Collectors.toList())); + } + + if ((pids != null) && !pids.isEmpty()) { + builder.addAllPid( + pids.stream().filter(Objects::nonNull) + .collect(Collectors.toList())); + } + + return builder; + } + + public static DataInfo.Builder getDataInfo( + final NodeList about, + final String provenanceaction, + final String trust, + final boolean deletedbyinference, + final boolean inferred) { + return getDataInfo(false, about, provenanceaction, trust, deletedbyinference, inferred); + } + + public static DataInfo.Builder getDataInfo( + final boolean invisible, + final NodeList about, + final String provenanceaction, + final String trust, + final boolean deletedbyinference, + final boolean inferred) { + + final DataInfo.Builder dataInfoBuilder = DataInfo.newBuilder(); + dataInfoBuilder.setInvisible(invisible); + dataInfoBuilder.setInferred(inferred); + dataInfoBuilder.setDeletedbyinference(deletedbyinference); + dataInfoBuilder.setTrust(trust); + dataInfoBuilder.setProvenanceaction(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build()); + + // checking instanceof because when receiving an empty we don't want to parse it. 
+ if (((about != null) && (about.getLength() > 0)) /* && (dataInfo instanceof org.w3c.dom.Element) */) { + + final org.w3c.dom.Element dataInfoElement = getDirectChild((org.w3c.dom.Element) about.item(0), "datainfo"); + if (dataInfoElement != null) { + org.w3c.dom.Element elem = getDirectChild(dataInfoElement, "inferred"); + dataInfoBuilder.setInferred(Boolean.valueOf(getStringValue(elem, String.valueOf(inferred)))); + + elem = getDirectChild(dataInfoElement, "deletedbyinference"); + dataInfoBuilder.setDeletedbyinference(Boolean.valueOf(getStringValue(elem, String.valueOf(deletedbyinference)))); + + elem = getDirectChild(dataInfoElement, "trust"); + dataInfoBuilder.setTrust(getStringValue(elem, trust)); + + elem = getDirectChild(dataInfoElement, "invisible"); + dataInfoBuilder.setInvisible(getBooleanValue(elem, invisible)); + + elem = getDirectChild(dataInfoElement, "inferenceprovenance"); + dataInfoBuilder.setInferenceprovenance(getStringValue(elem)); + + elem = getDirectChild(dataInfoElement, "provenanceaction"); + final Qualifier.Builder pBuilder = Qualifier.newBuilder(); + if (elem.hasAttributes()) { + final NamedNodeMap attributes = elem.getAttributes(); + pBuilder.setClassid(getAttributeValue(attributes, "classid")); + pBuilder.setClassname(getAttributeValue(attributes, "classname")); + pBuilder.setSchemeid(getAttributeValue(attributes, "schemeid")); + pBuilder.setSchemename(getAttributeValue(attributes, "schemename")); + } else { + pBuilder.mergeFrom(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build()); + } + dataInfoBuilder.setProvenanceaction(pBuilder); + } + } + + return dataInfoBuilder; + } + + protected static OAIProvenance getOAIProvenance(final NodeList about) { + + OAIProvenance.Builder oaiProv = OAIProvenance.newBuilder(); + + if (((about != null) && (about.getLength() > 0))) { + + final org.w3c.dom.Element provenance = getDirectChild((org.w3c.dom.Element) about.item(0), "provenance"); + + if (provenance != null) { + final 
org.w3c.dom.Element origDesc = getDirectChild(provenance, "originDescription"); + oaiProv.setOriginDescription(buildOriginDescription(origDesc, OriginDescription.newBuilder())); + } + } + + return oaiProv.build(); + } + + private static OriginDescription buildOriginDescription(final org.w3c.dom.Element origDesc, final OriginDescription.Builder od) { + od.setHarvestDate(origDesc.getAttribute("harvestDate")).setAltered(Boolean.valueOf(origDesc.getAttribute("altered"))); + + org.w3c.dom.Element elem = getDirectChild(origDesc, "baseURL"); + od.setBaseURL(getStringValue(elem)); + + elem = getDirectChild(origDesc, "identifier"); + od.setIdentifier(getStringValue(elem)); + + elem = getDirectChild(origDesc, "datestamp"); + od.setDatestamp(getStringValue(elem)); + + elem = getDirectChild(origDesc, "metadataNamespace"); + od.setMetadataNamespace(getStringValue(elem)); + + elem = getDirectChild(origDesc, "originDescription"); + + if (elem != null) { + + od.setOriginDescription(buildOriginDescription(elem, OriginDescription.newBuilder())); + } + + return od.build(); + } + + private static boolean getBooleanValue(final org.w3c.dom.Element elem, final boolean defaultValue) { + return (elem != null && elem.getTextContent() != null) ? Boolean.valueOf(elem.getTextContent()) : defaultValue; + } + + private static String getStringValue(final org.w3c.dom.Element elem, final String defaultValue) { + return (elem != null && elem.getTextContent() != null) ? elem.getTextContent() : defaultValue; + } + + private static String getStringValue(final org.w3c.dom.Element elem) { + return getStringValue(elem, ""); + } + + protected static String getAttributeValue(final NamedNodeMap attributes, final String name) { + final Node attr = attributes.getNamedItem(name); + if (attr == null) return ""; + final String value = attr.getNodeValue(); + return value != null ? 
value : ""; + } + + protected static org.w3c.dom.Element getDirectChild(final org.w3c.dom.Element parent, final String name) { + for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) { + if ((child instanceof org.w3c.dom.Element) && name.equals(child.getLocalName())) return (org.w3c.dom.Element) child; + } + return null; + } + + protected static Qualifier.Builder getSimpleQualifier(final String classname, final String schemename) { + return getQualifier(classname, classname, schemename, schemename); + } + + protected static Qualifier.Builder getSimpleQualifier(final ProtocolMessageEnum classname, final String schemename) { + return getQualifier(classname.toString(), classname.toString(), schemename, schemename); + } + + protected static Qualifier.Builder getQualifier(final String classid, final String classname, final String schemeid, final String schemename) { + return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemeid).setSchemename(schemename); + } + + protected static Qualifier.Builder setQualifier(final Qualifier.Builder qualifier, final List fields) { + if ((fields == null) || fields.isEmpty() || fields.get(0).isEmpty()) return null; + + if ((fields != null) && !fields.isEmpty() && (fields.get(0) != null)) { + qualifier.setClassid(fields.get(0)); + qualifier.setClassname(getClassName(fields.get(0))); + } + return qualifier; + } + + protected static void addStructuredProps(final Builder builder, + final FieldDescriptor fd, + final ElementList values, + final String defaultClass, + final String defaultScheme) { + if (values != null) { + for (final Element s : values) { + final String classId = s.getAttributeValue("classid") != null ? s.getAttributeValue("classid") : defaultClass; + final String className = s.getAttributeValue("classname") != null ? s.getAttributeValue("classname") : defaultClass; + final String schemeId = s.getAttributeValue("schemeid") != null ? 
s.getAttributeValue("schemeid") : defaultScheme; + final String schemeName = s.getAttributeValue("schemename") != null ? s.getAttributeValue("schemename") : defaultScheme; + addField(builder, fd, getStructuredProperty(s.getText(), classId, className, schemeId, schemeName)); + } + } + } + + protected static void addJournal(final Metadata.Builder metadataProto, Element journalElement){ + final Journal.Builder journal = Journal.newBuilder(); + if (journalElement.getText() != null) { + journal.setName(journalElement.getText()); + } + + final Map attr = journalElement.getAttributes(); + if (attr != null) { + if (attr.get("issn") != null) { + journal.setIssnPrinted(attr.get("issn")); + } + if (attr.get("eissn") != null) { + journal.setIssnOnline(attr.get("eissn")); + } + if (attr.get("lissn") != null) { + journal.setIssnLinking(attr.get("lissn")); + } + + if (attr.get("ep") != null) { + journal.setEp(attr.get("ep")); + } + if (attr.get("iss") != null) { + journal.setIss(attr.get("iss")); + } + if (attr.get("sp") != null) { + journal.setSp(attr.get("sp")); + } + if (attr.get("vol") != null) { + journal.setVol(attr.get("vol")); + } + //TODO: CHECK ACTUAL ATTRIBUTE NAME #371#note-28 + if (attr.get("ed") != null) { + journal.setEdition(attr.get("ed")); + } + //TODO: CHECK ACTUAL ATTRIBUTE NAME #371#note-28 + if (attr.get("conferenceplace") != null) { + journal.setConferenceplace(attr.get("conferenceplace")); + } + //TODO: CHECK ACTUAL ATTRIBUTE NAME #371#note-28 + if (attr.get("conferencedate") != null) { + journal.setConferencedate(attr.get("conferencedate")); + } + } + metadataProto.setJournal(journal.build()); + } + + + + private static final Set invalidPidTypes = + Sets.newHashSet("distributionlocation", "url", " ", "local accession id", "local", "local id", "a local accession number", "landingpage", "publisherid", "report number", "uri", "contract", "doc", + "issn", "issn (online)", "issn (print)", "eissn", "citation", "unknown", "other", "oai", "case number", 
"section", "series", "report", + "other numbers", "site id", "fulltext", "internal", "report numbers", "product number", "depositor id", "isbn13", "doe contract number", "revision", + "issue", "pages", "volume", "another identifier for this resource", "csvdownload", "hepdatarecord", "hepdatarecordalt", "rootdownload", "yamldownload", "yodadownload", + "md5", "firstid", "uuid", "poster number", "compactidentifiers", "sample_id", "source identifier", "lod-catalog", "internal id", "funder", "department", + "odin doi viewer", "odin matdb viewer", "bitstream", "dipartimento", "technical note (national research council of canada. division of building research) series", + "internal report (national research council canada. division of building research) series", "dk.dda.ddieditor.version", "extended kim id", "kim id", "ccin", + "dk.dda.study.annonymizeddata", "e-issn", "call number", "sequenza"); + protected static List parsePids(final NodeList nodelist) { + + final List pids = Lists.newArrayList(); + + for (int i = 0; i < nodelist.getLength(); i++) { + final Node node = nodelist.item(i); + Node pidType = null; + if (node.getNodeType() == Node.ELEMENT_NODE) { + if (node.getLocalName().equalsIgnoreCase("identifier")) { + pidType = node.getAttributes().getNamedItem("identifierType"); + } + //this is to handle dataset pids + if (node.getLocalName().equalsIgnoreCase("alternateIdentifier")) { + pidType = node.getAttributes().getNamedItem("alternateIdentifierType"); + } + + for (int j = 0; j < node.getChildNodes().getLength(); j++) { + final Node child = node.getChildNodes().item(j); + + if ((child.getNodeType() == Node.TEXT_NODE) && (pidType != null) && (pidType.getNodeValue() != null) && !pidType.getNodeValue().isEmpty()) { + + final String type = pidType.getNodeValue().toLowerCase(); + + if (invalidPidTypes.contains(type)) { + break; + } + + final String value = child.getTextContent(); + + pids.add(getStructuredProperty(value, type, getClassName(type), "dnet:pid_types", 
"dnet:pid_types")); + break; + } + } + } + } + + final Map pidMap = pids.stream() + .collect(Collectors.toMap( + p -> getStructuredPropertyKey(p), + Function.identity(), + (oldValue, newValue) -> newValue)); + + return Lists.newArrayList(pidMap.values()); + } + + private static String getStructuredPropertyKey(final StructuredProperty p) { + return StringUtils.lowerCase(p.getQualifier().getClassid()) + StringUtils.lowerCase(p.getValue()); + } + + @SuppressWarnings("unchecked") + protected static void addField(final Builder builder, final FieldDescriptor descriptor, Object value) { + + if (value == null) return; + + if (value instanceof List>) { + for (final Object o : (List) value) { + addField(builder, descriptor, o); + } + } else { + Object fieldValue = value; + switch (descriptor.getType()) { + case BOOL: + fieldValue = Boolean.valueOf(value.toString()); + break; + case BYTES: + fieldValue = value.toString().getBytes(Charset.forName("UTF-8")); + break; + case DOUBLE: + fieldValue = Double.valueOf(value.toString()); + break; + case FLOAT: + fieldValue = Float.valueOf(value.toString()); + break; + case INT32: + case INT64: + case SINT32: + case SINT64: + fieldValue = Integer.valueOf(value.toString()); + break; + case MESSAGE: + final Builder q = builder.newBuilderForField(descriptor); + + if (value instanceof Builder) { + value = ((Builder) value).build(); + final byte[] b = ((Message) value).toByteArray(); + try { + q.mergeFrom(b); + } catch (final InvalidProtocolBufferException e) { + throw new IllegalArgumentException("Unable to merge value: " + value + " with builder: " + q.getDescriptorForType().getName()); + } + } else if (Qualifier.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof Qualifier) { + q.mergeFrom((Qualifier) value); + } else { + + List split = Lists.newArrayList(Splitter + .on("@@@").trimResults().split(value.toString())); + if (split.size() == 4) { + parseMessage(q, Qualifier.getDescriptor(), 
value.toString(), "@@@"); + } else { + final String classid = split.get(0); + final String schemeid = split.get(1); + final Qualifier qualifier = Qualifier.newBuilder() + .setClassid(classid) + .setClassname(getClassName(classid)) + .setSchemeid(schemeid) + .setSchemename(schemeid).build(); + q.mergeFrom(qualifier); + } + } + } else if (StructuredProperty.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof StructuredProperty) { + q.mergeFrom((StructuredProperty) value); + } else { + parseMessage(q, StructuredProperty.getDescriptor(), value.toString(), "###"); + } + } else if(Journal.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + final Journal.Builder journal = (Journal.Builder) q; + List ssns = Splitter.on("@@@").splitToList(value.toString()); + //in order: issn, eissn, lissn + journal.setIssnPrinted(ssns.get(0)).setIssnOnline(ssns.get(1)).setIssnLinking(ssns.get(2)); + } else if (KeyValue.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof KeyValue) { + q.mergeFrom((KeyValue) value); + } else { + parseMessage(q, KeyValue.getDescriptor(), value.toString(), "&&&"); + } + } else if (StringField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof StringField) { + q.mergeFrom((StringField) value); + } else { + q.setField(StringField.getDescriptor().findFieldByName("value"), value); + } + } else if (BoolField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof BoolField) { + q.mergeFrom((BoolField) value); + } else if (value instanceof String) { + q.setField(BoolField.getDescriptor().findFieldByName("value"), Boolean.valueOf((String) value)); + } else { + q.setField(BoolField.getDescriptor().findFieldByName("value"), value); + } + } else if (IntField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof IntField) { + 
q.mergeFrom((IntField) value); + } else if (value instanceof String) { + q.setField(IntField.getDescriptor().findFieldByName("value"), NumberUtils.toInt((String) value)); + } else { + q.setField(IntField.getDescriptor().findFieldByName("value"), value); + } + } + + fieldValue = q.buildPartial(); + break; + default: + break; + } + + doAddField(builder, descriptor, fieldValue); + } + + } + + protected static void doAddField(final Builder builder, final FieldDescriptor fd, final Object value) { + if (value != null) { + if (fd.isRepeated()) { + builder.addRepeatedField(fd, value); + } else if (fd.isOptional() || fd.isRequired()) { + builder.setField(fd, value); + } + } + } + + protected static void parseMessage(final Builder builder, final Descriptor descriptor, final String value, final String split) { + final IterablePair iterablePair = + new IterablePair(descriptor.getFields(), Lists.newArrayList(Splitter + .on(split).trimResults().split(value))); + + for (final Pair p : iterablePair) { + addField(builder, p.getKey(), p.getValue()); + } + } + + protected static String base64(final byte[] data) { + return new String(Base64.encodeBase64(data)); + } + + public static String replace(final String s, final String regex, final String replacement) { + return s.replaceAll(regex, replacement); + } + + public static String trim(final String s) { + return s.trim(); + } + + protected static String removePrefix(final Type type, final String s) { + return removePrefix(type.toString(), s); + } + + private static String removePrefix(final String prefix, final String s) { + return StringUtils.removeStart("" + s, prefix + "|"); + } + + protected static Qualifier.Builder getDefaultQualifier(final String scheme) { + final Qualifier.Builder qualifier = Qualifier.newBuilder().setSchemeid(scheme).setSchemename(scheme); + return qualifier; + } + + protected static StructuredProperty getStructuredProperty(final String value, + final String classid, + final String classname, + final String 
schemeid, + final String schemename) { + if ((value == null) || value.isEmpty()) return null; + return StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classid, classname, schemeid, schemename)).build(); + } + + protected static StringField.Builder sf(final String s) { + return StringField.newBuilder().setValue(s); + } + + public static String generateNsPrefix(final String prefix, final String externalId) { + return StringUtils.substring(prefix + StringUtils.leftPad(externalId, MAX_NSPREFIX_LEN - prefix.length(), "_"), 0, MAX_NSPREFIX_LEN); + } + + public static String md5(final String s) { + try { + final MessageDigest md = MessageDigest.getInstance("MD5"); + md.update(s.getBytes("UTF-8")); + return new String(Hex.encodeHex(md.digest())); + } catch (final Exception e) { + System.err.println("Error creating id"); + return null; + } + } + + public static String oafId(final String entityType, final String prefix, final String id) { + if (id.isEmpty() || prefix.isEmpty()) return ""; + return oafSimpleId(entityType, prefix + "::" + md5(id)); + } + + public static String oafSimpleId(final String entityType, final String id) { + return (Type.valueOf(entityType).getNumber() + "|" + id).replaceAll("\\s|\\n", ""); + } + + public static String oafSplitId(final String entityType, final String fullId) { + return oafId(entityType, StringUtils.substringBefore(fullId, "::"), StringUtils.substringAfter(fullId, "::")); + } + + /** + * Gets the classname of the given class code + * + * @param code class code. + * @return the class name, if the code is a key of the map. The code itself otherwise. + */ + public static String getClassName(final String code) { + final String classname = code2name.get(code); + if (StringUtils.isBlank(classname)) return code; + return classname; + } + + /** + * Utility method, allows to perform param based map lookups in xsl + * + * @param map + * @param key + * @return value associated to the key. 
+ */ + public static Object lookupValue(final Map map, final String key) { + return map.get(key); + } + + /** + * Utility method, allows to perform param based map lookups in xsl + * + * @param map + * @param key + * @return value associated to the key. + */ + public static int mustMerge(final Map map, final String key) { + final Object val = lookupValue(map, key); + return (val != null) && (val instanceof String) && val.equals("true") ? 1 : 0; + } + + public static String[] split(String name, String token){ + return name.split(token); + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/CommonDNetXsltFunctions.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/CommonDNetXsltFunctions.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/CommonDNetXsltFunctions.java (revision 58513) @@ -0,0 +1,101 @@ +package eu.dnetlib.data.transform.xml; + +import java.util.List; +import java.util.Map; + +import com.google.common.collect.Lists; +import com.google.protobuf.Message.Builder; +import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder; +import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; +import eu.dnetlib.data.proto.OafProtos.Oaf; +import eu.dnetlib.data.proto.OafProtos.OafRel; +import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; +import eu.dnetlib.data.proto.RelTypeProtos.RelType; +import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; +import eu.dnetlib.data.proto.TypeProtos.Type; +import org.w3c.dom.NodeList; + +/** + * Created by claudio on 01/12/15. 
+ */ +public class CommonDNetXsltFunctions extends AbstractDNetXsltFunctions { + + private static final int MAX_COAUTHORS = 50; + + public static String rel( + final String source, + final String target, + final String relType, + final String subRelType, + final String relClass, + final String provenanceAction, + final String trust) { + return rel(source, target, relType, subRelType, relClass, null, provenanceAction, trust, null, null); + } + + public static String rel( + final String source, + final String target, + final String relType, + final String subRelType, + final String relClass, + final NodeList metadata, + final String provenanceAction, + final String trust) { + return rel(source, target, relType, subRelType, relClass, metadata, provenanceAction, trust, null, null); + } + + public static String rel( + final String source, + final String target, + final String relType, + final String subRelType, + final String relClass, + final NodeList metadata, + final String provenanceAction, + final String trust, + final NodeList about) { + return rel(source, target, relType, subRelType, relClass, metadata, provenanceAction, trust, about, null); + } + + public static String rel( + final String source, + final String target, + final String relType, + final String subRelType, + final String relClass, + final NodeList metadata, + final String provenanceAction, + final String trust, + final NodeList about, + final Map params) { + + ValueMap values = null; + List collectedFrom = Lists.newArrayList(); + try { + final String eSource = OafRowKeyDecoder.decode(source).getKey(); + final String eTarget = OafRowKeyDecoder.decode(target).getKey(); + + final RelType rType = RelType.valueOf(relType); + final SubRelType srType = SubRelType.valueOf(subRelType); + + final RelMetadata.Builder metadataBuilder = RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, getVocabularyName(rType))); + + final Builder subRel = getSubRelBuilder(metadataBuilder, srType, params); + + 
if(metadata != null) { + values = ValueMap.parseNodeList(metadata); + collectedFrom = getKeyValues(values, "collectedFrom", Type.datasource); + } + + final OafRel.Builder rel = getRelBuilder(rType, srType, getRel(eSource, eTarget, rType, srType, relClass, collectedFrom, false), subRel); + + final Oaf oaf = getOaf(rel, getDataInfo(about, provenanceAction, trust, false, false)); + return base64(oaf.toByteArray()); + } catch (Exception e) { + e.printStackTrace(System.err); + throw new RuntimeException(e); + } + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/DatePicker.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/DatePicker.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/DatePicker.java (revision 58513) @@ -0,0 +1,112 @@ +package eu.dnetlib.data.transform; + +import eu.dnetlib.data.proto.FieldTypeProtos; +import org.apache.commons.lang.StringUtils; + +import java.time.Year; +import java.util.*; +import java.util.stream.Collectors; + +import static java.util.Collections.reverseOrder; +import static java.util.Map.Entry.comparingByValue; +import static java.util.stream.Collectors.toMap; +import static org.apache.commons.lang.StringUtils.endsWith; +import static org.apache.commons.lang.StringUtils.substringBefore; + +public class DatePicker { + + private static final String DATE_PATTERN = "\\d{4}-\\d{2}-\\d{2}"; + private static final String DATE_DEFAULT_SUFFIX = "01-01"; + private static final int YEAR_LB = 1300; + private static final int YEAR_UB = Year.now().getValue() + 5; + + public static FieldTypeProtos.StringField pick(final Collection dateofacceptance) { + + final Map frequencies = dateofacceptance + .parallelStream() + 
.filter(StringUtils::isNotBlank) + .collect( + Collectors.toConcurrentMap( + w -> w, w -> 1, Integer::sum)); + + if (frequencies.isEmpty()) { + return FieldTypeProtos.StringField.newBuilder().setValue("").build(); + } + + final FieldTypeProtos.StringField.Builder date = FieldTypeProtos.StringField.newBuilder().setValue(frequencies.keySet().iterator().next()); + + // let's sort this map by values first, filtering out invalid dates + final Map sorted = frequencies + .entrySet() + .stream() + .filter(d -> StringUtils.isNotBlank(d.getKey())) + .filter(d -> d.getKey().matches(DATE_PATTERN)) + .filter(d -> inRange(d.getKey())) + .sorted(reverseOrder(comparingByValue())) + .collect( + toMap( + Map.Entry::getKey, + Map.Entry::getValue, (e1, e2) -> e2, + LinkedHashMap::new)); + + // shortcut + if (sorted.size() == 0) { + return date.build(); + } + + // voting method (1/3 + 1) wins + if (sorted.size() >= 3) { + final int acceptThreshold = (sorted.size() / 3) + 1; + final List accepted = sorted.entrySet().stream() + .filter(e -> e.getValue() >= acceptThreshold) + .map(e -> e.getKey()) + .collect(Collectors.toList()); + + // cannot find strong majority + if (accepted.isEmpty()) { + final int max = sorted.values().iterator().next(); + Optional first = sorted.entrySet().stream() + .filter(e -> e.getValue() == max && !endsWith(e.getKey(), DATE_DEFAULT_SUFFIX)) + .map(Map.Entry::getKey) + .findFirst(); + if (first.isPresent()) { + return date.setValue(first.get()).build(); + } + + return date.setValue(sorted.keySet().iterator().next()).build(); + } + + if (accepted.size() == 1) { + return date.setValue(accepted.get(0)).build(); + } else { + final Optional first = accepted.stream() + .filter(d -> !endsWith(d, DATE_DEFAULT_SUFFIX)) + .findFirst(); + if (first.isPresent()) { + return date.setValue(first.get()).build(); + } + + return date.build(); + } + + //1st non YYYY-01-01 is returned + } else { + if (sorted.size() == 2) { + for (Map.Entry e : sorted.entrySet()) { + if 
(!endsWith(e.getKey(), DATE_DEFAULT_SUFFIX)) { + return date.setValue(e.getKey()).build(); + } + } + } + + // none of the dates seems good enough, return the 1st one + return date.setValue(sorted.keySet().iterator().next()).build(); + } + } + + private static boolean inRange(final String date) { + final int year = Integer.parseInt(substringBefore(date, "-")); + return year >= YEAR_LB && year <= YEAR_UB; + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/mapreduce/util/OafRelDecoder.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/mapreduce/util/OafRelDecoder.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/mapreduce/util/OafRelDecoder.java (revision 58513) @@ -0,0 +1,155 @@ +package eu.dnetlib.data.mapreduce.util; + +import com.google.protobuf.Descriptors.FieldDescriptor; +import com.google.protobuf.GeneratedMessage; +import com.google.protobuf.Message.Builder; +import com.google.protobuf.MessageOrBuilder; +import com.google.protobuf.ProtocolMessageEnum; + +import eu.dnetlib.data.proto.OafProtos.OafEntity; +import eu.dnetlib.data.proto.OafProtos.OafRel; +import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; +import eu.dnetlib.data.proto.RelTypeProtos.RelType; +import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; +import eu.dnetlib.data.proto.TypeProtos.Type; + +public class OafRelDecoder { + + private static final String SEPARATOR = "_"; + + private final OafRel oafRel; + + public static OafRelDecoder decode(final OafRel oafRel) { + return new OafRelDecoder(oafRel); + } + + private OafRelDecoder(final OafRel oafRel) { + this.oafRel = oafRel; + } + + public RelType getRelType() { + return oafRel.getRelType(); + } + + public String relTypeName() { + return 
getRelType().toString(); + } + + public SubRelType getSubRelType() { + return oafRel.getSubRelType(); + } + + public String relSubTypeName() { + return getSubRelType().toString(); + } + + public String getCF() { + return OafRelDecoder.getCF(getRelType(), getSubRelType()); + } + + public String getCFQ() { + return OafRelDecoder.getCFQ(getRelType(), getSubRelType(), getRelClass()); + } + + public static String getCFQ(final RelType relType, final SubRelType subRelType, final ProtocolMessageEnum relClass) { + return OafRelDecoder.getCFQ(relType, subRelType, relClass.getValueDescriptor().getName()); + } + + public static String getCFQ(final RelType relType, final SubRelType subRelType, final String relClass) { + return OafRelDecoder.getCF(relType, subRelType) + SEPARATOR + relClass; + } + + public static String getCF(final RelType relType, final SubRelType subRelType) { + return relType + SEPARATOR + subRelType; + } + + public String getRelClass() { + return oafRel.getRelClass(); + } + + public RelDescriptor getRelDescriptor() { + return new RelDescriptor(getCFQ()); + } + + public GeneratedMessage getRel() { + + FieldDescriptor fd = oafRel.getDescriptorForType().findFieldByName(relTypeName()); + return (GeneratedMessage) oafRel.getField(fd); + } + + public GeneratedMessage getSubRel() { + GeneratedMessage rel = getRel(); + FieldDescriptor fd = rel.getDescriptorForType().findFieldByName(relSubTypeName()); + return (GeneratedMessage) rel.getField(fd); + } + + public RelMetadata getRelMetadata() { + GeneratedMessage rel = getSubRel(); + FieldDescriptor fd = rel.getDescriptorForType().findFieldByName("relMetadata"); + return fd != null ? 
(RelMetadata) rel.getField(fd) : null; + } + + public OafRel.Builder setClassId(final String classid) { + RelMetadata.Builder relMetadataBuilder = RelMetadata.newBuilder(getRelMetadata()); + relMetadataBuilder.getSemanticsBuilder().setClassid(classid).setClassname(classid); + + OafRel.Builder builder = OafRel.newBuilder(oafRel); + + FieldDescriptor fdRel = fd(oafRel, relTypeName()); + Builder relBuilder = builder.newBuilderForField(fdRel); + + FieldDescriptor fdSubRel = fd(relBuilder, relSubTypeName()); + Builder subRelBuilder = relBuilder.newBuilderForField(fdSubRel).mergeFrom(getSubRel()); + + subRelBuilder.setField(fd(getSubRel(), "relMetadata"), relMetadataBuilder.build()); + + relBuilder.setField(fdSubRel, subRelBuilder.build()); + builder.setField(fdRel, relBuilder.build()); + + return builder.setRelClass(classid); + } + + public Type getTargetType(final Type sourceType) { + switch (getRelType()) { + case datasourceOrganization: + return sourceType.equals(Type.datasource) ? Type.organization : Type.datasource; + case organizationOrganization: + return Type.organization; + case projectOrganization: + return sourceType.equals(Type.project) ? Type.organization : Type.project; + case resultOrganization: + return sourceType.equals(Type.result) ? Type.organization : Type.result; + case resultProject: + return sourceType.equals(Type.result) ? 
Type.project : Type.result; + case resultResult: + return Type.result; + default: + throw new IllegalArgumentException("Unknown relationship type: " + relTypeName()); + } + } + + protected FieldDescriptor fd(final MessageOrBuilder mb, final int fieldNumber) { + return mb.getDescriptorForType().findFieldByNumber(fieldNumber); + } + + protected FieldDescriptor fd(final MessageOrBuilder mb, final String fieldName) { + return mb.getDescriptorForType().findFieldByName(fieldName); + } + + public String getCachedTargedId() { + + if (!oafRel.hasCachedTarget()) return null; + + final OafEntity entity = oafRel.getCachedTarget(); + return OafEntityDecoder.decode(entity).getId(); + } + + public String getRelSourceId() { + return oafRel.getSource(); + } + + public String getRelTargetId() { + return oafRel.getTarget(); + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/DbmfToHbaseXsltFunctions.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/DbmfToHbaseXsltFunctions.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/DbmfToHbaseXsltFunctions.java (revision 58513) @@ -0,0 +1,249 @@ +package eu.dnetlib.data.transform.xml; + +import java.util.List; + +import com.google.common.collect.Lists; +import com.google.protobuf.Descriptors.FieldDescriptor; +import com.google.protobuf.Message.Builder; +import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder; +import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization; +import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision; +import eu.dnetlib.data.proto.DatasourceProtos.Datasource; +import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo; +import 
eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; +import eu.dnetlib.data.proto.OafProtos.Oaf; +import eu.dnetlib.data.proto.OafProtos.OafEntity; +import eu.dnetlib.data.proto.OafProtos.OafRel; +import eu.dnetlib.data.proto.OrganizationProtos.Organization; +import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization; +import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation; +import eu.dnetlib.data.proto.ProjectProtos.Project; +import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; +import eu.dnetlib.data.proto.RelTypeProtos.RelType; +import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; +import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization; +import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation; +import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject; +import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome; +import eu.dnetlib.data.proto.ResultProtos.Result; +import eu.dnetlib.data.proto.ResultProtos.Result.Instance; +import eu.dnetlib.data.proto.TypeProtos.Type; +import org.apache.commons.lang3.StringUtils; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +public class DbmfToHbaseXsltFunctions extends CommonDNetXsltFunctions { + + public static String oafEntity(final String type, + final String id, + final String collectedFromId, + final String collectedFromName, + final NodeList identities, + final String dateOfCollection, + final String dateOfTransformation, + final NodeList nodeList) { + + final String entityId = OafRowKeyDecoder.decode(id).getKey(); + List ids = Lists.newArrayList(); + for(int i = 0; i < identities.getLength(); i++){ + Node n = identities.item(i); + String s = n.getTextContent(); + ids.add(s); + } + switch (Type.valueOf(type)) { + case datasource: + return serializeOafEntity(nodeList, Type.datasource, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection, + dateOfTransformation, 
Datasource.newBuilder()); + case organization: + return serializeOafEntity(nodeList, Type.organization, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection, + dateOfTransformation, Organization.newBuilder()); + case project: + return serializeOafEntity(nodeList, Type.project, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection, + dateOfTransformation, Project.newBuilder()); + case result: + return serializeOafEntity(nodeList, Type.result, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection, + dateOfTransformation ,Result.newBuilder()); + default: + throw new IllegalArgumentException("Invalid entity type: " + type); + } + } + + public static String oafRel(final String relationType, + final String source, + final String target, + final NodeList nodeList, + final String relClass, + final String relScheme) { + return oafRel(relationType, source, target, nodeList, relClass, relScheme, null, null); + } + + public static String oafRel(final String relationType, + final String source, + final String target, + final NodeList nodeList, + final String relClass, + final String relScheme, + final String collectedFromId, + final String collectedFromName) { + + final String eSource = OafRowKeyDecoder.decode(source).getKey(); + final String eTarget = OafRowKeyDecoder.decode(target).getKey(); + final RelType relType = RelType.valueOf(relationType); + + switch (relType) { + case datasourceOrganization: + Provision.Builder provision = Provision.newBuilder().setRelMetadata( + RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Provision.RelName.valueOf(relClass).toString(), relScheme))); + DatasourceOrganization.Builder dorg = DatasourceOrganization.newBuilder().setProvision(provision); + + return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.provision, relClass, collectedFromId, collectedFromName, false, dorg, provision); + case projectOrganization: + Participation.Builder participant = 
Participation.newBuilder().setRelMetadata( + RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Participation.RelName.valueOf(relClass).toString(), relScheme))); + ProjectOrganization.Builder projectOrganization = ProjectOrganization.newBuilder().setParticipation(participant); + + return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.participation, relClass, collectedFromId, collectedFromName, false, projectOrganization, participant); + case resultProject: + Outcome.Builder outcome = Outcome.newBuilder().setRelMetadata( + RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Outcome.RelName.valueOf(relClass).toString(), relScheme))); + ResultProject.Builder resultProject = ResultProject.newBuilder().setOutcome(outcome); + + return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.outcome, relClass, collectedFromId, collectedFromName, false, resultProject, outcome); + case resultOrganization: + Affiliation.Builder affiliation = Affiliation.newBuilder().setRelMetadata( + RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Affiliation.RelName.valueOf(relClass).toString(), relScheme))); + ResultOrganization.Builder resultOrganization = ResultOrganization.newBuilder().setAffiliation(affiliation); + + return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.affiliation, relClass, collectedFromId, collectedFromName, false, resultOrganization, affiliation); + default: + throw new IllegalArgumentException("unhandled relType: " + relationType); + } + } + + // //////////////////////////////////////////////////////// + + protected static String serializeOafEntity(final NodeList nodelist, + final Type type, + final String id, + final KeyValue collectedFrom, + final List identities, + final String dateOfCollection, + final String dateOfTransformation, + final Builder entity) { + try { + final FieldDescriptor md = entity.getDescriptorForType().findFieldByName("metadata"); + + final OafEntity.Builder parent = 
getEntity(type, id, Lists.newArrayList(collectedFrom), identities, dateOfCollection, dateOfTransformation, null); + final Builder metadata = entity.newBuilderForField(md); + final DataInfo.Builder dataInfo = DataInfo.newBuilder(); + + if (type.equals(Type.result)) { + final Instance.Builder instance = Instance.newBuilder(); + parseNodelist(nodelist, instance); + FieldDescriptor instanceDescriptor = Result.getDescriptor().findFieldByName(Instance.getDescriptor().getName()); + if (instanceDescriptor != null) { + entity.setField(instanceDescriptor, instance); + } + } + parseNodelist(nodelist, parent, entity, metadata, dataInfo); + + final FieldDescriptor entityDescriptor = OafEntity.getDescriptor().findFieldByName(type.toString()); + + final Oaf build = getOaf(parent.setField(entityDescriptor, entity.setField(md, metadata.build()).build()), dataInfo); + + return base64(build.toByteArray()); + } catch (Exception e) { + e.printStackTrace(System.err); + throw new RuntimeException(e); + } + } + + protected static String serializeOafRel(final NodeList nodeList, + final String sourceId, + final String targetId, + final RelType relType, + final SubRelType subRelType, + final String relClass, + final String collectedFromId, + final String collectedFromName, + final boolean isChild, + final Builder rel, + final Builder subRel) { + try { + + final DataInfo.Builder dataInfo = DataInfo.newBuilder(); + + parseNodelist(nodeList, rel, subRel, dataInfo); + + + OafRel.Builder builder = getRel(sourceId, targetId, relType, subRelType, relClass, getCollectedFrom(collectedFromId, collectedFromName), isChild); + + FieldDescriptor subRelDescriptor = rel.getDescriptorForType().findFieldByName(subRelType.toString()); + rel.setField(subRelDescriptor, subRel.build()); + + FieldDescriptor relDescriptor = OafRel.getDescriptor().findFieldByName(relType.toString()); + builder.setField(relDescriptor, rel.build()); + + Oaf build = getOaf(builder, dataInfo); + return base64(build.toByteArray()); + } 
catch (Exception e) { + e.printStackTrace(System.err); + throw new RuntimeException(e); + } + } + + private static List getCollectedFrom(final String id, final String name) { + if (StringUtils.isBlank(id)) { + return Lists.newArrayList(); + } + return Lists.newArrayList(getKV(id, name)); + } + + private static void parseNodelist(final NodeList nodeList, final Builder... builders) { + + for (int i = 0; i < nodeList.getLength(); i++) { + + final Node fieldNode = nodeList.item(i); + final Node attr = fieldNode.getAttributes().getNamedItem("name"); + + final String fieldName = attr.getNodeValue(); + final NodeList children = fieldNode.getChildNodes(); + + for (int j = 0; j < children.getLength(); j++) { + + final Node child = children.item(j); + final String childName = child.getLocalName(); + if ("ITEM".equals(childName) || StringUtils.isBlank(childName)) { + for (Builder builder : builders) { + FieldDescriptor desc = builder.getDescriptorForType().findFieldByName(fieldName); + if (desc != null) { + String text = getText((StringUtils.isBlank(childName)) ? 
fieldNode : child); + if (!StringUtils.isBlank(text)) { + addField(builder, desc, text); + } + } + } + } + } + } + } + + private static String getText(final Node node) { + StringBuffer result = new StringBuffer(); + if (!node.hasChildNodes()) { return ""; } + + NodeList list = node.getChildNodes(); + for (int i = 0; i < list.getLength(); i++) { + Node subnode = list.item(i); + if (subnode.getNodeType() == Node.TEXT_NODE) { + result.append(subnode.getNodeValue()); + } else if (subnode.getNodeType() == Node.CDATA_SECTION_NODE) { + result.append(subnode.getNodeValue()); + } else if (subnode.getNodeType() == Node.ENTITY_REFERENCE_NODE) { + result.append(getText(subnode)); + } + } + return result.toString().trim(); + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/ValueMap.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/ValueMap.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/ValueMap.java (revision 58513) @@ -0,0 +1,73 @@ +package eu.dnetlib.data.transform.xml; + +import java.util.HashMap; +import java.util.Map; + +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import com.google.common.collect.Maps; + +@SuppressWarnings("serial") +public class ValueMap extends HashMap { + + public static String IDX_ATTRIBUTE = "idx"; + + public static ValueMap parseNodeList(final NodeList nodeList) { + final ValueMap values = new ValueMap(); + + for (int i = 0; i < nodeList.getLength(); i++) { + getNodeValue(nodeList.item(i), values); + } + return values; + } + + protected static void getNodeValue(final Node node, final ValueMap values) { + + final String nodeName = 
node.getLocalName().toLowerCase(); + + final Node nodeText = node.getFirstChild(); + final Element element = nodeText != null ? new Element(nodeText.getNodeValue()) : new Element(); + final Map attrs = Maps.newHashMap(); + + final NamedNodeMap attributeList = node.getAttributes(); + for (int j = 0; j < attributeList.getLength(); j++) { + Node attr = attributeList.item(j); + if ((attr.getNodeValue() != null) && !attr.getNodeValue().isEmpty()) { + attrs.put(attr.getLocalName(), attr.getNodeValue()); + if (values.containsKey(nodeName)) { + attrs.put(IDX_ATTRIBUTE, String.valueOf(values.get(nodeName).size() + 1)); + } else { + attrs.put(IDX_ATTRIBUTE, "1"); + } + } + } + element.setAttributes(attrs); + + if (!element.isEmpty()) { + if (!values.containsKey(nodeName)) { + values.put(nodeName, new ElementList()); + } + + values.get(nodeName).add(element); + } + } + + @Override + public ElementList get(final Object key) { + ElementList e = super.get(key); + return e != null ? e : new ElementList(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("{"); + for (String k : this.keySet()) { + sb.append(k).append("=").append(this.get(k)).append("\n"); + } + sb.append("}"); + return sb.toString(); + } +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/mapreduce/util/OafTest.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/mapreduce/util/OafTest.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/mapreduce/util/OafTest.java (revision 58513) @@ -0,0 +1,445 @@ +package eu.dnetlib.data.mapreduce.util; + +import com.google.protobuf.GeneratedMessage; +import com.google.protobuf.InvalidProtocolBufferException; +import 
eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization; +import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision; +import eu.dnetlib.data.proto.DatasourceProtos.Datasource; +import eu.dnetlib.data.proto.DedupProtos.Dedup; +import eu.dnetlib.data.proto.FieldTypeProtos.*; +import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder; +import eu.dnetlib.data.proto.KindProtos.Kind; +import eu.dnetlib.data.proto.OafProtos.Oaf; +import eu.dnetlib.data.proto.OafProtos.OafEntity; +import eu.dnetlib.data.proto.OafProtos.OafRel; +import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization; +import eu.dnetlib.data.proto.OrganizationProtos.Organization; +import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization; +import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation; +import eu.dnetlib.data.proto.ProjectProtos.Project; +import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; +import eu.dnetlib.data.proto.RelTypeProtos.RelType; +import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; +import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject; +import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome; +import eu.dnetlib.data.proto.ResultProtos.Result; +import eu.dnetlib.data.proto.ResultProtos.Result.Context; +import eu.dnetlib.data.proto.ResultProtos.Result.Instance; +import eu.dnetlib.data.proto.ResultResultProtos.ResultResult; +import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity; +import eu.dnetlib.data.proto.TypeProtos.Type; + +public class OafTest { + + public static final String CITATION_JSON = + "\n \n [10] M. Foret et al., Phys. Rev. B 66, 024204 (2002).\n \n \n [11] B. Ru\175404\264e et al., Phys. Rev. Lett. 90, 095502 (2003).\n \n \n [12] U. Buchenau et al., Phys. Rev. B 34, 5665 (1986).\n \n \n [13] S.N. Taraskin and S.R. Elliott, J. Phys.: Condens. 
Mat- ter 11, A219 (1999).\n \n \n [14] B. Hehlen et al., Phys. Rev. Lett. 84, 5355 (2000).\n \n \n [15] N.V. Surotsev et al., J. Phys.: Condens. Matter 10, L113 (1998).\n \n \n [16] D.A. Parshin and C. Laermans, Phys. Rev. B 63, 132203 (2001).\n \n \n [17] V.L. Gurevich et al., Phys. Rev. B 67, 094203 (2003).\n \n \n [18] A. Matic et al., Phys. Rev. Lett. 86, 3803 (2001).\n \n \n [19] E. Rat et al., arXiv:cond-mat/0505558, 23 May 2005.\n \n \n [1] R.C. Zeller and R.O. Pohl, Phys. Rev. B 4, 2029 (1971).\n \n \n [20] C.A. Angell, J. Non-Cryst. Solids 131\20023133, 13 (1991).\n \n \n [21] A.P. Sokolov et al., Phys. Rev. Lett. 71, 2062 (1993).\n \n \n [22] T. Matsuo et al., Solid State Ionics 154-155, 759 (2002).\n \n \n [23] V.K. Malinovsky et al., Europhys. Lett. 11, 43 (1990).\n \n \n [24] J. Lor\250osch et al., J. Non-Cryst. Solids 69, 1 (1984).\n \n \n [25] U. Buchenau, Z. Phys. B 58, 181 (1985).\n \n \n [26] A.F. Io\175400e and A.R. Regel, Prog. Semicond. 4, 237 (1960).\n \n \n [27] R. Dell\20031Anna et al., Phys. Rev. Lett. 80, 1236 (1998).\n \n \n [28] D. Fioretto et al., Phys. Rev. E 59, 4470 (1999).\n \n \n [29] U. Buchenau et al., Phys. Rev. Lett. 77, 4035 (1996).\n \n \n [2] M. Rothenfusser et al., Phys. Rev. B 27, 5196 (1983).\n \n \n [30] J. Mattsson et al., J. Phys.: Condens. Matter 15, S1259 (2003).\n \n \n [31] T. Scopigno et al., Phys. Rev. Lett. 92, 025503 (2004).\n \n \n [32] M. Foret et al., Phys. Rev. Lett. 81, 2100 (1998).\n \n \n [33] F. Sette et al., Science 280, 1550 (1998).\n \n \n [34] J. Wuttke et al., Phys. Rev. E 52, 4026 (1995).\n \n \n [35] M.A. Ramos et al., Phys. Rev. Lett. 78, 82 (1997).\n \n \n [36] G. Monaco et al., Phys. Rev. Lett. 80, 2161 (1998).\n \n \n [37] A. T\250olle, Rep. Prog. Phys. 64, 1473 (2001).\n \n \n [38] As the straight lines do not cross the origin, this does not 2 imply \1623 \21035 \1651 .\n \n \n [39] A. Matic et al., Europhys. Lett. 54, 77 (2001).\n \n \n [3] S. Hunklinger and W. 
Arnold, in Physical Acoustics, Vol. XII, W.P. Mason and R.N. Thurston Eds. (Academic Press, N.Y. 1976), p. 155.\n \n \n [40] IXS data are usually not available below \1651co, mostly for experimental reasons. E.g., that the rapid onset was not evidenced in vitreous silica [27], is not indicative of its absence but rather of a low qco \21074 1 nm\210221.\n \n \n [41] G. Ruocco et al., Phys. Rev. Lett. 83, 5583 (1999).\n \n \n [42] D. C\1307 iplys et al., J. Physique (Paris) 42, C6-184 (1981).\n \n \n [43] R. Vacher et al., Rev. Sci. Instrum. 51, 288 (1980).\n \n \n [44] R. Vacher et al., arXiv:cond-mat/0505560, 23 May 2005.\n \n \n [45] T.N. Claytor et al., Phys. Rev. B 18, 5842 (1978).\n \n \n [46] M. Arai et al., Physica B 263-264, 268 (1999).\n \n \n [4] R. Vacher et al., J. Non-Cryst. Solids 45, 397 (1981); T.C. Zhu et al., Phys. Rev. B 44, 4281 (1991).\n \n \n [5] J.E. Graebner et al., Phys. Rev. B 34, 5696 (1986).\n \n \n [6] E. Duval and A. Mermet, Phys. Rev. B 58, 8159 (1998).\n \n \n [7] A. Matic et al., Phys. Rev. Lett. 93, 145502 (2004).\n \n \n [8] Often alluded to, e.g. in the Encyclopedia of Materials: Science and Technology, K.H.J. Buschow et al., Eds., Vol. 1 (Elsevier, Oxford, 2001), articles by S.R. Elliott on pp. 171-174 and U. Buchenau on pp. 212-215.\n \n \n [9] E. Rat et al., Phys. Rev. Lett. 
83, 1355 (1999).\n \n"; + + public static final String STATISTICS_JSON = + "[{ \"citationsPerYear\": \"many\", \"anotherCoolStatistic\": \"WoW\", \"nestedStat\": { \"firstNestedStat\" : \"value 1\", \"secondNestedStat\" : \"value 2\"}, \"listingStat\" : [ \"one\", \"two\" ] }]"; + + public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename) { + return getStructuredproperty(value, classname, schemename, null); + } + + public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename, final DataInfo dataInfo) { + final Builder sp = StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classname, schemename)); + if (dataInfo != null) { + sp.setDataInfo(dataInfo); + } + return sp; + } + + public static Qualifier.Builder getQualifier(final String classname, final String schemename) { + return Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename); + } + + public static KeyValue getKV(final String id, final String name) { + return KeyValue.newBuilder().setKey(id).setValue(name).build(); + } + + public static OafEntity getDatasource(final String datasourceId) { + return OafEntity + .newBuilder() + .setType(Type.datasource) + .setId(datasourceId) + .setDatasource( + Datasource.newBuilder().setMetadata( + Datasource.Metadata.newBuilder().setOfficialname(sf("officialname")).setEnglishname(sf("englishname")) + .setWebsiteurl(sf("websiteurl")).setContactemail(sf("contactemail")).addAccessinfopackage(sf("accessinforpackage")) + .setNamespaceprefix(sf("namespaceprofix")).setDescription(sf("description")).setOdnumberofitems(sf("numberofitems")) + .setOdnumberofitemsdate(sf("numberofitems date")) + // .addOdsubjects("subjects") + .setOdpolicies(sf("policies")).addOdlanguages(sf("languages")).addOdcontenttypes(sf("contenttypes")) + 
.setDatasourcetype(getQualifier("type class", "type scheme")))).build(); + } + + public static OafEntity getResult(final String id) { + return getResultBuilder(id).build(); + } + + public static OafEntity.Builder getResultBuilder(final String id) { + return OafEntity + .newBuilder() + .setType(Type.result) + .setId(id) + .setResult( + Result.newBuilder() + .setMetadata( + Result.Metadata + .newBuilder() + .addTitle( + getStructuredproperty( + "Analysis of cell viability in intervertebral disc: Effect of endplate permeability on cell population", + "main title", "dnet:result_titles", getDataInfo())) + .addTitle(getStructuredproperty("Another title", "alternative title", "dnet:result_titles", getDataInfo())) + .addSubject(getStructuredproperty("Biophysics", "subject", "dnet:result_sujects")) + .setDateofacceptance(sf("2010-01-01")).addSource(sf("sourceA")).addSource(sf("sourceB")) + .addContext(Context.newBuilder().setId("egi::virtual::970")) + .addContext(Context.newBuilder().setId("egi::classification::natsc::math::applied")) + .addContext(Context.newBuilder().setId("egi::classification::natsc::math")) + .addContext(Context.newBuilder().setId("egi::classification::natsc")) + .addContext(Context.newBuilder().setId("egi::classification")).addContext(Context.newBuilder().setId("egi")) + .addDescription(sf("Responsible for making and maintaining the extracellular matrix ...")) + .addDescription(sf("Another description ...")).setPublisher(sf("ELSEVIER SCI LTD")) + .setResulttype(getQualifier("publication", "dnet:result_types")) + .setLanguage(getQualifier("eng", "dnet:languages"))).addInstance(getInstance("10|od__10", "Uk pubmed")) + .addInstance(getInstance("10|od__10", "arxiv"))) + .addCollectedfrom(getKV("opendoar____::1064", "Oxford University Research Archive")) + .addPid(getStructuredproperty("doi:74293", "doi", "dnet:pids")).addPid(getStructuredproperty("oai:74295", "oai", "dnet:pids")) + .setDateofcollection(""); + } + + public static DataInfo getDataInfo() { + 
return getDataInfo("0.4"); + } + + /** + * Builds a sample DataInfo carrying the given trust value. + * + * @param trust the trust value stored in the returned DataInfo + * @return a DataInfo with deletedbyinference set to false, the given trust, inference provenance "algo" and provenance action qualifier ("xx", "yy") + */ + public static DataInfo getDataInfo(final String trust) { + return DataInfo.newBuilder().setDeletedbyinference(false).setTrust(trust).setInferenceprovenance("algo").setProvenanceaction(getQualifier("xx", "yy")) + .build(); + } + + public static Instance.Builder getInstance(final String hostedbyId, final String hostedbyName) { + return Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName)).setAccessright(getQualifier("OpenAccess", "dnet:access_modes")) + .setInstancetype(getQualifier("publication", "dnet:result_typologies")).addUrl("webresource url"); + + } + + public static OafRel getDedupRel(final String source, final String target, final RelType relType, final String relClass) { + return OafRel.newBuilder().setSource(source).setTarget(target).setRelType(relType).setSubRelType(SubRelType.dedup).setRelClass(relClass) + .setChild(false).setCachedTarget(getResult(target)) + .setResultResult(ResultResult.newBuilder().setDedup(Dedup.newBuilder().setRelMetadata(RelMetadata.getDefaultInstance()))) + .build(); + } + + public static OafRel getProjectOrganization(final String source, final String target, final String relClass) throws InvalidProtocolBufferException { + final OafRel.Builder oafRel = OafRel + .newBuilder() + .setSource(source) + .setTarget(target) + .setRelType(RelType.projectOrganization) + .setSubRelType(SubRelType.participation) + .setRelClass(relClass) + .setChild(false) + .setProjectOrganization( + ProjectOrganization.newBuilder().setParticipation( + Participation.newBuilder().setParticipantnumber("" + 1) + .setRelMetadata(relMetadata(relClass, "dnet:project_organization_relations")))); + switch (Participation.RelName.valueOf(relClass)) { + case hasParticipant: + oafRel.setCachedTarget(getProjectFP7(target, "SP3")); + break; + case isParticipant: + oafRel.setCachedTarget(getOrganization(target)); + break; + default: + break; + } + return oafRel.build(); + } + + public static GeneratedMessage
getOrganizationOrganization(final String source, final String target, final String relClass) { + final OafRel.Builder oafRel = OafRel + .newBuilder() + .setSource(source) + .setTarget(target) + .setRelType(RelType.organizationOrganization) + .setSubRelType(SubRelType.dedup) + .setRelClass(relClass) + .setChild(true) + .setOrganizationOrganization( + OrganizationOrganization.newBuilder().setDedup( + Dedup.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:organization_organization_relations")))); + + switch (Dedup.RelName.valueOf(relClass)) { + case isMergedIn: + oafRel.setCachedTarget(getOrganization(source)); + break; + case merges: + oafRel.setCachedTarget(getOrganization(target)); + break; + default: + break; + } + return oafRel.build(); + } + + public static OafRel getDatasourceOrganization(final String source, final String target, final String relClass) throws InvalidProtocolBufferException { + final OafRel.Builder oafRel = OafRel + .newBuilder() + .setSource(source) + .setTarget(target) + .setRelType(RelType.datasourceOrganization) + .setSubRelType(SubRelType.provision) + .setRelClass(relClass) + .setChild(false) + .setDatasourceOrganization( + DatasourceOrganization.newBuilder().setProvision( + Provision.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:datasource_organization_relations")))); + switch (Provision.RelName.valueOf(relClass)) { + case isProvidedBy: + oafRel.setCachedTarget(getOrganization(target)); + break; + case provides: + oafRel.setCachedTarget(getDatasource(target)); + break; + default: + break; + } + return oafRel.build(); + } + + public static OafRel getSimilarityRel(final String sourceId, final String targetId, final OafEntity result, final String relClass) { + return OafRel + .newBuilder() + .setSource(sourceId) + .setTarget(targetId) + .setRelType(RelType.resultResult) + .setSubRelType(SubRelType.similarity) + .setRelClass(relClass) + .setChild(false) + .setCachedTarget(result) + .setResultResult( + 
ResultResult.newBuilder().setSimilarity( + Similarity.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:resultResult_relations")).setSimilarity(.4f) + .setType(Similarity.Type.STANDARD))).build(); + } + + public static RelMetadata.Builder relMetadata(final String classname, final String schemename) { + return RelMetadata.newBuilder().setSemantics(getQualifier(classname, schemename)); + } + + public static OafEntity getOrganization(final String orgId) { + return OafEntity + .newBuilder() + .setType(Type.organization) + .setId(orgId) + .addCollectedfrom(getKV("opendoar_1234", "UK pubmed")) + .setOrganization( + Organization.newBuilder().setMetadata( + Organization.Metadata.newBuilder().setLegalname(sf("CENTRE D'APPUI A LA RECHERCHE ET A LA FORMATION GIE")) + .setLegalshortname(sf("CAREF")).setWebsiteurl(sf("www.caref-mali.org")) + .setCountry(getQualifier("ML", "dnet:countries")))).build(); + } + + public static OafRel getResultProject(final String from, final String to, final OafEntity project, final String relClass) + throws InvalidProtocolBufferException { + return OafRel + .newBuilder() + .setSource(from) + .setTarget(to) + .setRelType(RelType.resultProject) + .setSubRelType(SubRelType.outcome) + .setRelClass(relClass) + .setChild(false) + .setResultProject( + ResultProject.newBuilder().setOutcome(Outcome.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:result_project_relations")))) + .setCachedTarget(project).build(); + } + + public static OafEntity getProjectFP7(final String projectId, final String fundingProgram) throws InvalidProtocolBufferException { + return OafEntity + .newBuilder() + .setType(Type.project) + .setId(projectId) + .addCollectedfrom(getKV("opendoar_1234", "UK pubmed")) + .setProject( + Project.newBuilder() + .setMetadata( + Project.Metadata + .newBuilder() + .setAcronym(sf("5CYRQOL")) + .setTitle(sf("Cypriot Researchers Contribute to our Quality of Life")) + .setStartdate(sf("2007-05-01")) + .setEnddate(sf("2007-10-31")) + 
.setEcsc39(sf("false")) + .setContracttype(getQualifier("CSA", "ec:FP7contractTypes")) + .addFundingtree( + sf("ec__________::ECECEuropean Commissionec__________::EC::FP7::" + + fundingProgram + + "::PEOPLEMarie-Curie ActionsPEOPLEec:programec__________::EC::FP7::" + + fundingProgram + + "" + + fundingProgram + + "-People" + + fundingProgram + + "ec:specificprogramec__________::EC::FP7SEVENTH FRAMEWORK PROGRAMMEFP7ec:frameworkprogram")))) + .build(); + } + + public static OafEntity getProjectWT() throws InvalidProtocolBufferException { + return OafEntity + .newBuilder() + .setType(Type.project) + .setId("project|wt::087536") + .addCollectedfrom(getKV("wellcomeTrust", "wellcome trust")) + .setProject( + Project.newBuilder() + .setMetadata( + Project.Metadata + .newBuilder() + .setAcronym(sf("UNKNOWN")) + .setTitle(sf("Research Institute for Infectious Diseases of Poverty (IIDP).")) + .setStartdate(sf("2007-05-01")) + .setEnddate(sf("2007-10-31")) + .setEcsc39(sf("false")) + .setContracttype(getQualifier("UNKNOWN", "wt:contractTypes")) + .addFundingtree( + sf("wt__________::WTWTWellcome Trustwt__________::WT::UNKNOWNUNKNOWNUNKNOWNwt:fundingStream")) + .addFundingtree( + sf("wt__________::WTWTWellcome Trustwt__________::WT::Technology TransferTechnology TransferTechnology Transferwt:fundingStream")))) + .build(); + } + + public static ExtraInfo extraInfo(final String name, final String provenance, final String trust, final String typology, final String value) { + final ExtraInfo.Builder e = ExtraInfo.newBuilder().setName(name).setProvenance(provenance).setTrust(trust).setTypology(typology).setValue(value); + return e.build(); + } + + // public static DocumentClasses documentClasses() { + // DocumentClasses.Builder builder = DocumentClasses.newBuilder(); + // for (int i = 0; i < RandomUtils.nextInt(N_DOCUMENT_CLASSES) + 1; i++) { + // builder.addArXivClasses(getDocumentClass()).addDdcClasses(getDocumentClass()).addWosClasses(getDocumentClass()) + // 
.addMeshEuroPMCClasses(getDocumentClass()); + // } + // return builder.build(); + // } + // + // private static DocumentClass getDocumentClass() { + // DocumentClass.Builder builder = DocumentClass.newBuilder(); + // for (int i = 0; i < RandomUtils.nextInt(N_DOCUMENT_CLASS_LABELS) + 1; i++) { + // builder.addClassLabels("test_class_" + i); + // } + // return builder.setConfidenceLevel(0.5F).build(); + // } + // + // public static DocumentStatistics documentStatistics() { + // return + // DocumentStatistics.newBuilder().setCitationsFromAllPapers(basicCitationStatistics()).setCitationsFromPublishedPapers(basicCitationStatistics()) + // .build(); + // } + // + // private static BasicCitationStatistics basicCitationStatistics() { + // BasicCitationStatistics.Builder builder = BasicCitationStatistics.newBuilder(); + // for (int i = 0; i < N_CITATION_STATS; i++) { + // builder.addNumberOfCitationsPerYear(statisticsKeyValue()); + // builder.setNumberOfCitations(RandomUtils.nextInt(5) + 1); + // } + // return builder.build(); + // } + // + // private static StatisticsKeyValue statisticsKeyValue() { + // return StatisticsKeyValue.newBuilder().setKey((RandomUtils.nextInt(30) + 1980) + "").setValue(RandomUtils.nextInt(5) + 1).build(); + // } + // + // public static AuthorStatistics authorStatistics() { + // AuthorStatistics.Builder builder = AuthorStatistics.newBuilder(); + // builder.setCore(commonCoreStatistics()); + // for (int i = 0; i < N_COAUTHORS; i++) { + // builder.addCoAuthors(coAuthor()); + // } + // return builder.build(); + // } + // + // private static CoAuthor coAuthor() { + // CoAuthor.Builder builder = CoAuthor.newBuilder(); + // builder.setId("30|od______2345::" + Hashing.md5(RandomStringUtils.random(10))); + // builder.setCoauthoredPapersCount(RandomUtils.nextInt(5) + 1); + // return builder.build(); + // } + // + // public static CommonCoreStatistics commonCoreStatistics() { + // CommonCoreStatistics.Builder builder = CommonCoreStatistics.newBuilder(); + 
// + // builder.setAllPapers(coreStatistics()); + // builder.setPublishedPapers(coreStatistics()); + // + // return builder.build(); + // } + // + // private static CoreStatistics coreStatistics() { + // CoreStatistics.Builder builder = CoreStatistics.newBuilder(); + // + // builder.setNumberOfPapers(RandomUtils.nextInt(10)); + // builder.setCitationsFromAllPapers(extendedStatistics()); + // builder.setCitationsFromPublishedPapers(extendedStatistics()); + // + // return builder.build(); + // } + // + // private static ExtendedStatistics extendedStatistics() { + // ExtendedStatistics.Builder builder = ExtendedStatistics.newBuilder(); + // + // builder.setBasic(basicCitationStatistics()); + // builder.setAverageNumberOfCitationsPerPaper(RandomUtils.nextFloat()); + // for (int i = 0; i < N_CITATION_STATS; i++) { + // builder.addNumberOfPapersCitedAtLeastXTimes(statisticsKeyValue()); + // } + // + // return builder.build(); + // } + + public static StringField sf(final String s) { + return sf(s, null); + } + + public static StringField sf(final String s, final DataInfo dataInfo) { + final StringField.Builder sf = StringField.newBuilder().setValue(s); + if (dataInfo != null) { + sf.setDataInfo(dataInfo); + } + return sf.build(); + } + + public static OafDecoder embed(final GeneratedMessage msg, + final Kind kind, + final boolean deletedByInference, + final boolean inferred, + final String provenance, + final String action) { + + final Oaf.Builder oaf = Oaf + .newBuilder() + .setKind(kind) + .setLastupdatetimestamp(System.currentTimeMillis()) + .setDataInfo( + DataInfo.newBuilder().setDeletedbyinference(deletedByInference).setInferred(inferred).setTrust("0.5") + .setInferenceprovenance(provenance).setProvenanceaction(getQualifier(action, action))); + switch (kind) { + case entity: + oaf.setEntity((OafEntity) msg); + break; + case relation: + oaf.setRel((OafRel) msg); + break; + default: + break; + } + + return OafDecoder.decode(oaf.build()); + } + + public static 
OafDecoder embed(final GeneratedMessage msg, final Kind kind) { + return embed(msg, kind, false, false, "inference_provenance", "provenance_action"); + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/OpenTrialsXsltFunctions.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/OpenTrialsXsltFunctions.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/OpenTrialsXsltFunctions.java (revision 58513) @@ -0,0 +1,328 @@ +package eu.dnetlib.data.transform.xml; + +import java.lang.reflect.Type; +import java.util.List; + +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; +import org.apache.commons.lang.StringUtils; + + +/** + * * ... + * Created by alessia on 13/05/16. + */ +public class OpenTrialsXsltFunctions { + + /** + * Parses a json to get the first url item. 
+ * + * @param jsonProvList A json string in the following format: [{"url" : "theUrl", "sourceID" : "theSourceId", "sourceName" : "theSourceName"}] + * @return the url value in the first object in the list + */ + @Deprecated + public static String getMainIdentifierURL(String jsonProvList) { + List<JsonProv> provs = getProvs(jsonProvList); + for (JsonProv prov : provs) { + if (StringUtils.isNotBlank(prov.getUrl())) return prov.getUrl(); + } + return ""; + } + + /** + * Parses a Json to get the url item associated to the primary record if any + * @param jsonRecordsList A json string in the format [{"source_id","theSourceId","source_url","theUrlOfTheTrialForThisSource", "is_primary","typeOfTheTrial")}] + * @return the url value of the primary record + */ + public static String getPrimaryRecordUrl(String jsonRecordsList){ + List<JsonRecord> records = getRecords(jsonRecordsList); + for (JsonRecord record : records) { + if(record.isIs_primary() && StringUtils.isNotBlank(record.getSource_url())) + return record.getSource_url(); + } + if (!records.isEmpty() && StringUtils.isNotBlank(records.get(0).getSource_url())){ + return records.get(0).getSource_url(); + } + return ""; + } + + /** + * Parses a Json to get the id of the source associated to the primary record if any. + * When no primary record carries a non-blank source id, the source id of the first record is used instead. + * @param jsonRecordsList A json string in the format [{"source_id","theSourceId","source_url","theUrlOfTheTrialForThisSource", "is_primary","typeOfTheTrial")}] + * @return the id of the source of the primary record, else the non-blank source id of the first record, else the empty string + */ + public static String getPrimaryRecordIdentifier(String jsonRecordsList){ + List<JsonRecord> records = getRecords(jsonRecordsList); + for (JsonRecord record : records) { + if(record.isIs_primary() && StringUtils.isNotBlank(record.getSource_id())) + return record.getSource_id(); + } + /* the fallback must test the same field it returns, otherwise a null or blank id can leak out */ + if (!records.isEmpty() && StringUtils.isNotBlank(records.get(0).getSource_id())){ + return records.get(0).getSource_id(); + } + return ""; + } + + /** + * Parses a Json to get the list of the not primary url associated to the trial if any + *
@param jsonRecordsList A json string in the format [{"source_id","theSourceId","source_url","theUrlOfTheTrialForThisSource", "is_primary","typeOfTheTrial")}] + * @return a string containing the not primary source urls divided by "@@" + */ + public static String getNotPrimaryRecordUrl(String jsonRecordsList){ + boolean found = false; + String ret = ""; + List records = getRecords(jsonRecordsList); + for (JsonRecord record : records) { + if(record.isIs_primary()) + found = true; + else + if(StringUtils.isNotBlank(record.getSource_url())) + ret += record.getSource_url() + "@@"; + } + if (!found && ret.length() > 0){ + ret = ret.substring(ret.indexOf("@@")+2); + } + if(ret.endsWith("@@")) + ret = ret.substring(0,ret.length()-2); + + return ret; + } + + /** + * Parses a Json to get the names of the principal investigators of the trial if any + * @param jsonPeopleList A json string in the format [{"person_name", "thePersonName", "person_id","thePersonId", "person_role",thePersonRole)}] + * @return a string containing the names of the principal investigators divided by @@ + */ + public static String getPrincipalInvestigators(String jsonPeopleList){ + List people = getPeople(jsonPeopleList); + String ret =""; + for (JsonPeople person : people) { + if(StringUtils.isNotBlank(person.getPerson_role()) && (person.getPerson_role().equals("principal_investigator"))){ + ret += conformToGuidelines(person.getPerson_name()) + "@@"; + } + + } + if(ret.endsWith("@@")){ + ret=ret.substring(0,ret.length()-2); + } + return ret; + } + + private static String conformToGuidelines(String person_name) { + + if (person_name.indexOf(",") > -1) + person_name = person_name.substring(0,person_name.indexOf(",")); + String[] name = person_name.split(" "); + person_name = name[name.length-1] + ","; + for(int i=0;i organizations = getOrganizations(jsonOrganizationList); + String ret =""; + for (JsonOrganization o : organizations) { + if (StringUtils.isNotBlank(o.getOrganization_id())) { + ret += 
o.getOrganization_name() + "@"; + if (o.getOrganization_role().equalsIgnoreCase("primary_sponsor")) + ret += "sponsor@@"; + else + ret += o.getOrganization_role() + "@@"; + } + } + if(ret.endsWith("@@")){ + ret=ret.substring(0,ret.length()-2); + } + return ret; + } + + /** + * Parses a Json to get the names of the locations where the trial take place if any + * @param jsonLocationsList A json string in the format [{"location_name","theLocationName"}] + * @return a string containing the locations associated to the trial divided by "@@" + */ + public static String getTrialLocations(String jsonLocationsList){ + List locations = getLocations(jsonLocationsList); + String ret =""; + for (JsonLocation l : locations) { + if (StringUtils.isNotBlank(l.getLocation_name())) + ret += l.getLocation_name() + "@@"; + + } + if(ret.endsWith("@@")){ + ret=ret.substring(0,ret.length()-2); + } + return ret; + } + + @Deprecated + public static List getProvs(String jsonProvList) { + Gson gson = new Gson(); + Type type = new TypeToken>() { + }.getType(); + return gson.fromJson(jsonProvList, type); + } + + public static List getRecords(String jsonRecordsList) { + Gson gson = new Gson(); + Type type = new TypeToken>() { + }.getType(); + return gson.fromJson(jsonRecordsList, type); + } + + + public static List getPeople(String jsonPeopleList) { + Gson gson = new Gson(); + Type type = new TypeToken>() { + }.getType(); + return gson.fromJson(jsonPeopleList, type); + } + + public static List getOrganizations(String jsonOrganizationsList) { + Gson gson = new Gson(); + Type type = new TypeToken>() { + }.getType(); + return gson.fromJson(jsonOrganizationsList, type); + } + + public static List getLocations(String jsonLocationsList) { + Gson gson = new Gson(); + Type type = new TypeToken>() { + }.getType(); + return gson.fromJson(jsonLocationsList, type); + } + + @Deprecated + static class JsonProv { + + String url, sourceId, sourceName; + + public String getUrl() { + return url; + } + + public 
void setUrl(final String url) { + this.url = url; + } + + public String getSourceId() { + return sourceId; + } + + public void setSourceId(final String sourceId) { + this.sourceId = sourceId; + } + + public String getSourceName() { + return sourceName; + } + + public void setSourceName(final String sourceName) { + this.sourceName = sourceName; + } + } + + static class JsonRecord{ + String source_id, source_url; + boolean is_primary; + + public String getSource_id() { + return source_id; + } + + public String getSource_url() { + return source_url; + } + + public void setSource_id(String source_id) { + this.source_id = source_id; + } + + public void setSource_url(String source_url) { + this.source_url = source_url; + } + + public boolean isIs_primary() { + return is_primary; + } + + public void setIs_primary(final boolean is_primary) { + this.is_primary = is_primary; + } + } + + static class JsonPeople{ + String person_name, person_id, person_role; + + public String getPerson_name() { + return person_name; + } + + public void setPerson_name(String person_name) { + this.person_name = person_name; + } + + public String getPerson_id() { + return person_id; + } + + public void setPerson_id(String person_id) { + this.person_id = person_id; + } + + public String getPerson_role() { + return person_role; + } + + public void setPerson_role(String person_role) { + this.person_role = person_role; + } + } + + static class JsonOrganization{ + String organization_name, organization_id, organization_role; + + public String getOrganization_name() { + return organization_name; + } + + public void setOrganization_name(String organization_name) { + this.organization_name = organization_name; + } + + public String getOrganization_id() { + return organization_id; + } + + public void setOrganization_id(String organization_id) { + this.organization_id = organization_id; + } + + public String getOrganization_role() { + return organization_role; + } + + public void 
setOrganization_role(String organization_role) { + this.organization_role = organization_role; + } + } + + static class JsonLocation{ + String location_name; + + public String getLocation_name() { + return location_name; + } + + public void setLocation_name(String location_name) { + this.location_name = location_name; + } + } +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/FWFXsltFunctions.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/FWFXsltFunctions.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/FWFXsltFunctions.java (revision 58513) @@ -0,0 +1,30 @@ +package eu.dnetlib.data.transform.xml; + +import org.apache.commons.lang.StringUtils; + +/** + * Created by miriam on 04/05/2017. 
+ */ +public class FWFXsltFunctions { + + /** + * Filters the space-separated tokens of a name by letter case. + * + * @param name the name, split on single spaces + * @param first if true the tokens that are not entirely upper case are kept, if false the entirely upper case tokens are kept + * @return the kept tokens, each followed by a space, with the overall result trimmed + */ + public static String getName(String name, boolean first){ + final StringBuilder kept = new StringBuilder(); + for (final String token : name.split(" ")) { + final boolean upperCase = token.equals(token.toUpperCase()); + if (upperCase != first) { + kept.append(token).append(" "); + } + } + return kept.toString().trim(); + } + + /** + * Hashes a name through AbstractDNetXsltFunctions.md5. + * + * @param name the value to hash + * @return the md5 of name, or name itself when it is null, empty or blank + */ + public static String getMd5(String name){ + if (StringUtils.isBlank(name)) { + return name; + } + return AbstractDNetXsltFunctions.md5(name); + } +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/utils/ontologies/Ontologies.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/utils/ontologies/Ontologies.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/utils/ontologies/Ontologies.java (revision 58513) @@ -0,0 +1,70 @@ +package eu.dnetlib.utils.ontologies; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.Maps; +import com.google.gson.GsonBuilder; + +import eu.dnetlib.data.mapreduce.util.RelDescriptor; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Created by sandro on 12/13/16.
+ */ +public class Ontologies extends HashMap { + + private static final Log log = LogFactory.getLog(Ontologies.class); + + private Map> inverse = Maps.newHashMap(); + + public String inverseOf(final RelDescriptor rd) { + + if (!containsKey(rd.getRelType())) { + log.warn(String.format("unable to find ontology '%s'", rd.getRelType())); + return null; + } + return get(rd.getRelType()).inverseOf(rd.getRelClass()); + } + + public List getTerms(final String termCode) { + if (inverse.isEmpty()) { + initInverse(); + } + return inverse.get(termCode); + } + + private void initInverse() { + log.info("initialising inverse Ontology terms"); + values().forEach(o -> o.getTerms().values().forEach(t -> { + if (!inverse.containsKey(t.getCode())) { + inverse.put(t.getCode(), new ArrayList<>()); + } + inverse.get(t.getCode()).add(t); + })); + + } + + public String toJson() { + return toJson(false); + } + + public String toJson(boolean pretty) { + + final GsonBuilder gson = new GsonBuilder(); + if (pretty) { + gson.setPrettyPrinting(); + } + + return gson.create().toJson(this); + } + + @Override + public String toString() { + return toJson(); + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/utils/ontologies/OntologyTerm.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/utils/ontologies/OntologyTerm.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/utils/ontologies/OntologyTerm.java (revision 58513) @@ -0,0 +1,71 @@ +package eu.dnetlib.utils.ontologies; + +import com.google.gson.Gson; + +/** + * Created by claudio on 12/12/2016. 
+ */ +public class OntologyTerm { + + private String code; + private String encoding; + private String englishName; + private String nativeName; + + private String inverseCode; + + public static OntologyTerm newInstance() { + return new OntologyTerm(); + } + + public String getCode() { + return code; + } + + public String getEncoding() { + return encoding; + } + + public String getEnglishName() { + return englishName; + } + + public String getNativeName() { + return nativeName; + } + + public String getInverseCode() { + return inverseCode; + } + + public OntologyTerm setCode(final String code) { + this.code = code; + return this; + } + + public OntologyTerm setEncoding(final String encoding) { + this.encoding = encoding; + return this; + } + + public OntologyTerm setEnglishName(final String englishName) { + this.englishName = englishName; + return this; + } + + public OntologyTerm setNativeName(final String nativeName) { + this.nativeName = nativeName; + return this; + } + + public OntologyTerm setInverseCode(final String inverseCode) { + this.inverseCode = inverseCode; + return this; + } + + @Override + public String toString() { + return new Gson().toJson(this); + } + +} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/TrustOrdering.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/TrustOrdering.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/TrustOrdering.java (revision 58513) @@ -0,0 +1,34 @@ +package eu.dnetlib.data.transform; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Ordering; + +import eu.dnetlib.data.proto.OafProtos.Oaf; +import 
eu.dnetlib.data.proto.SpecialTrustProtos.SpecialTrust; +import org.apache.commons.lang3.StringUtils; + /** + * Guava Ordering over Oaf objects that compares the trust string found in each object's DataInfo. + */ +public class TrustOrdering extends Ordering { + /** + * Compares two Oaf objects by getDataInfo().getTrust(). + * Equal trust strings compare as 0. Otherwise a left operand equal to SpecialTrust.INFINITE yields 1 and a right + * operand equal to it yields -1; the same two checks are then applied for SpecialTrust.NEUTRAL. + * Any remaining pair is compared with Float.compare on the parsed values, a blank trust being read as "0.9". + * NOTE(review): NEUTRAL is ranked like INFINITE, i.e. above every numeric trust - confirm this is intended. + * + * @param left the first object + * @param right the second object + * @return 0, 1 or -1 for the special cases above, otherwise the result of Float.compare + * @throws NumberFormatException if a non-blank, non-special trust string is not a parsable float + */ + @Override + public int compare(Oaf left, Oaf right) { + String lTrust = left.getDataInfo().getTrust(); + String rTrust = right.getDataInfo().getTrust(); + + if (lTrust.equals(rTrust)) return 0; + + if (lTrust.equals(SpecialTrust.INFINITE.toString())) return 1; + if (rTrust.equals(SpecialTrust.INFINITE.toString())) return -1; + + if (lTrust.equals(SpecialTrust.NEUTRAL.toString())) return 1; + if (rTrust.equals(SpecialTrust.NEUTRAL.toString())) return -1; + + return Float.compare( + Float.parseFloat(StringUtils.isBlank(lTrust) ? "0.9" : lTrust), + Float.parseFloat(StringUtils.isBlank(rTrust) ? "0.9" : rTrust)); + } + /** + * Returns an immutable copy of the given entities sorted with a new TrustOrdering. + * + * @param entities the objects to sort + * @return the sorted immutable list + */ + public static ImmutableList sort(Iterable entities) { + return new TrustOrdering().immutableSortedCopy(entities); + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/ElementList.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/ElementList.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/ElementList.java (revision 58513) @@ -0,0 +1,14 @@ +package eu.dnetlib.data.transform.xml; + +import java.util.ArrayList; +import java.util.List; + +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; + /** + * ArrayList whose listValues() returns a new list holding the getText() value of every element. + */ +@SuppressWarnings("serial") +public class ElementList extends ArrayList { + public List listValues() { + return Lists.newArrayList(Iterables.transform(this, e -> e.getText())); + } +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/openaire/exporter/model/ProjectDetail.java
=================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/openaire/exporter/model/ProjectDetail.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/openaire/exporter/model/ProjectDetail.java (revision 58513) @@ -0,0 +1,157 @@ +package eu.dnetlib.openaire.exporter.model; + +import java.io.IOException; +import java.io.StringReader; +import java.io.StringWriter; +import java.util.List; + +import com.google.gson.Gson; +import org.supercsv.cellprocessor.Optional; +import org.supercsv.cellprocessor.ift.CellProcessor; +import org.supercsv.cellprocessor.ift.StringCellProcessor; +import org.supercsv.io.CsvBeanReader; +import org.supercsv.io.CsvBeanWriter; +import org.supercsv.io.ICsvBeanReader; +import org.supercsv.io.ICsvBeanWriter; +import org.supercsv.prefs.CsvPreference; +import org.supercsv.util.CsvContext; + +/** + * Created by claudio on 22/09/16. 
+ */ +public class ProjectDetail { + + private static final String[] NAMEMAPPING = { "projectId", "acronym", "code", "optional1", "optional2", "jsonextrainfo", "fundingPath" }; + + private String projectId; + private String acronym; + private String code; + private String optional1; + private String optional2; + private String jsonextrainfo; + private List fundingPath; + + public ProjectDetail() {} + + public static ProjectDetail fromJson(final String json) { + return new Gson().fromJson(json, ProjectDetail.class); + } + + /** + * Parses one CSV row into a ProjectDetail. Columns are mapped by position to the NAMEMAPPING properties; the last + * column (fundingPath) is decoded from JSON into a List. + * + * @param csv the CSV text to read + * @return the bean read from the first row (per the Super CSV documentation, null when the input has no rows) + * @throws IOException if reading or closing the CSV reader fails + */ + public static ProjectDetail fromCSV(final String csv) throws IOException { + try (ICsvBeanReader beanReader = new CsvBeanReader(new StringReader(csv), CsvPreference.STANDARD_PREFERENCE)) { + return beanReader.read(ProjectDetail.class, NAMEMAPPING, getProcessors(new StringCellProcessor() { + @Override + public Object execute(final Object value, final CsvContext context) { + return new Gson().fromJson(value.toString(), List.class); + } + })); + } + } + + /** + * Sets up the cell processors, one per entry of NAMEMAPPING: six Optional processors for the plain string columns + * followed by the given processor for the fundingPath column.
+ * + * @return the cell processors + */ + private static CellProcessor[] getProcessors(final CellProcessor fundingPathProcessor) { + return new CellProcessor[] { + new Optional(), // projectId + new Optional(), // acronym + new Optional(), // code + new Optional(), // optional1 + new Optional(), // optional2 + new Optional(), // jsonextrainfo + fundingPathProcessor + }; + } + + public String asJson() { + return new Gson().toJson(this) + '\n'; + } + + public String asCSV() throws IOException { + final StringWriter sb = new StringWriter(); + try (ICsvBeanWriter beanWriter = new CsvBeanWriter(sb, CsvPreference.STANDARD_PREFERENCE)) { + beanWriter.write(this, NAMEMAPPING, getProcessors(new StringCellProcessor() { + @Override + public Object execute(final Object value, final CsvContext context) { + return new Gson().toJson(value); + } + })); + beanWriter.flush(); + } + + return sb.toString(); + } + + public String getProjectId() { + return projectId; + } + + public ProjectDetail setProjectId(final String projectId) { + this.projectId = projectId; + return this; + } + + public String getAcronym() { + return acronym; + } + + public ProjectDetail setAcronym(final String acronym) { + this.acronym = acronym; + return this; + } + + public String getCode() { + return code; + } + + public ProjectDetail setCode(final String code) { + this.code = code; + return this; + } + + public String getOptional1() { + return optional1; + } + + public ProjectDetail setOptional1(final String optional1) { + this.optional1 = optional1; + return this; + } + + public String getOptional2() { + return optional2; + } + + public ProjectDetail setOptional2(final String optional2) { + this.optional2 = optional2; + return this; + } + + public String getJsonextrainfo() { + return jsonextrainfo; + } + + public ProjectDetail setJsonextrainfo(final String jsonextrainfo) { + this.jsonextrainfo = jsonextrainfo; + return this; + } + + public List getFundingPath() { + return fundingPath; + } + + public 
ProjectDetail setFundingPath(final List fundingPath) { + this.fundingPath = fundingPath; + return this; + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/openaire/exporter/model/Project.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/openaire/exporter/model/Project.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/openaire/exporter/model/Project.java (revision 58513) @@ -0,0 +1,274 @@ +package eu.dnetlib.openaire.exporter.model; + +import java.util.ArrayList; +import java.util.List; + +import com.google.common.base.Splitter; +import com.google.common.collect.Lists; +import org.apache.commons.lang.StringUtils; + +/** + * Created by claudio on 20/09/16. + */ +public class Project { + + public static final String INFO_EU_REPO_GRANT_AGREEMENT = "info:eu-repo/grantAgreement/"; + private String code; + private String acronym; + private String title; + private String call_identifier; + private String startdate; + private String enddate; + private boolean ec_sc39; + private boolean oa_mandate_for_publications; + private boolean ec_article29_3; + private String fundingpathid; + private String description; + private String jurisdiction; + private String legalname; + private String funder; + private String countryclass; + private String role; + private String firstname; + private String secondnames; + private String email; + + public Project() { + } + + public String getIdnamespace() { + String res = INFO_EU_REPO_GRANT_AGREEMENT + getFunder()+"/"; + final String fundingProgram = asFundingProgram(getFundingpathid()); + if (StringUtils.isNotBlank(fundingProgram)) { + res += fundingProgram; + } + res += "/" + escapeCode(getCode()); + if (StringUtils.isNotBlank(getJurisdiction())) { + res 
+= "/" + getJurisdiction(); + } + return res; + } + + public String getListLabel() { + return String.format("for:value:component:_%s_project_id", asFunder(getFunder())); + } + + private String asFunder(final String legalshortname) { + switch (legalshortname.toLowerCase()) { + case "ec": + return asFundingProgram(getFundingpathid()).toLowerCase(); + default: + return legalshortname.toLowerCase(); + } + } + + public List asList() { + return Lists.newArrayList( + clean(getCode()), + clean(getAcronym()), + clean(getTitle()), + clean(getCall_identifier()), + clean(getStartdate()), + clean(getEnddate()), + String.valueOf(isEc_article29_3()), + String.valueOf(isOa_mandate_for_publications()), + String.valueOf(isEc_article29_3()), + clean(getDescription()), + clean(getLegalname()), + clean(getCountryclass()), + clean(getRole()), + clean(getFirstname()), + clean(getSecondnames()), + clean(getEmail())); + } + + private String clean(final String s) { + return StringUtils.isNotBlank(s) ? "\"" + s.replaceAll("\\n|\\t|\\s+", " ").replace("\"","\"\"").trim() + "\"" : ""; + } + + private String escapeCode(final String code) { + return replaceSlash(code); + } + + private String asFundingProgram(final String fundingpathid) { + final ArrayList strings = Lists.newArrayList(Splitter.on("::").split(fundingpathid)); + if(strings.size() <= 1) throw new IllegalStateException("Unexpected funding id: "+fundingpathid); + if(strings.size() == 2) return ""; + else return replaceSlash(strings.get(2)); + } + + private String replaceSlash(final String s) { + return s.replaceAll("/", "%2F"); + } + + public String getCode() { + return code; + } + + public Project setCode(final String code) { + this.code = code; + return this; + } + + public String getAcronym() { + return acronym; + } + + public Project setAcronym(final String acronym) { + this.acronym = acronym; + return this; + } + + public String getTitle() { + return title; + } + + public Project setTitle(final String title) { + this.title = 
title; + return this; + } + + public String getCall_identifier() { + return call_identifier; + } + + public Project setCall_identifier(final String call_identifier) { + this.call_identifier = call_identifier; + return this; + } + + public String getStartdate() { + return startdate; + } + + public Project setStartdate(final String startdate) { + this.startdate = startdate; + return this; + } + + public String getEnddate() { + return enddate; + } + + public Project setEnddate(final String enddate) { + this.enddate = enddate; + return this; + } + + public boolean isEc_sc39() { + return ec_sc39; + } + + public Project setEc_sc39(final boolean ec_sc39) { + this.ec_sc39 = ec_sc39; + return this; + } + + public boolean isOa_mandate_for_publications() { + return oa_mandate_for_publications; + } + + public Project setOa_mandate_for_publications(final boolean oa_mandate_for_publications) { + this.oa_mandate_for_publications = oa_mandate_for_publications; + return this; + } + + public boolean isEc_article29_3() { + return ec_article29_3; + } + + public Project setEc_article29_3(final boolean ec_article29_3) { + this.ec_article29_3 = ec_article29_3; + return this; + } + + public String getFundingpathid() { + return fundingpathid; + } + + public Project setFundingpathid(final String fundingpathid) { + this.fundingpathid = fundingpathid; + return this; + } + + public String getDescription() { + return description; + } + + public Project setDescription(final String description) { + this.description = description; + return this; + } + + public String getJurisdiction() { + return jurisdiction; + } + + public Project setJurisdiction(final String jurisdiction) { + this.jurisdiction = jurisdiction; + return this; + } + + public String getLegalname() { + return legalname; + } + + public Project setLegalname(final String legalname) { + this.legalname = legalname; + return this; + } + + public String getCountryclass() { + return countryclass; + } + + public Project setCountryclass(final 
String countryclass) { + this.countryclass = countryclass; + return this; + } + + public String getRole() { + return role; + } + + public Project setRole(final String role) { + this.role = role; + return this; + } + + public String getFirstname() { + return firstname; + } + + public Project setFirstname(final String firstname) { + this.firstname = firstname; + return this; + } + + public String getSecondnames() { + return secondnames; + } + + public Project setSecondnames(final String secondnames) { + this.secondnames = secondnames; + return this; + } + + public String getEmail() { + return email; + } + + public Project setEmail(final String email) { + this.email = email; + return this; + } + + public String getFunder() { + return funder; + } + + public Project setFunder(final String funder) { + this.funder = funder; + return this; + } +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/OafToRowMapper.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/OafToRowMapper.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/OafToRowMapper.java (revision 58513) @@ -0,0 +1,47 @@ +package eu.dnetlib.data.transform; + +import java.util.List; +import java.util.function.Function; + +import com.google.common.collect.Lists; +import eu.dnetlib.data.mapreduce.util.OafDecoder; +import eu.dnetlib.data.mapreduce.util.OafEntityDecoder; +import eu.dnetlib.data.proto.OafProtos.Oaf; + +public class OafToRowMapper implements Function> { + + public static final String BODY = "body"; + + @Override + public List apply(final Oaf oaf) { + final List rows = Lists.newArrayList(); + + final OafDecoder d = OafDecoder.decode(oaf); + final OafEntityDecoder entity = 
d.decodeEntity(); + + final Row r = new Row(d.getCFQ(), entity.getId()); + switch (entity.getType()) { + + case project: + r.addColumn(new Column<>(BODY, oaf.toByteArray())); + break; + case result: + oaf.getEntity().getCachedOafRelList().stream() + .map(cachedRel -> { + final Oaf.Builder oafRel = Oaf.newBuilder(cachedRel); + oafRel.getRelBuilder().clearCachedOafTarget(); + return oafRel.build(); + }).forEach(oafRel -> r.addColumn(new Column<>(OafDecoder.decode(oafRel).getCFQ(), oafRel.toByteArray()))); + break; + case datasource: + + break; + case organization: + + break; + } + + return rows; + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/resources/eu/dnetlib/actionmanager/xslt/dmf2updateActions.xslt =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/resources/eu/dnetlib/actionmanager/xslt/dmf2updateActions.xslt (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/resources/eu/dnetlib/actionmanager/xslt/dmf2updateActions.xslt (revision 58513) @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/Element.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/Element.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml/Element.java (revision 58513) @@ -0,0 +1,65 @@ +package eu.dnetlib.data.transform.xml; + +import java.util.Map; + +import 
org.apache.commons.lang.StringUtils; + +import com.google.common.collect.Maps; + +public class Element { + + private String text; + private Map attributes; + + public Element(final String text, final Map attributes) { + this.text = text; + this.attributes = attributes; + } + + public Element(final String text) { + this.text = text; + this.attributes = Maps.newHashMap(); + } + + public Element() { + this.text = ""; + this.attributes = Maps.newHashMap(); + } + + public String getText() { + return text; + } + + public void setText(final String text) { + this.text = text; + } + + public Map getAttributes() { + return attributes; + } + + public String getAttributeValue(final String attributeName) { + return getAttributes().get(attributeName); + } + + public void setAttributes(final Map attributes) { + this.attributes = attributes; + } + + public boolean isEmpty() { + return !(hasText() || hasAttributes()); + } + + private boolean hasAttributes() { + return (getAttributes() != null) && !getAttributes().isEmpty(); + } + + public boolean hasText() { + return (getText() != null) && !getText().isEmpty(); + } + + @Override + public String toString() { + return "{ " + StringUtils.left(text, 20) + attributes.toString() + " }"; + } +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/resources/eu/dnetlib/actionmanager/xslt/dmf2insertActions.xslt =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/resources/eu/dnetlib/actionmanager/xslt/dmf2insertActions.xslt (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/resources/eu/dnetlib/actionmanager/xslt/dmf2insertActions.xslt (revision 58513) @@ -0,0 +1,134 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/Utils.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/Utils.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/Utils.java (revision 58513) @@ -0,0 +1,242 @@ +package eu.dnetlib.data.transform.xml2; + +import java.nio.charset.Charset; +import java.util.Collection; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import com.google.common.collect.Maps; +import com.google.protobuf.Descriptors; +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Message; +import eu.dnetlib.data.proto.FieldTypeProtos.*; +import eu.dnetlib.data.proto.OafProtos.OafRel; +import eu.dnetlib.data.proto.ResultProtos.Result.Context; +import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference; +import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.math.NumberUtils; + +import static eu.dnetlib.data.transform.xml2.VtdUtilityParser.xpath; + +public class Utils { + + public static final String URL_REGEX = "^(http|https|ftp)\\://.*"; + + public static final String ID_SEPARATOR = "::"; + + public static final String TITLE_TYPE = "titleType"; + public static final String DATE_TYPE = "dateType"; + public static final String KEYWORD = "keyword"; + + public static final String DNET_EXT_REF_TYPOLOGIES = "dnet:externalReference_typologies"; + public static final String DNET_TITLE_TYPOLOGIES = "dnet:dataCite_title"; + public static final String 
DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies"; + public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies"; + public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource"; + public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource"; + public static final String DNET_ACCESS_MODES = "dnet:access_modes"; + public static final String DNET_LANGUAGES = "dnet:languages"; + public static final String DNET_PID_TYPES = "dnet:pid_types"; + + public static final String IDENTIFIER_TYPE = "identifierType"; + public static final String ALTERNATE_IDENTIFIER_TYPE = "alternateIdentifierType"; + public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions"; + + public static final String CLASSID = "classid"; + public static final String CLASSNAME = "classname"; + public static final String SCHEMEID = "schemeid"; + public static final String SCHEMENAME = "schemename"; + + public static final String RELATION_TYPE = "relationType"; + public static final String RELATED_IDENTIFIER_TYPE = "relatedIdentifierType"; + public static final String RIGHTS_URI = "rightsURI"; + + public static final String UTF_8 = "UTF-8"; + + // publication + public static final String PROJECTID = "projectid"; + public static final String RELATED_DATASET = "relatedDataSet"; + public static final String RELATED_PUBLICATION = "relatedPublication"; + public static final String RELATED_IDENTIFIER = "relatedIdentifier"; + + protected static Map mappingAccess = Maps.newHashMap(); + + static { + mappingAccess.put("info:eu-repo/semantics/openAccess", "OPEN"); + mappingAccess.put("info:eu-repo/semantics/closedAccess", "CLOSED"); + mappingAccess.put("info:eu-repo/semantics/restrictedAccess", "RESTRICTED"); + mappingAccess.put("info:eu-repo/semantics/embargoedAccess", "EMBARGO"); + + // Transformator now maps the access rights into proper values, not sure if it does for all datasets. 
+ mappingAccess.put("OPEN", "OPEN"); + mappingAccess.put("CLOSED", "CLOSED"); + mappingAccess.put("RESTRICTED", "RESTRICTED"); + mappingAccess.put("EMBARGO", "EMBARGO"); + } + + public static String getValue(final Node node, final String defaultValue) { + return (node != null && StringUtils.isNotBlank(node.getTextValue())) ? node.getTextValue() : defaultValue; + } + + public static String getValue(final String value, final String defaultValue) { + return StringUtils.isNotBlank(value) ? value : defaultValue; + } + + public static KeyValue getKV(final String id, final String name) { + return KeyValue.newBuilder().setKey(id).setValue(name).build(); + } + + public static Qualifier getSimpleQualifier(final String classname, final String schemename) { + return getQualifier(classname, classname, schemename, schemename); + } + + public static Qualifier getQualifier(final String classid, final String classname, final String schemeid, final String schemename) { + return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemeid).setSchemename(schemename).build(); + } + + public static StructuredProperty getStructuredProperty(final String value, + final String classid, + final String classname, + final String schemeid, + final String schemename) { + if ((value == null) || value.isEmpty()) return null; + return StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classid, classname, schemeid, schemename)).build(); + } + + /** + * Gets the classname of the given class code + * + * @param code class code. + * @return the class name, if the code is a key of the map. The code itself otherwise. 
+ */ + public static String getClassName(final String code) { + final String classname = AbstractDNetXsltFunctions.code2name.get(code); + if (StringUtils.isBlank(classname)) return code; + return classname; + } + + public static String metadataXpath(final String otherValues) { + return xpath("record", "metadata", otherValues); + } + + public static void addField(final Message.Builder builder, final Descriptors.FieldDescriptor descriptor, final Object value) { + + if (value == null) return; + + if (value instanceof Stream) { + ((Stream) value).forEach(o -> addField(builder, descriptor, o)); + } else if (value instanceof Collection>) { + for (final Object o : (Collection) value) { + addField(builder, descriptor, o); + } + } else { + Object v = value; + switch (descriptor.getType()) { + case BOOL: + v = Boolean.valueOf(value.toString()); + break; + case BYTES: + v = value.toString().getBytes(Charset.forName(UTF_8)); + break; + case DOUBLE: + v = Double.valueOf(value.toString()); + break; + case FLOAT: + v = Float.valueOf(value.toString()); + break; + case INT32: + case INT64: + case SINT32: + case SINT64: + v = Integer.valueOf(value.toString()); + break; + case MESSAGE: + final Message.Builder q = builder.newBuilderForField(descriptor); + + if (value instanceof Message.Builder) { + v = ((Message.Builder) value).build(); + final byte[] b = ((Message) v).toByteArray(); + try { + q.mergeFrom(b); + } catch (final InvalidProtocolBufferException e) { + throw new IllegalArgumentException("Unable to merge value: " + v + " with builder: " + q.getDescriptorForType().getName()); + } + } else if (Qualifier.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof Qualifier) { + q.mergeFrom((Qualifier) v); + } + } else if (StructuredProperty.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof StructuredProperty) { + q.mergeFrom((StructuredProperty) v); + } + } else if 
(KeyValue.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof KeyValue) { + q.mergeFrom((KeyValue) v); + } + } else if (Journal.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof Journal) { + q.mergeFrom((Journal) v); + } + } else if (Context.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof Context) { + q.mergeFrom((Context) v); + } + } else if (Author.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof Author) { + q.mergeFrom((Author) v); + } + } else if (ExternalReference.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof ExternalReference) { + q.mergeFrom((ExternalReference) v); + } + } else if (OafRel.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof OafRel) { + q.mergeFrom((OafRel) v); + } + } else if (StringField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof StringField) { + q.mergeFrom((StringField) v); + } else { + q.setField(StringField.getDescriptor().findFieldByName("value"), v); + } + } else if (BoolField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof BoolField) { + q.mergeFrom((BoolField) v); + } else if (value instanceof String) { + q.setField(BoolField.getDescriptor().findFieldByName("value"), Boolean.valueOf((String) v)); + } else { + q.setField(BoolField.getDescriptor().findFieldByName("value"), v); + } + } else if (IntField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { + if (value instanceof IntField) { + q.mergeFrom((IntField) v); + } else if (value instanceof String) { + q.setField(IntField.getDescriptor().findFieldByName("value"), NumberUtils.toInt((String) v)); + } else { + q.setField(IntField.getDescriptor().findFieldByName("value"), v); + } + } + + v = 
q.buildPartial(); + break; + default: + break; + } + + doAddField(builder, descriptor, v); + } + } + + private static void doAddField(final Message.Builder builder, final Descriptors.FieldDescriptor fd, final Object value) { + if (value != null) { + if (fd.isRepeated()) { + builder.addRepeatedField(fd, value); + } else if (fd.isOptional() || fd.isRequired()) { + builder.setField(fd, value); + } + } + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/SpecificationDescriptor.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/SpecificationDescriptor.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/SpecificationDescriptor.java (revision 58513) @@ -0,0 +1,52 @@ +package eu.dnetlib.data.transform.xml2; + +import java.util.List; +import java.util.Map; +import java.util.function.Function; + +import com.google.common.collect.Maps; +import com.google.protobuf.Message.Builder; + +import eu.dnetlib.miscutils.collections.Pair; + +/** + * Container class for Proto Builder and a Map that track associations field names -> [xpath, function] + */ +public class SpecificationDescriptor { + + private Builder builder; + + private Map, Object>>> fields; + + public static SpecificationDescriptor newInstance() { + return new SpecificationDescriptor().setFields(Maps.newHashMap()); + } + + public Builder getBuilder() { + return builder; + } + + public SpecificationDescriptor put(final String fieldName, final String xpath, final Function, Object> f) { + getFields().put(fieldName, new Pair<>(xpath, f)); + return this; + } + + public Pair, Object>> get(final String fieldName) { + return getFields().get(fieldName); + } + + public Map, Object>>> 
getFields() { + return fields; + } + + public SpecificationDescriptor setBuilder(final Builder builder) { + this.builder = builder; + return this; + } + + public SpecificationDescriptor setFields(final Map, Object>>> fields) { + this.fields = fields; + return this; + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/AbstractAuthorMergerTest.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/AbstractAuthorMergerTest.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/AbstractAuthorMergerTest.java (revision 58513) @@ -0,0 +1,167 @@ +package eu.dnetlib.data.transform; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import com.google.gson.internal.StringMap; +import com.googlecode.protobuf.format.JsonFormat; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.math.stat.descriptive.DescriptiveStatistics; +import org.junit.Assert; + +import java.io.IOException; +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import static eu.dnetlib.data.proto.FieldTypeProtos.Author; +import static eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; +import static java.lang.String.format; +import static java.util.stream.Collectors.*; + +public abstract class AbstractAuthorMergerTest { + + protected void doTestMergeAuthorGroup(final Iterable input) throws IOException { + final DescriptiveStatistics groupSize = new DescriptiveStatistics(); + final AtomicInteger authorCount = new AtomicInteger(0); + final AtomicInteger authorCountMerged = new 
AtomicInteger(0); + + final AtomicInteger pidCount = new AtomicInteger(0); + final AtomicInteger pidCountMerged = new AtomicInteger(0); + final Map pidTypes = new HashMap<>(); + final Map pidTypesMerged = new HashMap<>(); + final AtomicInteger orcidCount = new AtomicInteger(0); + + final AtomicInteger equalGroupSizeCount = new AtomicInteger(0); + final AtomicInteger lines = new AtomicInteger(0); + + final List> authorGroups = Lists.newArrayList(); + + final Set orcidIn = new HashSet<>(); + final Set orcidOut= new HashSet<>(); + + for(String line : input) { + lines.incrementAndGet(); + final List>> list = new Gson().fromJson(line, List.class); + final List> groups = new ArrayList<>(); + groups.addAll(list.stream() + .filter(Objects::nonNull) + .map(j -> { + return j.stream().map(AbstractAuthorMergerTest::fixRank).collect(toList()); + }) + .map(AbstractAuthorMergerTest::asAuthors) + .filter(Objects::nonNull) + .collect(toList())); + + groups.stream().forEach(g -> { + groupSize.addValue(g.size()); + g.stream().forEach(a -> { + pidCount.addAndGet(a.getPidCount()); + authorCount.incrementAndGet(); + + countPids(pidTypes, a); + orcidIn.addAll(getOrcid(a)); + }); + }); + + final HashSet groupSizes = groups.stream() + .map(a -> a.size()) + .collect(toCollection(HashSet::new)); + + if (groupSizes.size() == 1) { + equalGroupSizeCount.incrementAndGet(); + } + + authorGroups.addAll(groups); + + final List merged = AuthorMerger.merge(groups); + + if (!groupSizes.isEmpty()) { + Assert.assertTrue( + format("the size of the merged group must be the size of one of the input groups. 
Merged size: %s, Sizes: %s ", + merged.size(), groupSizes), + groupSizes.contains(merged.size())); + } + + authorCountMerged.addAndGet(merged.size()); + merged.forEach(a -> { + pidCountMerged.addAndGet(a.getPidCount()); + countPids(pidTypesMerged, a); + orcidOut.addAll(getOrcid(a)); + }); + } + + System.out.println(format("lines %s", lines.intValue())); + System.out.println(format("author groups %s", authorGroups.size())); + System.out.println(format("author count %s", authorCount.intValue())); + System.out.println(format("avg authorGroups per group %s", authorCount.doubleValue() / authorGroups.size())); + + System.out.println(format("pid types %s", pidTypes)); + System.out.println(format("pid count %s", pidCount.intValue())); + System.out.println(format("pid rate among authors %s", pidCount.doubleValue() / authorCount.intValue())); + + System.out.println(format("equal group size %s", equalGroupSizeCount.incrementAndGet())); + + System.out.println(format("[merged] pid count %s", pidCountMerged.intValue())); + System.out.println(format("[merged] avg pid per group %s", pidCountMerged.doubleValue() / authorCountMerged.intValue())); + System.out.println(format("[merged] pid types %s", pidTypesMerged)); + + System.out.println(format("ORCIDs in %s", orcidIn.size())); + System.out.println(format("ORCIDs out %s", orcidOut.size())); + + Sets.SetView diff = Sets.difference(orcidIn, orcidOut); + System.out.println(format("Difference between the number of distinct input ORCIDs and output (merged) ORCIDs: %s", diff.size())); + + System.out.println(format("\ngroup size %s", groupSize)); + } + + private List getOrcid(Author a) { + return a.getPidList().stream() + .filter(p -> p.getKey().equalsIgnoreCase("orcid")) + .map(KeyValue::getValue) + .collect(Collectors.toList()); + } + + private void countPids(Map pidTypes, Author a) { + a.getPidList().stream() + .collect(groupingBy(KeyValue::getKey, summingInt(e -> 1))) + .forEach((k, v) -> pidTypes.merge(k, v, (v1, v2) -> v1 + v2)); + 
} + + private static StringMap fixRank(final StringMap j) { + final StringMap m = new StringMap<>(); + + m.putAll(j); + + Object rank = j.get("rank"); + rank = StringUtils.substringBefore(rank.toString(), ".0"); + m.put("rank", Integer.parseInt(rank.toString())); + return m; + } + + private static List asAuthors(Object o) { + + final JsonElement json = new Gson().toJsonTree(o); + + return StreamSupport.stream(json.getAsJsonArray().spliterator(), false) + .map(j -> j.toString()) + .filter(Objects::nonNull) + .map(AbstractAuthorMergerTest::asAuthor) + .filter(Objects::nonNull) + .collect(toList()); + } + + private static Author asAuthor(String json) { + + final Author.Builder a = Author.newBuilder(); + try { + JsonFormat.merge(json, a); + } catch (JsonFormat.ParseException e) { + throw new IllegalArgumentException(e); + } + return a.build(); + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/PublicationToProto.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/PublicationToProto.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/PublicationToProto.java (revision 58513) @@ -0,0 +1,67 @@ +package eu.dnetlib.data.transform.xml2; + +import java.util.Map; + +import com.google.common.collect.Maps; + +import static eu.dnetlib.data.transform.xml2.Utils.*; +import static eu.dnetlib.data.transform.xml2.VtdUtilityParser.xpath; + +public class PublicationToProto extends AbstractResultDom4jParser { + + public PublicationToProto() { + super(getFields()); + } + + public PublicationToProto(final boolean invisible, final String provenance, final String trust) { + super(invisible, provenance, trust, getFields()); + } + + @Override + 
protected String getResulttype(final String cobjcategory) { + switch (cobjcategory) { + case "0029": + return "software"; + default: + return "publication"; + } + } + + protected static Map getFields() { + final Map fields = Maps.newHashMap(); + + fields.put("originalId", xpath("record", "header", "recordIdentifier")); + fields.put("dateofcollection", xpath("record", "header", "dateOfCollection")); + fields.put("dateoftransformation", xpath("record", "header", "dateOfTransformation")); + fields.put("collectedfrom", metadataXpath("collectedFrom")); + fields.put("pid", metadataXpath("identifier")); + fields.put("license", metadataXpath("license")); + fields.put("accessright", metadataXpath("accessrights")); + fields.put("instancetype", metadataXpath("CobjCategory")); + fields.put("hostedby", metadataXpath("hostedBy")); + fields.put("url", metadataXpath("identifier")); + fields.put("title", metadataXpath("title")); + fields.put("description", metadataXpath("description")); + fields.put("dateofacceptance", metadataXpath("dateAccepted")); + fields.put("embargoenddate", metadataXpath("embargoenddate")); + fields.put("storagedate", metadataXpath("storagedate")); + fields.put("author", metadataXpath("creator")); + fields.put("contributor", metadataXpath("contributor")); + fields.put("subject", metadataXpath("subject")); + fields.put("format", metadataXpath("format")); + fields.put("source", metadataXpath("source")); + fields.put("publisher", metadataXpath("publisher")); + fields.put("language", metadataXpath("language")); + fields.put("resulttype", metadataXpath("CobjCategory")); + fields.put("concept", metadataXpath("concept")); + fields.put("externalReference", metadataXpath("reference")); + + fields.put("cachedRel", String.format("%s | %s | %s", + metadataXpath("projectid"), + metadataXpath("relatedDataSet"), + xpath("record", "metadata") + "//*[local-name()='relatedIdentifier']")); + + return fields; + } + +} Index: 
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/AuthorMergerTestIT.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/AuthorMergerTestIT.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/AuthorMergerTestIT.java (revision 58513) @@ -0,0 +1,47 @@ +package eu.dnetlib.data.transform; + +import com.google.common.base.Predicates; +import com.mongodb.client.FindIterable; +import com.mongodb.client.MongoDatabase; +import com.mongodb.client.MongoIterable; +import eu.dnetlib.data.transform.xml.vtd.ConfigurationTestConfig; +import org.bson.Document; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; + +import java.io.IOException; +import java.util.List; + +import static java.util.stream.Collectors.toList; + +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(classes = { ConfigurationTestConfig.class }) +public class AuthorMergerTestIT extends AbstractAuthorMergerTest { + + @Autowired + private MongoDatabase db; + + private String collection = "authors"; + + @Test + public void test_mergeAuthorsGroup() throws IOException { + + final FindIterable docs = db.getCollection(collection).find(); + + MongoIterable i = docs + .map(d -> ((List>) d.get("authors")).stream() + .filter(Predicates.notNull()) + .map(g -> g.stream() + .map(Document::toJson) + .collect(toList())) + .collect(toList())) + .map(List::toString); + + doTestMergeAuthorGroup(i); + + } + +} Index: 
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/Node.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/Node.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/Node.java (revision 58513) @@ -0,0 +1,36 @@ +package eu.dnetlib.data.transform.xml2; + +import java.util.Map; + +public class Node { + + private String name; + + private String textValue; + + private Map attributes; + + public String getTextValue() { + return textValue; + } + + public void setTextValue(final String textValue) { + this.textValue = textValue; + } + + public Map getAttributes() { + return attributes; + } + + public void setAttributes(final Map attributes) { + this.attributes = attributes; + } + + public String getName() { + return name; + } + + public void setName(final String name) { + this.name = name; + } +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/vtd/VtdParserToProtoIT.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/vtd/VtdParserToProtoIT.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/vtd/VtdParserToProtoIT.java (revision 58513) @@ -0,0 +1,77 @@ +package eu.dnetlib.data.transform.xml.vtd; + +import com.mongodb.client.MongoCollection; +import com.mongodb.client.MongoDatabase; +import eu.dnetlib.data.transform.xml2.DatasetToProto; +import org.apache.commons.lang3.time.StopWatch; +import 
org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.math.stat.descriptive.DescriptiveStatistics; +import org.bson.Document; +import org.junit.Ignore; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; + +import java.io.IOException; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.StreamSupport; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(classes = { ConfigurationTestConfig.class }) +public class VtdParserToProtoIT { + + private static final Log log = LogFactory.getLog(VtdParserToProtoIT.class); + public static final String COLLECTION_NAME = "datacite"; + private static final int BATCH_SIZE = 10000; + public static final int LOG_FREQ = 5000; + + @Autowired + private MongoDatabase db; + + @Test + @Ignore + public void testParseDatacite() throws IOException { + + final MongoCollection collection = db.getCollection(COLLECTION_NAME); + + final long collectionSize = collection.count(); + log.info(String.format("found %s records in collection '%s'", collectionSize, COLLECTION_NAME)); + + final AtomicInteger read = new AtomicInteger(0); + final DescriptiveStatistics stats = new DescriptiveStatistics(); + + final StopWatch timer = new StopWatch(); + + final DatasetToProto mapper = new DatasetToProto(); + StreamSupport.stream(collection.find().batchSize(BATCH_SIZE).spliterator(), false) + .peek(d -> { + if (read.addAndGet(1) % LOG_FREQ == 0) { + log.info(String.format("records read so far %s", read.get())); + log.info(String.format("stats so far %s", stats.toString())); + } + }) + .map(d -> (String) d.get("body")) + .filter(Objects::nonNull) + .peek(s 
-> timer.start()) + .map(mapper) + .forEach(oaf -> { + assertNotNull(oaf); + assertTrue(oaf.hasEntity()); + + timer.stop(); + stats.addValue(timer.getTime()); + timer.reset(); + }); + + log.info(String.format("processed %s/%s records", read.get(), collectionSize)); + } + + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/DatasetToProto.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/DatasetToProto.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/DatasetToProto.java (revision 58513) @@ -0,0 +1,72 @@ +package eu.dnetlib.data.transform.xml2; + +import java.util.Map; + +import com.google.common.collect.Maps; + +import static eu.dnetlib.data.transform.xml2.Utils.*; +import static eu.dnetlib.data.transform.xml2.VtdUtilityParser.xpath; + +public class DatasetToProto extends AbstractResultVtdParser { + + public static DatasetToProto newInstance() { + return new DatasetToProto(); + } + + public DatasetToProto() { + super(getFields()); + } + + public DatasetToProto(final boolean invisible, final String provenance, final String trust) { + super(invisible, provenance, trust, getFields()); + } + + @Override + protected String getResulttype(final String cobjcategory) { + switch (cobjcategory) { + case "0029": + return "software"; + default: + return "dataset"; + } + } + + private static Map getFields() { + final Map fields = Maps.newHashMap(); + fields.put("originalId", "//*[local-name() = 'resource']/*[local-name()='identifier'] | //*[local-name()='resource']//*[local-name()='alternateIdentifier']"); + fields.put("collectedfrom", metadataXpath("collectedFrom")); + fields.put("pid", "//*[local-name() = 
'resource']/*[local-name()='identifier'] | //*[local-name()='resource']//*[local-name()='alternateIdentifier']"); + fields.put("dateofcollection", xpath("record", "header", "dateOfCollection")); + fields.put("dateoftransformation", xpath("record", "header", "dateOfTransformation")); + fields.put("license", metadataXpath("resource") + "//*[local-name()='rights']"); + fields.put("accessright", metadataXpath("accessrights")); + fields.put("instancetype", metadataXpath("CobjCategory")); + fields.put("hostedby", metadataXpath("hostedBy")); + fields.put("url", "/*[local-name()='record']/*[local-name()='metadata']/*[local-name()='resource']/*[local-name()='identifier' and (./@identifierType='DOI' or ./@identifierType='URL' ) ] | /*[local-name()='record']/*[local-name()='metadata']/*[local-name()='resource']//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='URL']"); + fields.put("title", "//*[local-name() = 'title']"); + fields.put("description", xpath("record", "metadata", "resource", "descriptions", "description")); + fields.put("storagedate", "//*[local-name() = 'date' and ./@dateType = 'Issued']"); + fields.put("lastmetadataupdate", "//*[local-name() = 'date' and ./@dateType = 'Updated']"); + fields.put("embargoenddate", "//*[local-name() = 'date' and ./@dateType = 'Available']"); + fields.put("dateofacceptance", metadataXpath("dateAccepted")); + fields.put("author", xpath("record", "metadata", "resource", "creator", "creatorName")); + fields.put("contributor", xpath("record", "metadata", "resource", "contributor", "contributorName")); + fields.put("subject", xpath("record", "metadata", "resource", "subjects", "subject")); + fields.put("format", xpath("record", "metadata", "resource", "formats", "format")); + fields.put("size", xpath("record", "metadata", "resource", "sizes", "size")); + fields.put("version", xpath("record", "metadata", "resource", "versions", "version")); + fields.put("publisher", xpath("record", "metadata", "resource", 
"publisher")); + fields.put("language", xpath("record", "metadata", "language")); + fields.put("resourcetype", xpath("record", "metadata", "resource", "resourceType")); + fields.put("resulttype", xpath("record", "metadata", "CobjCategory")); + + fields.put("cachedRel", String.format("%s | %s | %s | %s", + metadataXpath("projectid"), + metadataXpath("relatedPublication"), + metadataXpath("relatedDataSet"), + metadataXpath("resource") + "//*[local-name()='relatedIdentifier']")); + + return fields; + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/VtdException.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/VtdException.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/VtdException.java (revision 58513) @@ -0,0 +1,23 @@ +package eu.dnetlib.data.transform.xml2; + +/** + * Created by claudio on 18/01/2017. 
+ */ +public class VtdException extends Exception { + + public VtdException(final Exception e) { + super(e); + } + + public VtdException(final Throwable e) { + super(e); + } + + public VtdException(final String msg, final Exception e) { + super(msg, e); + } + + public VtdException(final String msg, final Throwable e) { + super(msg, e); + } +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/OpenTrialsXsltFunctionsTest.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/OpenTrialsXsltFunctionsTest.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/OpenTrialsXsltFunctionsTest.java (revision 58513) @@ -0,0 +1,181 @@ +package eu.dnetlib.data.transform.xml; + +import java.util.List; + +import eu.dnetlib.data.transform.xml.OpenTrialsXsltFunctions.JsonProv; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + + +/** + * OpenTrialsXsltFunctions Tester. 
+ * + */ +public class OpenTrialsXsltFunctionsTest { + + private String jsonProv = "[{\"url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]"; + private String jsonProvWithNull = "[{\"url\" : \"\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]"; + private String jidentifiers = "{112683,NCT00920439}"; + + + private String jsonRecord = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\", \"is_primary\" : true},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]"; + private String jsonRecordNull = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\"},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]"; + private String jsonRecordVoid = "[{\"source_id\" : \"\", \"source_url\" : \"\", \"is_primary\" : \"\"}]"; + private String jsonRecondPrimary = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\", \"is_primary\" : false},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]"; + + private String jsonPeopleVoid ="[{\"person_name\" : null, \"person_id\" : null, \"person_role\" : null}]"; + private String 
jsonPeopleOne = "[{\"person_name\" : \"Henk Verheul, M.D., PhD\", \"person_id\" : \"116438e9-f8b1-46e5-a1f8-20f851cab73c\", \"person_role\" : \"principal_investigator\"}]"; + private String jsonPeopleMore = "[{\"person_name\" : \"Henk Verheul, M.D., PhD\", \"person_id\" : \"116438e9-f8b1-46e5-a1f8-20f851cab73c\", \"person_role\" : \"principal_investigator\"},{\"person_name\" : \"Miriam Pippolippo Baglioni, PhD\", \"person_id\" : \"fake\", \"person_role\" : \"principal_investigator\"}]"; + + private String jsonOrganizationVoid = "[{\"organization_name\" : null, \"organization_id\" : null, \"organization_role\" : null}]"; + private String jsonOrganizationOne = "[{\"organization_name\" : \"Södertälje sjukhus AB\", \"organization_id\" : \"15f0d004-b82b-408c-8605-38a57352468d\", \"organization_role\" : \"sponsor\"}]"; + private String jsonOrganizationMore = "[{\"organization_name\" : \"Södertälje sjukhus AB\", \"organization_id\" : \"15f0d004-b82b-408c-8605-38a57352468d\", \"organization_role\" : \"sponsor\"},{\"organization_name\" : \"Miriam Baglioni AB\", \"organization_id\" : \"fake\", \"organization_role\" : \"primary_sponsor\"}]"; + + private String jsonLocationVoid = "[{\"location_name\" : null}]"; + private String jsonLocationOne = "[{\"location_name\" : \"China\"}]"; + private String jsonLocationMore = "[{\"location_name\" : \"China\"},{\"location_name\" : \"North Korea\"}]"; + + @Before + public void before() throws Exception { + } + + @After + public void after() throws Exception { + } + + /** + * Method: getProvs(String jsonProvList) + */ + @Test + public void testGetProvs() throws Exception { + List list = OpenTrialsXsltFunctions.getProvs(jsonProv); + assertEquals(2, list.size()); + } + + /** + * Method: getMainIdentifierURL(String jsonProvList) + */ + @Test + public void testGetMainIdentifierURL() throws Exception { + String url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProv); + assertEquals( 
"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508", url ); + url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProvWithNull); + assertEquals("https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true", url); + } + + @Test + public void testGetPrimaryRecordUrl(){ + String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecord); + assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url); + } + + @Test + public void testGetPrimaryRecordID(){ + String id = OpenTrialsXsltFunctions.getPrimaryRecordIdentifier(jsonRecord); + assertEquals("nct", id); + } + + @Test + public void testGetPrimaryRecordUrlNull(){ + String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecordNull); + assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url); + } + + @Test + public void testGetPrimaryRecordUrlVoid(){ + String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecordVoid); + assertEquals("", url); + } + + @Test + public void testGetPrimaryRecordUrlNoPrimary(){ + String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecondPrimary); + assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url); + } + @Test + public void testGetPrimaryRecordIDNoPrimary(){ + String id = OpenTrialsXsltFunctions.getPrimaryRecordIdentifier(jsonRecondPrimary); + assertEquals("nct", id); + } + @Test + public void testGetPrincipalInvestigatorsVoid(){ + String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleVoid); + assertEquals("",url); + } + + + @Test + public void testGetPrincipalInvestigatorsOne(){ + String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleOne); + assertEquals("Verheul, Henk", url); + } + + @Test + public void testGetPrincipalInvestigatorsMore(){ + String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleMore); + assertEquals("Verheul, Henk@@Baglioni, Miriam Pippolippo", url); + } + + + + @Test + public 
void testgGetTrialOrganizationsVoid(){ + String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationVoid); + assertEquals("",url); + } + + + @Test + public void testgGetTrialOrganizationsOne(){ + String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationOne); + assertEquals("Södertälje sjukhus AB@sponsor", url); + } + + @Test + public void testgGetTrialOrganizationsMore(){ + String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationMore); + assertEquals("Södertälje sjukhus AB@sponsor@@Miriam Baglioni AB@sponsor", url); + } + + @Test + public void testgGetTrialLocationsVoid(){ + String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationVoid); + assertEquals("",url); + } + + + @Test + public void testgGetTrialLocationsOne(){ + String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationOne); + assertEquals("China", url); + } + + @Test + public void testgGetTrialLocationsMore(){ + String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationMore); + assertEquals("China@@North Korea", url); + } + + @Test + public void testGetNotPrimaryRecordUrlPrimary(){ + String url = OpenTrialsXsltFunctions.getNotPrimaryRecordUrl(jsonRecondPrimary); + assertEquals("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059", url); + } + + @Test + public void testGetNotPrimaryRecordUrlVoid(){ + String url = OpenTrialsXsltFunctions.getNotPrimaryRecordUrl(jsonRecordVoid); + assertEquals("", url); + } + + @Test + public void testGetNotPrimaryRecordUrl(){ + String url = OpenTrialsXsltFunctions.getNotPrimaryRecordUrl(jsonRecord); + assertEquals("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059", url); + } + + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml2/VtdUtilityParserTest.java =================================================================== --- 
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml2/VtdUtilityParserTest.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml2/VtdUtilityParserTest.java (revision 58513) @@ -0,0 +1,44 @@ +package eu.dnetlib.data.transform.xml2; + +import java.io.InputStream; +import java.util.List; + +import com.ximpleware.AutoPilot; +import com.ximpleware.VTDGen; +import com.ximpleware.VTDNav; +import eu.dnetlib.data.transform.xml2.Node; +import eu.dnetlib.data.transform.xml2.VtdUtilityParser; +import org.apache.commons.io.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +import static eu.dnetlib.data.transform.xml2.VtdUtilityParser.parseXml; + +public class VtdUtilityParserTest { + + @Test + public void testUtils1() { + String xpath = VtdUtilityParser.xpath("a", "b", "c"); + Assert.assertTrue("/*[local-name()='a']/*[local-name()='b']/*[local-name()='c']".equals(xpath)); + } + + @Test + public void testPartser() throws Exception { + final InputStream resource = this.getClass().getResourceAsStream("/eu/dnetlib/data/transform/publication.xml"); + final String record =IOUtils.toString(resource); + final VTDGen vg = parseXml(record); + final VTDNav vn = vg.getNav(); + final AutoPilot ap = new AutoPilot(vn); + + List nodes = VtdUtilityParser.getNodes(ap, vn, "//*[local-name()='referenceaa']"); + + nodes.forEach(n -> Assert.assertTrue(n.getAttributes().keySet().size()>0)); + + System.out.println(VtdUtilityParser.countNodes(ap, vn, "count(//*[local-name()='CobjIdentifier'])")); + + + + + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/AbstractResultDom4jParser.java =================================================================== --- 
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/AbstractResultDom4jParser.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/AbstractResultDom4jParser.java (revision 58513) @@ -0,0 +1,472 @@ +package eu.dnetlib.data.transform.xml2; + +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.function.Function; + +import com.google.common.collect.Streams; +import com.google.protobuf.Descriptors.Descriptor; +import eu.dnetlib.data.proto.FieldTypeProtos.*; +import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance.OriginDescription; +import eu.dnetlib.data.proto.KindProtos.Kind; +import eu.dnetlib.data.proto.OafProtos.Oaf; +import eu.dnetlib.data.proto.OafProtos.OafEntity; +import eu.dnetlib.data.proto.OafProtos.OafRel; +import eu.dnetlib.data.proto.RelTypeProtos.RelType; +import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; +import eu.dnetlib.data.proto.ResultProtos.Result; +import eu.dnetlib.data.proto.ResultProtos.Result.*; +import eu.dnetlib.data.proto.TypeProtos.Type; +import eu.dnetlib.miscutils.collections.Pair; +import eu.dnetlib.pace.model.Person; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.exception.ExceptionUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dom4j.Document; + +import static eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions.oafSimpleId; +import static eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions.oafSplitId; +import static eu.dnetlib.data.transform.xml2.Dom4jUtilityParser.*; +import static eu.dnetlib.data.transform.xml2.Utils.*; +import static java.lang.String.format; + +public abstract class AbstractResultDom4jParser implements Function { + + private static final Log log = 
LogFactory.getLog(AbstractResultDom4jParser.class); + + protected boolean invisible = false; + protected String provenance = ""; + protected String trust = "0.9"; + + protected SpecificationMap specs; + + public AbstractResultDom4jParser(final Map fields) { + this.specs = buildSpecs(fields); + } + + public AbstractResultDom4jParser(final boolean invisible, final String provenance, final String trust, final Map fields) { + this(fields); + this.invisible = invisible; + this.provenance = provenance; + this.trust = trust; + } + + protected abstract String getResulttype(final String cobjcategory); + + @Override + public Oaf apply(final String xml) { + try { + final Document document = parseXml(xml); + + final boolean skiprecord = Boolean.valueOf(getFirstValue(document, xpath("record", "header", "skipRecord"))); + int metadata = countNodes(document, format("count(%s)", xpath("record", "metadata"))); + + if (metadata == 0 || skiprecord) { + return null; + } + + final String objIdentifier = oafSimpleId(Type.result.name(), getFirstValue(document, xpath("record", "header", "objIdentifier"))); + if (StringUtils.isBlank(objIdentifier)) { + return null; + } + + for(final Entry spec : specs.entrySet()) { + final Descriptor d = spec.getKey(); + final SpecificationDescriptor md = spec.getValue(); + + for(Entry, Object>>> entry : md.getFields().entrySet()) { + final String fieldName = entry.getKey(); + final Pair, Object>> pair = entry.getValue(); + final String xpath = pair.getKey(); + final Function, Object> function = pair.getValue(); + try { + addField(md.getBuilder(), d.findFieldByName(fieldName), function.apply(getNodes(document, xpath))); + } catch (Throwable e) { + throw new VtdException(String.format("Error mapping field '%s' from xpath '%s' for record '%s'", fieldName, xpath, objIdentifier), e); + } + } + } + + return Oaf.newBuilder() + .setKind(Kind.entity) + .setDataInfo(ensureDataInfo(document, DataInfo.newBuilder())) + .setEntity(((OafEntity.Builder) 
specs.get(OafEntity.getDescriptor()) + .getBuilder() + .setField( + OafEntity.getDescriptor().findFieldByName(Type.result.name()), + ((Builder) specs.get(Result.getDescriptor()).getBuilder()) + .setMetadata((Metadata) specs.get(Metadata.getDescriptor()).getBuilder().build()) + .addInstance((Instance) specs.get(Instance.getDescriptor()).getBuilder().build()) + .build())) + .setId(objIdentifier) + .setOaiprovenance(getOaiProvenance(document)) + .build()) + .build(); + } catch (Throwable e) { + log.error(xml); + log.error(ExceptionUtils.getStackTrace(e)); + return null; + } + } + + public SpecificationMap buildSpecs(final Map fields) { + final SpecificationMap specs = new SpecificationMap(); + + specs.put(Result.getDescriptor(), SpecificationDescriptor.newInstance()) + .setBuilder(Result.newBuilder()) + .put("externalReference", fields.get("externalReference"), nodes -> nodes.stream() + .map(node -> { + final ExternalReference.Builder extref = ExternalReference.newBuilder(); + if (StringUtils.isNotBlank(node.getTextValue())) { + extref.setUrl(node.getTextValue()); + } + final Map a = node.getAttributes(); + final String source = a.get("source"); + if (StringUtils.isNotBlank(source)) { + extref.setSitename(source); + } + final String identifier = a.get("identifier"); + if (StringUtils.isNotBlank(identifier)) { + extref.setRefidentifier(identifier); + } + final String title = a.get("title"); + if (StringUtils.isNotBlank(title)) { + extref.setLabel(title); + } + final String query = a.get("query"); + if (StringUtils.isNotBlank(query)) { + extref.setQuery(query); + } + final String type = a.get("type"); + if (StringUtils.isNotBlank(type)) { + extref.setQualifier(getSimpleQualifier(type, DNET_EXT_REF_TYPOLOGIES)); + } + return extref.build(); + })); + + specs.put(Instance.getDescriptor(), SpecificationDescriptor.newInstance()) + .setBuilder(Instance.newBuilder()) + .put("license", fields.get("license"), nodes -> nodes.stream() + .filter(node -> { + final Map a = 
node.getAttributes(); + switch (node.getName()) { + case "rights": + return a.containsKey(RIGHTS_URI) && a.get(RIGHTS_URI).matches(URL_REGEX); + case "license": + return true; + default: + return false; + } + }) + .map(Node::getTextValue)) + .put("accessright", fields.get("accessright"), nodes -> nodes.stream() + .map(Node::getTextValue) + .map(rights -> mappingAccess.containsKey(rights) ? mappingAccess.get(rights) : "UNKNOWN") + .map(code -> getQualifier(code, getClassName(code), DNET_ACCESS_MODES, DNET_ACCESS_MODES))) + .put("instancetype", fields.get("instancetype"), nodes -> nodes.stream() + .map(Node::getTextValue) + .map(code -> getQualifier(code, getClassName(code), DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE))) + .put("hostedby", fields.get("hostedby"), nodes -> nodes.stream() + .map(node -> getKV(oafSplitId("datasource", node.getAttributes().get("id")), node.getAttributes().get("name")))) + .put("url", fields.get("url"), nodes -> nodes.stream() + .map(Node::getTextValue) + .filter(s -> s.trim().matches(URL_REGEX))) + .put("dateofacceptance", fields.get("dateofacceptance"), nodes -> nodes.stream() + .map(Node::getTextValue)); + + specs.put(Metadata.getDescriptor(), SpecificationDescriptor.newInstance()) + .setBuilder(Metadata.newBuilder()) + .put("title", fields.get("title"), nodes -> nodes.stream() + .map(node -> { + final Qualifier.Builder q = Qualifier.newBuilder().setSchemeid(DNET_TITLE_TYPOLOGIES).setSchemename(DNET_TITLE_TYPOLOGIES); + switch (node.getAttributes().get(TITLE_TYPE) + "") { + case "AlternativeTitle": + q.setClassid("alternative title").setClassname("alternative title"); + break; + case "Subtitle": + q.setClassid("subtitle").setClassname("subtitle"); + break; + case "TranslatedTitle": + q.setClassid("translated title").setClassname("translated title"); + break; + default: + q.setClassid("main title").setClassname("main title"); + break; + } + return 
StructuredProperty.newBuilder().setValue(node.getTextValue()).setQualifier(q).build(); + })) + .put("description", fields.get("description"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("storagedate", fields.get("storagedate"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("lastmetadataupdate", fields.get("lastmetadataupdate"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("embargoenddate", fields.get("embargoenddate"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("dateofacceptance", fields.get("dateofacceptance"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("author", fields.get("author"), nodes -> Streams.mapWithIndex( + nodes.stream() + .map(Node::getTextValue), + (creator, i) -> new Pair<>(i, creator)) + .map(pair -> { + final Author.Builder author = Author.newBuilder(); + author.setFullname(pair.getValue()); + author.setRank(pair.getKey().intValue() + 1); + final Person p = new Person(pair.getValue(), false); + if (p.isAccurate()) { + author.setName(p.getNormalisedFirstName()); + author.setSurname(p.getNormalisedSurname()); + } + return author.build(); + })) + .put("contributor", fields.get("contributor"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("subject", fields.get("subject"), nodes -> nodes.stream() + .map(node -> { + final Map a = node.getAttributes(); + final String classId = StringUtils.isNotBlank(a.get(CLASSID)) ? a.get(CLASSID) : KEYWORD; + final String className = StringUtils.isNotBlank(a.get(CLASSNAME)) ? a.get(CLASSNAME) : KEYWORD; + final String schemeId = StringUtils.isNotBlank(a.get(SCHEMEID)) ? a.get(SCHEMEID) : DNET_SUBJECT_TYPOLOGIES; + final String schemeName = StringUtils.isNotBlank(a.get(SCHEMENAME)) ? 
a.get(SCHEMENAME) : DNET_SUBJECT_TYPOLOGIES; + return getStructuredProperty(node.getTextValue(), classId, className, schemeId, schemeName); + })) + .put("format", fields.get("format"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("source", fields.get("source"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("size", fields.get("size"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("version", fields.get("version"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("publisher", fields.get("publisher"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("language", fields.get("language"), nodes -> nodes.stream() + .map(Node::getTextValue) + .map(code -> getQualifier(code, getClassName(code), DNET_LANGUAGES, DNET_LANGUAGES))) + .put("resourcetype", fields.get("resourcetype"), nodes -> nodes.stream() + .map(node -> node.getAttributes().get("resourceTypeGeneral")) + .map(resourceType -> getSimpleQualifier(resourceType, DNET_DATA_CITE_RESOURCE))) + .put("resulttype", fields.get("resulttype"), nodes -> nodes.stream() + .map(Node::getTextValue) + .map(cobjcategory -> getSimpleQualifier(getResulttype(cobjcategory), DNET_RESULT_TYPOLOGIES))) + .put("concept", fields.get("concept"), nodes -> nodes.stream() + .filter(node -> node.getAttributes() != null && StringUtils.isNotBlank(node.getAttributes().get("id"))) + .map(node -> Context.newBuilder().setId(node.getAttributes().get("id")))) + .put("journal", fields.get("journal"), nodes -> nodes.stream() + .map(node -> { + final Journal.Builder journal = Journal.newBuilder(); + if (StringUtils.isNotBlank(node.getTextValue())) { + journal.setName(node.getTextValue()); + } + if (node.getAttributes() != null) { + final Map a = node.getAttributes(); + if (StringUtils.isNotBlank(a.get("issn"))) { + journal.setIssnPrinted(a.get("issn")); + } + if (StringUtils.isNotBlank(a.get("eissn"))) { + journal.setIssnOnline(a.get("eissn")); + } + if (StringUtils.isNotBlank(a.get("lissn"))) { + 
journal.setIssnLinking(a.get("lissn")); + } + if (StringUtils.isNotBlank(a.get("sp"))) { + journal.setSp(a.get("sp")); + } + if (StringUtils.isNotBlank(a.get("ep"))) { + journal.setEp(a.get("ep")); + } + if (StringUtils.isNotBlank(a.get("iss"))) { + journal.setIss(a.get("iss")); + } + if (StringUtils.isNotBlank(a.get("vol"))) { + journal.setVol(a.get("vol")); + } + } + return journal; + })); + + specs.put(OafEntity.getDescriptor(), SpecificationDescriptor.newInstance()) + .setBuilder(OafEntity.newBuilder().setType(Type.result)) + .put("originalId", fields.get("originalId"), nodes -> nodes.stream() + .map(Node::getTextValue) + .map(s -> StringUtils.contains(s, ID_SEPARATOR) ? StringUtils.substringAfter(s, ID_SEPARATOR) : s) + .filter(s -> !s.trim().matches(URL_REGEX))) + .put("collectedfrom", fields.get("collectedfrom"), nodes -> nodes.stream() + .map(node -> getKV( + oafSplitId(Type.datasource.name(), node.getAttributes().get("id")), + node.getAttributes().get("name")))) + .put("pid", fields.get("pid"), nodes -> nodes.stream() + .filter(pid -> { + final Map a = pid.getAttributes(); + return a.containsKey(IDENTIFIER_TYPE) || a.containsKey(ALTERNATE_IDENTIFIER_TYPE); + }) + .filter(pid -> { + final Map a = pid.getAttributes(); + return !"url".equalsIgnoreCase(a.get(IDENTIFIER_TYPE)) && !"url".equalsIgnoreCase(a.get(ALTERNATE_IDENTIFIER_TYPE)); + }) + .map(pid -> { + final Map a = pid.getAttributes(); + final String identifierType = a.get(IDENTIFIER_TYPE); + final String altIdentifierType = a.get(ALTERNATE_IDENTIFIER_TYPE); + return StructuredProperty.newBuilder() + .setValue(pid.getTextValue()) + .setQualifier(getSimpleQualifier( + StringUtils.isNotBlank(identifierType) ? 
+ identifierType : altIdentifierType, DNET_PID_TYPES)) + .build(); + })) + .put("dateofcollection", fields.get("dateofcollection"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("dateoftransformation", fields.get("dateoftransformation"), nodes -> nodes.stream() + .map(Node::getTextValue)) + .put("cachedRel", fields.get("cachedRel"), nodes -> nodes.stream() + .map(node -> getOafRel(node, + OafRel.newBuilder() + .setSource("") + .setChild(false))) + .filter(Objects::nonNull) + .map(oafRel -> oafRel.build())); + return specs; + } + + private static OafRel.Builder getOafRel(final Node node, final OafRel.Builder oafRel) { + final Map a = node.getAttributes(); + + switch (node.getName()) { + case PROJECTID: + if (StringUtils.isBlank(node.getTextValue())) { + return null; + } + return oafRel + .setTarget(oafSplitId(Type.project.name(), StringUtils.trim(node.getTextValue()))) + .setRelType(RelType.resultProject) + .setSubRelType(SubRelType.outcome) + .setRelClass("isProducedBy"); + + case RELATED_PUBLICATION: + case RELATED_DATASET: + if (StringUtils.isBlank(a.get("id"))) { + return null; + } + return oafRel + .setTarget(oafSimpleId(Type.result.name(), StringUtils.trim(a.get("id")))) + .setRelType(RelType.resultResult) + .setSubRelType(SubRelType.publicationDataset) + .setRelClass("isRelatedTo"); + + case RELATED_IDENTIFIER: + if (StringUtils.isBlank(node.getTextValue())) { + return null; + } + return oafRel + .setTarget(node.getTextValue()) + .setRelType(RelType.resultResult) + .setSubRelType(SubRelType.relationship) + .setRelClass(a.get(RELATION_TYPE)) + .setCachedTarget( + OafEntity.newBuilder() + .setType(Type.result) + .setId("") //TODO + .addPid( + StructuredProperty.newBuilder() + .setValue(node.getTextValue()) + .setQualifier(getSimpleQualifier(a.get(RELATED_IDENTIFIER_TYPE), DNET_PID_TYPES)) + .build())); + default: + return null; + } + } + + private OriginDescription getOriginDescription(final Document document, final String basePath) throws 
VtdException {
        // Builds the OAI origin description found at basePath, recursing into nested
        // <originDescription> children. Returns an empty description when basePath matches no node.
        final OriginDescription.Builder od = OriginDescription.newBuilder();
        if (getNodes(document, basePath).isEmpty()) {
            return od.build();
        }
        final Map<String, String> odAttr = getNode(document, basePath).getAttributes();

        final String harvestDate = odAttr.get("harvestDate");
        if (StringUtils.isNotBlank(harvestDate)) {
            od.setHarvestDate(harvestDate);
        }
        final String altered = odAttr.get("altered");
        if (StringUtils.isNotBlank(altered)) {
            od.setAltered(Boolean.valueOf(altered));
        }
        final String baseUrl = getFirstValue(document, basePath + xpath("baseURL"));
        // Guard on the extracted value: the previous check tested basePath, which is never blank
        // here, so a missing baseURL was still handed to the setter.
        if (StringUtils.isNotBlank(baseUrl)) {
            od.setBaseURL(baseUrl);
        }
        final String identifier = getFirstValue(document, basePath + xpath("identifier"));
        if (StringUtils.isNotBlank(identifier)) {
            od.setIdentifier(identifier);
        }
        final String datestamp = getFirstValue(document, basePath + xpath("datestamp"));
        if (StringUtils.isNotBlank(datestamp)) {
            od.setDatestamp(datestamp);
        }
        final String metadataNamespace = getFirstValue(document, basePath + xpath("metadataNamespace"));
        if (StringUtils.isNotBlank(metadataNamespace)) {
            od.setMetadataNamespace(metadataNamespace);
        }
        // The recursion stops at the first level where the nested path matches no node.
        final OriginDescription originDescription = getOriginDescription(document, basePath + xpath("originDescription"));
        if (originDescription.hasHarvestDate()) {
            od.setOriginDescription(originDescription);
        }

        return od.build();
    }

    /**
     * Wraps the origin description read from record/about/provenance/originDescription
     * into an {@link OAIProvenance} message.
     */
    private OAIProvenance getOaiProvenance(final Document document) throws VtdException {
        return OAIProvenance.newBuilder()
                .setOriginDescription(getOriginDescription(document, xpath("record", "about", "provenance", "originDescription")))
                .build();
    }

    /**
     * Returns {@code info} unchanged when it is already initialized; otherwise builds a new
     * DataInfo from the document, using this parser's invisible/provenance/trust defaults.
     */
    private DataInfo.Builder ensureDataInfo(
            final Document document,
            final DataInfo.Builder info) throws VtdException {

        if (info.isInitialized()) return info;
        return buildDataInfo(document, invisible, provenance, trust, false, false);
    }

    private DataInfo.Builder buildDataInfo(
            final Document document,
final boolean invisible, + final String defaultProvenanceaction, + final String defaultTrust, + final boolean defaultDeletedbyinference, + final boolean defaultInferred) throws VtdException { + + final DataInfo.Builder dataInfoBuilder = DataInfo.newBuilder() + .setInvisible(invisible) + .setInferred(defaultInferred) + .setDeletedbyinference(defaultDeletedbyinference) + .setTrust(defaultTrust) + .setProvenanceaction(getSimpleQualifier(defaultProvenanceaction, DNET_PROVENANCE_ACTIONS)); + + // checking instanceof because when receiving an empty we don't want to parse it. + + final String xpath = xpath("record", "about", "datainfo"); + if (getNodes(document, xpath).size() > 0) { + final Map provAction = getNode(document, xpath + xpath("provenanceaction")).getAttributes(); + dataInfoBuilder + .setInvisible(Boolean.valueOf(getValue(getNode(document, xpath + xpath("invisible")), String.valueOf(invisible)))) + .setInferred(Boolean.valueOf(getValue(getNode(document, xpath + xpath("inferred")), String.valueOf(defaultInferred)))) + .setDeletedbyinference(Boolean.valueOf( + getValue(getNode(document, xpath + xpath("deletedbyinference")), String.valueOf(defaultDeletedbyinference)))) + .setTrust(getValue(getNode(document, xpath + xpath("trust")), defaultTrust)) + .setInferenceprovenance(getValue(getNode(document, xpath + xpath("inferenceprovenance")), "")) + .setProvenanceaction(getSimpleQualifier( + getValue(provAction.get(CLASSID), defaultProvenanceaction), + DNET_PROVENANCE_ACTIONS)); + } + + return dataInfoBuilder; + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/vtd/ConfigurationTestConfig.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/vtd/ConfigurationTestConfig.java (nonexistent) +++ 
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/vtd/ConfigurationTestConfig.java (revision 58513) @@ -0,0 +1,31 @@ +package eu.dnetlib.data.transform.xml.vtd; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; + +import com.mongodb.MongoClient; +import com.mongodb.client.MongoDatabase; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.io.ClassPathResource; + +@Configuration +public class ConfigurationTestConfig { + + @Bean + public MongoDatabase db() throws IOException { + + final Properties p = new Properties(); + final ClassPathResource cp = new ClassPathResource("test.properties"); + try (final InputStream stream = cp.getInputStream()) { + p.load(stream); + } + + final MongoClient mongo = new MongoClient( + p.getProperty("mongodb.host"), + Integer.valueOf(p.getProperty("mongodb.port"))); + return mongo.getDatabase(p.getProperty("mongodb.dbname")); + } + +} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/FWFXsltFunctionsTest.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/FWFXsltFunctionsTest.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/xml/FWFXsltFunctionsTest.java (revision 58513) @@ -0,0 +1,113 @@ +package eu.dnetlib.data.transform.xml; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +/** + * + * Created by miriam on 04/05/2017. 
+ */ +public class FWFXsltFunctionsTest { + private String namesurname ="Gerhard SOMMER"; + private String noSurname = "Gerhard"; + private String noName = "SOMMER"; + private String twoNames = "Gerhard Pippo SOMMER"; + private String twoSurname = "Gerhard PIPPO SOMMER"; + private String nonamesurname = ""; + private String organization ="Universität Linz - Institut für Computational Perception; Universität für Musik und darstellende Kunst Graz - Institut 1: Komposition, Musiktheorie, Musikgeschichte und Dirigieren; Universität Mozarteum Salzburg - Institut für Musikalische Rezeptions- und Interpretationsgeschichte; Anton Bruckner Privatuniversität - Institut für Theorie und Geschichte der Musik; Eliette und Herbert von Karajan Institut - Eliette und Herbert von Karajan Institut"; + + @Before + public void before() throws Exception { + } + + @After + public void after() throws Exception { + } + + @Test + public void testGetNamesNameNoNameSurname() throws Exception { + String ret = FWFXsltFunctions.getName(nonamesurname,true); + assertEquals("",ret ); + } + + @Test + public void testGetNamesSurnameNoNameSurname() throws Exception { + String ret = FWFXsltFunctions.getName(nonamesurname,false); + assertEquals("",ret ); + } + + @Test + public void testGetNamesNameTwoSurname() throws Exception { + String ret = FWFXsltFunctions.getName(twoSurname,true); + assertEquals("Gerhard",ret ); + } + + @Test + public void testGetNamesSurnameTwoSurname() throws Exception { + String ret = FWFXsltFunctions.getName(twoSurname,false); + assertEquals("PIPPO SOMMER",ret ); + } + + @Test + public void testGetNamesNameTwoNames() throws Exception { + String ret = FWFXsltFunctions.getName(twoNames,true); + assertEquals("Gerhard Pippo",ret ); + } + + @Test + public void testGetNamesSurnameTwoNames() throws Exception { + String ret = FWFXsltFunctions.getName(twoNames,false); + assertEquals("SOMMER",ret ); + } + + /** + * Method: getProvs(String jsonProvList) + */ + @Test + public void 
testGetNamesName() throws Exception { + String ret = FWFXsltFunctions.getName(namesurname,true); + assertEquals("Gerhard",ret ); + } + + @Test + public void testGetNamesSurname() throws Exception { + String ret = FWFXsltFunctions.getName(namesurname,false); + assertEquals("SOMMER",ret ); + } + + @Test + public void testGetNamesNameNoSurname() throws Exception { + String ret = FWFXsltFunctions.getName(noSurname,true); + assertEquals("Gerhard",ret ); + } + + @Test + public void testGetNamesSurnameNoSurname() throws Exception { + String ret = FWFXsltFunctions.getName(noSurname,false); + assertEquals("",ret ); + } + + @Test + public void testGetNamesNameNoName() throws Exception { + String ret = FWFXsltFunctions.getName(noName,true); + assertEquals("",ret ); + } + + @Test + public void testGetNamesSurnameNoName() throws Exception { + String ret = FWFXsltFunctions.getName(noName,false); + assertEquals("SOMMER",ret ); + } + + @Test + public void TestGetMd5()throws Exception{ + String md5 = FWFXsltFunctions.getMd5(organization); + System.out.println(md5); + assertNotNull(md5); + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/AbstractResultVtdParser.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/AbstractResultVtdParser.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/AbstractResultVtdParser.java (revision 58513) @@ -0,0 +1,478 @@ +package eu.dnetlib.data.transform.xml2; + +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.function.Function; + +import com.google.common.collect.Streams; +import com.google.protobuf.Descriptors.Descriptor; +import 
com.ximpleware.AutoPilot;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import eu.dnetlib.data.proto.FieldTypeProtos;
import eu.dnetlib.data.proto.FieldTypeProtos.*;
import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance.OriginDescription;
import eu.dnetlib.data.proto.KindProtos.Kind;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.OafProtos.OafEntity;
import eu.dnetlib.data.proto.OafProtos.OafRel;
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
import eu.dnetlib.data.proto.ResultProtos.Result;
import eu.dnetlib.data.proto.ResultProtos.Result.*;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.miscutils.collections.Pair;
import eu.dnetlib.pace.model.Person;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import static eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions.oafSimpleId;
import static eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions.oafSplitId;
import static eu.dnetlib.data.transform.xml2.Utils.*;
import static eu.dnetlib.data.transform.xml2.VtdUtilityParser.*;
import static java.lang.String.format;

/**
 * Base class for VTD-XML based parsers that turn an XML record into an {@link Oaf} result entity.
 * <p>
 * The mapping is driven by a {@link SpecificationMap}: for each protobuf descriptor (Result,
 * Instance, Metadata, OafEntity) it holds, per field name, the xpath to evaluate and the function
 * converting the matched nodes into field values. Subclasses supply the result type via
 * {@link #getResulttype(String)}.
 */
public abstract class AbstractResultVtdParser implements Function<String, Oaf> {

    private static final Log log = LogFactory.getLog(AbstractResultVtdParser.class);

    /** Default DataInfo values, used when the record carries no datainfo section. */
    protected boolean invisible = false;
    protected String provenance = "";
    protected String trust = "0.9";

    protected SpecificationMap specs;

    /**
     * @param fields maps each supported field name to the xpath used to extract it
     */
    public AbstractResultVtdParser(final Map<String, String> fields) {
        this.specs = buildSpecs(fields);
    }

    /**
     * @param invisible  default value for DataInfo.invisible
     * @param provenance default provenance action class
     * @param trust      default trust value
     * @param fields     maps each supported field name to the xpath used to extract it
     */
    public AbstractResultVtdParser(final boolean invisible, final String provenance, final String trust, final Map<String, String> fields) {
        this(fields);
        this.invisible = invisible;
        this.provenance = provenance;
        this.trust = trust;
    }

    /**
     * Resolves the result type code for the given cobjcategory value read from the record.
     */
    protected abstract String getResulttype(final String cobjcategory);

    /**
     * Parses one XML record into an {@link Oaf} entity.
     *
     * @param xml the serialized record
     * @return the mapped entity, or {@code null} when the record is flagged with skipRecord, has no
     *         metadata element, has a blank object identifier, or any error occurs (the error and
     *         the offending XML are logged)
     */
    @Override
    public Oaf apply(final String xml) {
        try {
            final VTDGen vg = parseXml(xml);
            final VTDNav vn = vg.getNav();
            final AutoPilot ap = new AutoPilot(vn);

            final boolean skiprecord = Boolean.valueOf(getFirstValue(ap, vn, xpath("record", "header", "skipRecord")));
            int metadata = countNodes(ap, vn, format("count(%s)", xpath("record", "metadata")));

            if (metadata == 0 || skiprecord) {
                return null;
            }

            final String objIdentifier = oafSimpleId(Type.result.name(), getFirstValue(ap, vn, xpath("record", "header", "objIdentifier")));
            if (StringUtils.isBlank(objIdentifier)) {
                return null;
            }

            // NOTE(review): the builders held in specs are created once in buildSpecs and reused on
            // every call. Unless SpecificationDescriptor.getBuilder() hands out a fresh or cleared
            // builder, values from a previous record would accumulate here, and concurrent use of
            // one parser instance would be unsafe - confirm against SpecificationDescriptor.
            for (final Entry<Descriptor, SpecificationDescriptor> spec : specs.entrySet()) {
                final Descriptor d = spec.getKey();
                final SpecificationDescriptor md = spec.getValue();

                for (Entry<String, Pair<String, Function<List<Node>, Object>>> entry : md.getFields().entrySet()) {
                    final String fieldName = entry.getKey();
                    final Pair<String, Function<List<Node>, Object>> pair = entry.getValue();
                    final String xpath = pair.getKey();
                    final Function<List<Node>, Object> function = pair.getValue();
                    try {
                        addField(md.getBuilder(), d.findFieldByName(fieldName), function.apply(getNodes(ap, vn, xpath)));
                    } catch (Throwable e) {
                        // Re-thrown with field/xpath/record context; the outer catch logs it.
                        throw new VtdException(String.format("Error mapping field '%s' from xpath '%s' for record '%s'", fieldName, xpath, objIdentifier), e);
                    }
                }
            }

            return Oaf.newBuilder()
                    .setKind(Kind.entity)
                    .setDataInfo(ensureDataInfo(ap, vn, DataInfo.newBuilder()))
                    .setEntity(((OafEntity.Builder) specs.get(OafEntity.getDescriptor())
                            .getBuilder()
                            .setField(
                                    OafEntity.getDescriptor().findFieldByName(Type.result.name()),
                                    ((Result.Builder) specs.get(Result.getDescriptor()).getBuilder())
                                            .setMetadata((Metadata) specs.get(Metadata.getDescriptor()).getBuilder().build())
                                            .addInstance((Instance) specs.get(Instance.getDescriptor()).getBuilder().build())
                                            .build()))
                            .setId(objIdentifier)
                            .setOaiprovenance(getOaiProvenance(ap, vn))
                            .build())
                    .build();
        } catch (Throwable e) {
            // Deliberate best-effort: a record that cannot be mapped is logged and dropped.
            log.error(xml);
            log.error(ExceptionUtils.getStackTrace(e));
            return null;
        }
    }

    /**
     * Builds the field specifications for the Result, Instance, Metadata and OafEntity descriptors.
     *
     * @param fields maps each supported field name to the xpath used to extract it
     * @return the populated specification map
     */
    public SpecificationMap buildSpecs(final Map<String, String> fields) {
        final SpecificationMap specs = new SpecificationMap();

        addResultSpec(specs, fields);
        addInstanceSpec(specs, fields);
        addMetadataSpec(specs, fields);
        addEntitySpec(specs, fields);

        return specs;
    }

    /** Registers the Result-level fields (external references). */
    private void addResultSpec(final SpecificationMap specs, final Map<String, String> fields) {
        specs.put(Result.getDescriptor(), SpecificationDescriptor.newInstance())
                .setBuilder(Result.newBuilder())
                .put("externalReference", fields.get("externalReference"), nodes -> nodes.stream()
                        .map(node -> {
                            final ExternalReference.Builder extref = ExternalReference.newBuilder();
                            if (StringUtils.isNotBlank(node.getTextValue())) {
                                extref.setUrl(node.getTextValue());
                            }
                            final Map<String, String> a = node.getAttributes();
                            final String source = a.get("source");
                            if (StringUtils.isNotBlank(source)) {
                                extref.setSitename(source);
                            }
                            final String identifier = a.get("identifier");
                            if (StringUtils.isNotBlank(identifier)) {
                                extref.setRefidentifier(identifier);
                            }
                            final String title = a.get("title");
                            if (StringUtils.isNotBlank(title)) {
                                extref.setLabel(title);
                            }
                            final String query = a.get("query");
                            if (StringUtils.isNotBlank(query)) {
                                extref.setQuery(query);
                            }
                            final String type = a.get("type");
                            if (StringUtils.isNotBlank(type)) {
                                extref.setQualifier(getSimpleQualifier(type, DNET_EXT_REF_TYPOLOGIES));
                            }
                            return extref.build();
                        }));
    }

    /** Registers the Instance-level fields (license, access right, type, host, url, date). */
    private void addInstanceSpec(final SpecificationMap specs, final Map<String, String> fields) {
        specs.put(Instance.getDescriptor(), SpecificationDescriptor.newInstance())
                .setBuilder(Instance.newBuilder())
                .put("license", fields.get("license"), nodes -> nodes.stream()
                        .filter(node -> {
                            final Map<String, String> a = node.getAttributes();
                            switch (node.getName()) {
                            case "rights":
                                // a rights element counts as a license only when it carries a URL
                                return a.containsKey(RIGHTS_URI) && a.get(RIGHTS_URI).matches(URL_REGEX);
                            case "license":
                                return true;
                            default:
                                return false;
                            }
                        })
                        .map(Node::getTextValue))
                .put("accessright", fields.get("accessright"), nodes -> nodes.stream()
                        .map(Node::getTextValue)
                        .map(rights -> mappingAccess.containsKey(rights) ? mappingAccess.get(rights) : "UNKNOWN")
                        .map(code -> getQualifier(code, getClassName(code), DNET_ACCESS_MODES, DNET_ACCESS_MODES)))
                .put("instancetype", fields.get("instancetype"), nodes -> nodes.stream()
                        .map(Node::getTextValue)
                        .map(code -> getQualifier(code, getClassName(code), DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)))
                .put("hostedby", fields.get("hostedby"), nodes -> nodes.stream()
                        .map(node -> getKV(oafSplitId("datasource", node.getAttributes().get("id")), node.getAttributes().get("name"))))
                .put("url", fields.get("url"), nodes -> nodes.stream()
                        .map(Node::getTextValue)
                        .filter(s -> s.trim().matches(URL_REGEX)))
                .put("dateofacceptance", fields.get("dateofacceptance"), nodes -> nodes.stream()
                        .map(Node::getTextValue));
    }

    /** Registers the Metadata-level fields (titles, dates, authors, subjects, journal, ...). */
    private void addMetadataSpec(final SpecificationMap specs, final Map<String, String> fields) {
        specs.put(Metadata.getDescriptor(), SpecificationDescriptor.newInstance())
                .setBuilder(Metadata.newBuilder())
                .put("title", fields.get("title"), nodes -> nodes.stream()
                        .map(node -> {
                            final Qualifier.Builder q = Qualifier.newBuilder().setSchemeid(DNET_TITLE_TYPOLOGIES).setSchemename(DNET_TITLE_TYPOLOGIES);
                            // + "" turns a missing attribute into "null", so the switch falls
                            // through to the default branch instead of throwing
                            switch (node.getAttributes().get(TITLE_TYPE) + "") {
                            case "AlternativeTitle":
                                q.setClassid("alternative title").setClassname("alternative title");
                                break;
                            case "Subtitle":
                                q.setClassid("subtitle").setClassname("subtitle");
                                break;
                            case "TranslatedTitle":
                                q.setClassid("translated title").setClassname("translated title");
                                break;
                            default:
                                q.setClassid("main title").setClassname("main title");
                                break;
                            }
                            return StructuredProperty.newBuilder().setValue(node.getTextValue()).setQualifier(q).build();
                        }))
                .put("description", fields.get("description"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("storagedate", fields.get("storagedate"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("lastmetadataupdate", fields.get("lastmetadataupdate"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("embargoenddate", fields.get("embargoenddate"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("dateofacceptance", fields.get("dateofacceptance"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("author", fields.get("author"), nodes -> Streams.mapWithIndex(
                        nodes.stream()
                                .map(Node::getTextValue),
                        (creator, i) -> new Pair<>(i, creator))
                        .map(pair -> {
                            final Author.Builder author = Author.newBuilder();
                            author.setFullname(pair.getValue());
                            // rank is the 1-based position of the author in the record
                            author.setRank(pair.getKey().intValue() + 1);
                            final Person p = new Person(pair.getValue(), false);
                            if (p.isAccurate()) {
                                author.setName(p.getNormalisedFirstName());
                                author.setSurname(p.getNormalisedSurname());
                            }
                            return author.build();
                        }))
                .put("contributor", fields.get("contributor"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("subject", fields.get("subject"), nodes -> nodes.stream()
                        .map(node -> {
                            final Map<String, String> a = node.getAttributes();
                            final String classId = StringUtils.isNotBlank(a.get(CLASSID)) ? a.get(CLASSID) : KEYWORD;
                            final String className = StringUtils.isNotBlank(a.get(CLASSNAME)) ? a.get(CLASSNAME) : KEYWORD;
                            final String schemeId = StringUtils.isNotBlank(a.get(SCHEMEID)) ? a.get(SCHEMEID) : DNET_SUBJECT_TYPOLOGIES;
                            final String schemeName = StringUtils.isNotBlank(a.get(SCHEMENAME)) ? a.get(SCHEMENAME) : DNET_SUBJECT_TYPOLOGIES;
                            return getStructuredProperty(node.getTextValue(), classId, className, schemeId, schemeName);
                        }))
                .put("format", fields.get("format"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("source", fields.get("source"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("size", fields.get("size"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("version", fields.get("version"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("publisher", fields.get("publisher"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("language", fields.get("language"), nodes -> nodes.stream()
                        .map(Node::getTextValue)
                        .map(code -> getQualifier(code, getClassName(code), DNET_LANGUAGES, DNET_LANGUAGES)))
                .put("resourcetype", fields.get("resourcetype"), nodes -> nodes.stream()
                        .map(node -> node.getAttributes().get("resourceTypeGeneral"))
                        .map(resourceType -> getSimpleQualifier(resourceType, DNET_DATA_CITE_RESOURCE)))
                .put("resulttype", fields.get("resulttype"), nodes -> nodes.stream()
                        .map(Node::getTextValue)
                        .map(cobjcategory -> getSimpleQualifier(getResulttype(cobjcategory), DNET_RESULT_TYPOLOGIES)))
                .put("concept", fields.get("concept"), nodes -> nodes.stream()
                        .filter(node -> node.getAttributes() != null && StringUtils.isNotBlank(node.getAttributes().get("id")))
                        .map(node -> Context.newBuilder().setId(node.getAttributes().get("id"))))
                .put("journal", fields.get("journal"), nodes -> nodes.stream()
                        .map(node -> {
                            final Journal.Builder journal = Journal.newBuilder();
                            if (StringUtils.isNotBlank(node.getTextValue())) {
                                journal.setName(node.getTextValue());
                            }
                            if (node.getAttributes() != null) {
                                final Map<String, String> a = node.getAttributes();
                                if (StringUtils.isNotBlank(a.get("issn"))) {
                                    journal.setIssnPrinted(a.get("issn"));
                                }
                                if (StringUtils.isNotBlank(a.get("eissn"))) {
                                    journal.setIssnOnline(a.get("eissn"));
                                }
                                if (StringUtils.isNotBlank(a.get("lissn"))) {
                                    journal.setIssnLinking(a.get("lissn"));
                                }
                                if (StringUtils.isNotBlank(a.get("sp"))) {
                                    journal.setSp(a.get("sp"));
                                }
                                if (StringUtils.isNotBlank(a.get("ep"))) {
                                    journal.setEp(a.get("ep"));
                                }
                                if (StringUtils.isNotBlank(a.get("iss"))) {
                                    journal.setIss(a.get("iss"));
                                }
                                if (StringUtils.isNotBlank(a.get("vol"))) {
                                    journal.setVol(a.get("vol"));
                                }
                            }
                            return journal;
                        }));
    }

    /** Registers the OafEntity-level fields (ids, collectedfrom, pids, dates, cached relations). */
    private void addEntitySpec(final SpecificationMap specs, final Map<String, String> fields) {
        specs.put(OafEntity.getDescriptor(), SpecificationDescriptor.newInstance())
                .setBuilder(OafEntity.newBuilder().setType(Type.result))
                .put("originalId", fields.get("originalId"), nodes -> nodes.stream()
                        .map(Node::getTextValue)
                        .map(s -> StringUtils.contains(s, ID_SEPARATOR) ? StringUtils.substringAfter(s, ID_SEPARATOR) : s)
                        .filter(s -> !s.trim().matches(URL_REGEX)))
                .put("collectedfrom", fields.get("collectedfrom"), nodes -> nodes.stream()
                        .map(node -> getKV(
                                oafSplitId(Type.datasource.name(), node.getAttributes().get("id")),
                                node.getAttributes().get("name"))))
                .put("pid", fields.get("pid"), nodes -> nodes.stream()
                        .filter(pid -> {
                            final Map<String, String> a = pid.getAttributes();
                            return a.containsKey(IDENTIFIER_TYPE) || a.containsKey(ALTERNATE_IDENTIFIER_TYPE);
                        })
                        .filter(pid -> {
                            // identifiers typed as "url" are not treated as pids
                            final Map<String, String> a = pid.getAttributes();
                            return !"url".equalsIgnoreCase(a.get(IDENTIFIER_TYPE)) && !"url".equalsIgnoreCase(a.get(ALTERNATE_IDENTIFIER_TYPE));
                        })
                        .map(pid -> {
                            final Map<String, String> a = pid.getAttributes();
                            final String identifierType = a.get(IDENTIFIER_TYPE);
                            final String altIdentifierType = a.get(ALTERNATE_IDENTIFIER_TYPE);
                            return StructuredProperty.newBuilder()
                                    .setValue(pid.getTextValue())
                                    .setQualifier(getSimpleQualifier(
                                            StringUtils.isNotBlank(identifierType) ? identifierType : altIdentifierType, DNET_PID_TYPES))
                                    .build();
                        }))
                .put("dateofcollection", fields.get("dateofcollection"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("dateoftransformation", fields.get("dateoftransformation"), nodes -> nodes.stream()
                        .map(Node::getTextValue))
                .put("cachedRel", fields.get("cachedRel"), nodes -> nodes.stream()
                        .map(node -> getOafRel(node,
                                OafRel.newBuilder()
                                        .setSource("")
                                        .setChild(false)))
                        .filter(Objects::nonNull)
                        .map(OafRel.Builder::build));
    }

    /**
     * Completes {@code oafRel} according to the element name of {@code node}.
     *
     * @return the completed builder, or {@code null} when the node is of an unsupported kind or
     *         lacks the value identifying the relation target
     */
    private static OafRel.Builder getOafRel(final Node node, final OafRel.Builder oafRel) {
        final Map<String, String> a = node.getAttributes();

        switch (node.getName()) {
        case PROJECTID:
            if (StringUtils.isBlank(node.getTextValue())) {
                return null;
            }
            return oafRel
                    .setTarget(oafSplitId(Type.project.name(), StringUtils.trim(node.getTextValue())))
                    .setRelType(RelType.resultProject)
                    .setSubRelType(SubRelType.outcome)
                    .setRelClass("isProducedBy");

        case RELATED_PUBLICATION:
        case RELATED_DATASET:
            if (StringUtils.isBlank(a.get("id"))) {
                return null;
            }
            return oafRel
                    .setTarget(oafSimpleId(Type.result.name(), StringUtils.trim(a.get("id"))))
                    .setRelType(RelType.resultResult)
                    .setSubRelType(SubRelType.publicationDataset)
                    .setRelClass("isRelatedTo");

        case RELATED_IDENTIFIER:
            if (StringUtils.isBlank(node.getTextValue())) {
                return null;
            }
            return oafRel
                    .setTarget(node.getTextValue())
                    .setRelType(RelType.resultResult)
                    .setSubRelType(SubRelType.relationship)
                    .setRelClass(a.get(RELATION_TYPE))
                    .setCachedTarget(
                            OafEntity.newBuilder()
                                    .setType(Type.result)
                                    .setId("") //TODO
                                    .addPid(
                                            StructuredProperty.newBuilder()
                                                    .setValue(node.getTextValue())
                                                    .setQualifier(getSimpleQualifier(a.get(RELATED_IDENTIFIER_TYPE), DNET_PID_TYPES))
                                                    .build()));
        default:
            return null;
        }
    }

    /**
     * Builds the OAI origin description found at {@code basePath}, recursing into nested
     * originDescription children.
     *
     * @return the description; empty when {@code basePath} matches no node
     */
    private OriginDescription getOriginDescription(final AutoPilot ap, final VTDNav vn, final String basePath) throws VtdException {
        final OriginDescription.Builder od = OriginDescription.newBuilder();
        if (getNodes(ap, vn, basePath).isEmpty()) {
            return od.build();
        }
        final Map<String, String> odAttr = getNode(ap, vn, basePath).getAttributes();

        final String harvestDate = odAttr.get("harvestDate");
        if (StringUtils.isNotBlank(harvestDate)) {
            od.setHarvestDate(harvestDate);
        }
        final String altered = odAttr.get("altered");
        if (StringUtils.isNotBlank(altered)) {
            od.setAltered(Boolean.valueOf(altered));
        }
        final String baseUrl = getFirstValue(ap, vn, basePath + xpath("baseURL"));
        // Guard on the extracted value: the previous check tested basePath, which is never blank
        // here, so a missing baseURL was still handed to the setter.
        if (StringUtils.isNotBlank(baseUrl)) {
            od.setBaseURL(baseUrl);
        }
        final String identifier = getFirstValue(ap, vn, basePath + xpath("identifier"));
        if (StringUtils.isNotBlank(identifier)) {
            od.setIdentifier(identifier);
        }
        final String datestamp = getFirstValue(ap, vn, basePath + xpath("datestamp"));
        if (StringUtils.isNotBlank(datestamp)) {
            od.setDatestamp(datestamp);
        }
        final String metadataNamespace = getFirstValue(ap, vn, basePath + xpath("metadataNamespace"));
        if (StringUtils.isNotBlank(metadataNamespace)) {
            od.setMetadataNamespace(metadataNamespace);
        }
        // The recursion stops at the first level where the nested path matches no node.
        final OriginDescription originDescription = getOriginDescription(ap, vn, basePath + xpath("originDescription"));
        if (originDescription.hasHarvestDate()) {
            od.setOriginDescription(originDescription);
        }

        return od.build();
    }

    /**
     * Wraps the origin description read from record/about/provenance/originDescription
     * into an {@link OAIProvenance} message.
     */
    private OAIProvenance getOaiProvenance(final AutoPilot ap, final VTDNav vn) throws VtdException {
        return OAIProvenance.newBuilder()
                .setOriginDescription(getOriginDescription(ap, vn, xpath("record", "about", "provenance", "originDescription")))
                .build();
    }

    /**
     * Returns {@code info} unchanged when it is already initialized; otherwise builds a new
     * DataInfo from the record, using this parser's invisible/provenance/trust defaults.
     */
    private FieldTypeProtos.DataInfo.Builder ensureDataInfo(
            final AutoPilot ap, final VTDNav vn,
            final DataInfo.Builder info) throws VtdException {

        if (info.isInitialized()) return info;
        return buildDataInfo(ap, vn, invisible, provenance, trust, false, false);
    }

    /**
     * Builds the DataInfo for the record: starts from the supplied defaults and overrides them
     * with the values found under record/about/datainfo when that path matches at least one node.
     */
    private FieldTypeProtos.DataInfo.Builder buildDataInfo(
            final AutoPilot ap,
            final VTDNav vn,
            final boolean invisible,
            final String defaultProvenanceaction,
            final String defaultTrust,
            final boolean defaultDeletedbyinference,
            final boolean defaultInferred) throws VtdException {

        final DataInfo.Builder dataInfoBuilder = FieldTypeProtos.DataInfo.newBuilder()
                .setInvisible(invisible)
                .setInferred(defaultInferred)
                .setDeletedbyinference(defaultDeletedbyinference)
                .setTrust(defaultTrust)
                .setProvenanceaction(getSimpleQualifier(defaultProvenanceaction, DNET_PROVENANCE_ACTIONS));

        // Only read the record-level values when a datainfo section is actually present.
        final String xpath = xpath("record", "about", "datainfo");
        if (getNodes(ap, vn, xpath).size() > 0) {
            final Map<String, String> provAction = getNode(ap, vn, xpath + xpath("provenanceaction")).getAttributes();
            dataInfoBuilder
                    .setInvisible(Boolean.valueOf(getValue(getNode(ap, vn, xpath + xpath("invisible")), String.valueOf(invisible))))
                    .setInferred(Boolean.valueOf(getValue(getNode(ap, vn, xpath + xpath("inferred")), String.valueOf(defaultInferred))))
                    .setDeletedbyinference(Boolean.valueOf(
                            getValue(getNode(ap, vn, xpath + xpath("deletedbyinference")), String.valueOf(defaultDeletedbyinference))))
                    .setTrust(getValue(getNode(ap, vn, xpath + xpath("trust")), defaultTrust))
                    .setInferenceprovenance(getValue(getNode(ap, vn, xpath + xpath("inferenceprovenance")), ""))
                    .setProvenanceaction(getSimpleQualifier(
                            getValue(provAction.get(CLASSID), defaultProvenanceaction),
                            DNET_PROVENANCE_ACTIONS));
        }

        return dataInfoBuilder;
    }

}
Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/SpecificationMap.java
===================================================================
--- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/SpecificationMap.java (nonexistent)
+++ 
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/SpecificationMap.java (revision 58513) @@ -0,0 +1,15 @@ +package eu.dnetlib.data.transform.xml2; + +import java.util.HashMap; + +import com.google.protobuf.Descriptors.Descriptor; + +public class SpecificationMap extends HashMap { + + @Override + public SpecificationDescriptor put(Descriptor d, SpecificationDescriptor mp) { + super.put(d, mp); + return mp; + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/ConfigurationTestConfig.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/ConfigurationTestConfig.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/transform/ConfigurationTestConfig.java (revision 58513) @@ -0,0 +1,37 @@ +package eu.dnetlib.data.transform; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; + +import com.mongodb.MongoClient; +import com.mongodb.client.MongoDatabase; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.io.ClassPathResource; + +@Configuration +public class ConfigurationTestConfig { + + @Bean + public MongoDatabase db() throws IOException { + + final Properties p = testProperties(); + + final MongoClient mongo = new MongoClient( + p.getProperty("mongodb.host"), + Integer.valueOf(p.getProperty("mongodb.port"))); + return mongo.getDatabase(p.getProperty("mongodb.dbname")); + } + + @Bean + public Properties testProperties() throws IOException { + final Properties p = new Properties(); + final ClassPathResource cp = new ClassPathResource("test.properties"); + 
try (final InputStream stream = cp.getInputStream()) { + p.load(stream); + } + return p; + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/mapreduce/util/OafDecoderTest.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/mapreduce/util/OafDecoderTest.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/mapreduce/util/OafDecoderTest.java (revision 58513) @@ -0,0 +1,28 @@ +package eu.dnetlib.data.mapreduce.util; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; + +import java.util.List; + +import org.junit.Test; + +import eu.dnetlib.data.proto.KindProtos.Kind; +import eu.dnetlib.miscutils.functional.xml.IndentXmlString; + +public class OafDecoderTest { + + @Test + public void testAsXml() { + + final OafDecoder decoder = OafTest.embed(OafTest.getResult("50|id_1"), Kind.entity); + + assertNotNull(decoder); + + assertNotNull(decoder.asXml()); + + System.out.println(IndentXmlString.apply(decoder.asXml())); + + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Datasource.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Datasource.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Datasource.java (revision 58513) @@ -0,0 +1,64 @@ +package eu.dnetlib.data.bulktag; + + +import com.google.gson.Gson; +import eu.dnetlib.data.bulktag.selectioncriteria.VerbResolver; +import org.apache.commons.logging.Log; +import 
org.apache.commons.logging.LogFactory; +import org.dom4j.Node; + +/** + * Created by miriam on 01/08/2018. + */ +public class Datasource { + private static final Log log = LogFactory.getLog(Datasource.class); + + private String openaireId; + + private SelectionConstraints selectionConstraints; + + + public SelectionConstraints getSelCriteria() { + return selectionConstraints; + } + + public SelectionConstraints getSelectionConstraints() { + return selectionConstraints; + } + + public void setSelectionConstraints(SelectionConstraints selectionConstraints) { + this.selectionConstraints = selectionConstraints; + } + + public void setSelCriteria(SelectionConstraints selCriteria) { + this.selectionConstraints = selCriteria; + } + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(String openaireId) { + this.openaireId = openaireId; + } + + private void setSelCriteria(String json, VerbResolver resolver){ + log.info("Selection constraints for datasource = " + json); + selectionConstraints = new Gson().fromJson(json, SelectionConstraints.class); + + selectionConstraints.setSelection(resolver); + } + + public void setSelCriteria(Node n, VerbResolver resolver){ + try{ + setSelCriteria(n.getText(),resolver); + }catch(Exception e) { + log.info("not set selection criteria... 
"); + selectionConstraints =null; + } + + } + + + +} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/mapreduce/util/OafRelDecoderTest.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/mapreduce/util/OafRelDecoderTest.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/data/mapreduce/util/OafRelDecoderTest.java (revision 58513) @@ -0,0 +1,44 @@ +package eu.dnetlib.data.mapreduce.util; + +import eu.dnetlib.data.proto.DedupProtos.Dedup.RelName; +import eu.dnetlib.data.proto.OafProtos.OafRel; +import eu.dnetlib.data.proto.RelTypeProtos.RelType; +import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +public class OafRelDecoderTest { + + private OafRel oafRel; + + @Before + public void setUp() { + oafRel = OafTest.getDedupRel("ID_1", "ID_2", RelType.resultResult, "isMergedIn"); + } + + @Test + public void testSetClass() { + + OafRelDecoder d1 = OafRelDecoder.decode(oafRel); + + assertNotNull(d1); + assertEquals("isMergedIn", d1.getRelClass()); + + OafRelDecoder d2 = OafRelDecoder.decode(d1.setClassId("isMergedIn").build()); + + assertEquals("isMergedIn", d2.getRelClass()); + assertEquals("isMergedIn", d2.getRelMetadata().getSemantics().getClassid()); + assertEquals("isMergedIn", d2.getRelMetadata().getSemantics().getClassname()); + + } + + @Test + public void testGetCF() { + assertEquals("resultResult_dedup_isMergedIn", OafRelDecoder.getCFQ(RelType.resultResult, SubRelType.dedup, RelName.isMergedIn)); + assertEquals("resultResult_dedup_isMergedIn", OafRelDecoder.getCFQ(RelType.resultResult, SubRelType.dedup, 
"isMergedIn")); + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/VtdUtilityParser.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/VtdUtilityParser.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/VtdUtilityParser.java (revision 58513) @@ -0,0 +1,119 @@ +package eu.dnetlib.data.transform.xml2; + +import java.util.*; + +import com.ximpleware.*; +import org.apache.commons.lang3.StringUtils; + +public class VtdUtilityParser { + + public static final String NS_SEPARATOR = ":"; + + public static String xpath(final String ... p) { + return Arrays.stream(p) + .map(s -> String.format("/*[local-name()='%s']", s)) + .reduce((s1, s2) -> s1 + s2) + .get(); + } + + public static VTDGen parseXml(final String xml) throws VtdException { + final VTDGen vg = new VTDGen(); + vg.setDoc(xml.getBytes()); + try { + vg.parse(true); + } catch (ParseException e) { + throw new VtdException(e); + } + return vg; + } + + public static int countNodes(final AutoPilot ap, final VTDNav vn, final String xpath) throws VtdException { + if (StringUtils.isBlank(xpath)) { + return 0; + } + try { + ap.selectXPath(xpath); + ap.bind(vn); + final Double i = ap.evalXPathToNumber(); + return i.intValue(); + } catch (XPathParseException e) { + throw new VtdException(e); + } + } + + public static Node getNode(final AutoPilot ap, final VTDNav vn, final String xpath) throws VtdException { + if (StringUtils.isBlank(xpath)) { + return new Node(); + } + try { + ap.selectXPath(xpath); + + while (ap.evalXPath() != -1) { + return asNode(vn); + } + + return null; + } catch (Exception e) { + throw new VtdException(e); + } + } + + public static List getNodes(final AutoPilot 
ap, final VTDNav vn, final String xpath) throws VtdException { + final List results = new ArrayList<>(); + if (StringUtils.isBlank(xpath)) { + return results; + } + try { + ap.selectXPath(xpath); + + while (ap.evalXPath() != -1) { + results.add(asNode(vn)); + } + return results; + } catch (Exception e) { + throw new VtdException(e); + } + } + + private static Node asNode(final VTDNav vn) throws NavException { + final Node currentNode = new Node(); + final String name = vn.toRawString(vn.getCurrentIndex()); + currentNode.setName(name.contains(NS_SEPARATOR) ? StringUtils.substringAfter(name, NS_SEPARATOR) : name); + + int t = vn.getText(); + if (t >= 0) { + currentNode.setTextValue(vn.toNormalizedString(t)); + } + currentNode.setAttributes(getAttributes(vn)); + return currentNode; + } + + private static Map getAttributes(final VTDNav vn) throws NavException { + final AutoPilot ap = new AutoPilot(vn); + ap.selectAttr("*"); + Map attributes = new HashMap<>(); + int i; + while((i = ap.iterateAttr()) != -1){ + attributes.put(vn.toNormalizedString(i),vn.toNormalizedString(i + 1) ); + } + return attributes; + } + + public static String getFirstValue(final AutoPilot ap, final VTDNav nav, final String xpath) throws VtdException { + if (StringUtils.isBlank(xpath)) { + return null; + } + try { + ap.selectXPath(xpath); + while (ap.evalXPath() != -1) { + int it = nav.getText(); + if (it > -1) + return nav.toNormalizedString(it); + } + return null; + } catch (Exception e) { + throw new VtdException(e); + } + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/openaire/exporter/model/ProjectTest.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/openaire/exporter/model/ProjectTest.java (nonexistent) +++ 
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/java/eu/dnetlib/openaire/exporter/model/ProjectTest.java (revision 58513) @@ -0,0 +1,50 @@ +package eu.dnetlib.openaire.exporter.model; + +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class ProjectTest { + + Project pMZOS; + Project pFP7; + + @Before + public void setUp() throws Exception { + pMZOS = new Project() + .setFunder("MZOS") + .setJurisdiction("HR") + .setFundingpathid("irb_hr______::MZOS") + .setAcronym("") + .setTitle("Project Title") + .setCode("115-1152437-2500") + .setStartdate("2007-01-01") + .setEnddate("2009-01-01"); + + pFP7 = new Project() + .setFunder("EC") + .setJurisdiction("EU") + .setFundingpathid("ec__________::EC::FP7::SP1::NMP") + .setAcronym("REFFIBRE") + .setTitle("Project Title") + .setCode("604187") + .setStartdate("2013-11-01") + .setEnddate("20015-01-01"); + } + + @Test + public void testIdNamespaceMZOS(){ + String ns = pMZOS.getIdnamespace(); + assertEquals("info:eu-repo/grantAgreement/MZOS//115-1152437-2500/HR", ns); + } + + @Test + public void testIdNamespaceFP7(){ + String ns = pFP7.getIdnamespace(); + assertEquals("info:eu-repo/grantAgreement/EC/FP7/604187/EU", ns); + } + + + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/Dom4jUtilityParser.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/Dom4jUtilityParser.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/xml2/Dom4jUtilityParser.java (revision 58513) @@ -0,0 +1,89 @@ +package eu.dnetlib.data.transform.xml2; + +import java.io.StringReader; +import java.util.*; + +import 
org.apache.commons.lang3.StringUtils; +import org.dom4j.Attribute; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.Element; +import org.dom4j.io.SAXReader; + +public class Dom4jUtilityParser { + + public static final String NS_SEPARATOR = ":"; + + public static String xpath(final String ... p) { + return Arrays.stream(p) + .map(s -> String.format("/*[local-name()='%s']", s)) + .reduce((s1, s2) -> s1 + s2) + .get(); + } + + public static Document parseXml(final String xml) throws DocumentException { + return new SAXReader().read(new StringReader(xml)); + } + + public static int countNodes(final Document document, final String xpath) throws VtdException { + if (StringUtils.isBlank(xpath)) { + return 0; + } + List res = document.selectNodes(xpath); + return res != null ? res.size() : 0; + } + + public static Node getNode(final Document document, final String xpath) throws VtdException { + if (StringUtils.isBlank(xpath)) { + return new Node(); + } + + return asNode(document.selectSingleNode(xpath)); + } + + public static List getNodes(final Document document, final String xpath) throws VtdException { + final List results = new ArrayList<>(); + if (StringUtils.isBlank(xpath)) { + return results; + } + + for(final Object o : document.selectNodes(xpath)) { + results.add(asNode((org.dom4j.Node) o)); + } + + return results; + } + + private static Node asNode(final org.dom4j.Node dNode) { + final Node currentNode = new Node(); + if (dNode == null) return currentNode; + final String name = dNode.getName(); + currentNode.setName(name.contains(NS_SEPARATOR) ? 
StringUtils.substringAfter(name, NS_SEPARATOR) : name); + currentNode.setTextValue(dNode.getText()); + currentNode.setAttributes(getAttributes(dNode)); + + return currentNode; + } + + private static Map getAttributes(final org.dom4j.Node dNode) { + final Map attributes = new HashMap<>(); + + if (dNode instanceof Element) { + Iterator it = ((Element) dNode).attributeIterator(); + while(it.hasNext()) { + final Attribute a = it.next(); + attributes.put(a.getName(), a.getValue()); + } + } + return attributes; + } + + public static String getFirstValue(final Document document, final String xpath) throws VtdException { + if (StringUtils.isBlank(xpath)) { + return null; + } + + return document.valueOf(xpath); + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Organization.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Organization.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Organization.java (revision 58513) @@ -0,0 +1,42 @@ +package eu.dnetlib.data.bulktag; + +import com.google.gson.Gson; +import eu.dnetlib.data.bulktag.selectioncriteria.VerbResolver; +import org.dom4j.Node; + +public class Organization { + private String organizationId; + + private SelectionConstraints selCriteria; + + public String getOrganizationId() { + return organizationId; + } + + public void setOrganizationId(String organizationId) { + this.organizationId = organizationId; + } + + public SelectionConstraints getSelCriteria() { + return selCriteria; + } + + public void setSelCriteria(SelectionConstraints selCriteria) { + this.selCriteria = selCriteria; + } + + private void setSelCriteria(String json){ + //Type collectionType = new TypeToken>(){}.getType(); + 
selCriteria = new Gson().fromJson(json, SelectionConstraints.class); + + } + + public void setSelCriteria(Node n){ + if (n==null){ + selCriteria = null; + }else{ + setSelCriteria(n.getText()); + } + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Constraints.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Constraints.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Constraints.java (revision 58513) @@ -0,0 +1,77 @@ +package eu.dnetlib.data.bulktag; + +import com.google.common.reflect.TypeToken; +import com.google.gson.Gson; +import eu.dnetlib.data.bulktag.selectioncriteria.VerbResolver; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + + +import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Type; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +/** + * Created by miriam on 02/08/2018. 
+ */ +public class Constraints implements Serializable { + private static final Log log = LogFactory.getLog(Constraints.class); + //private ConstraintEncapsulator ce; + private List constraint; + + + public Constraints() { + } + public List getConstraint() { + return constraint; + } + + public void setConstraint(List constraint) { + this.constraint = constraint; + } + + public void setSc(String json){ + Type collectionType = new TypeToken>(){}.getType(); + constraint = new Gson().fromJson(json, collectionType); + + } + + void setSelection(VerbResolver resolver) { + for(Constraint st: constraint){ + + try { + st.setSelection(resolver); + } catch (NoSuchMethodException e) { + log.error(e.getMessage()); + } catch (IllegalAccessException e) { + log.error(e.getMessage()); + } catch (InvocationTargetException e) { + log.error(e.getMessage()); + } catch (InstantiationException e) { + log.error(e.getMessage()); + } + } + + } + + + //Constraint in and + public boolean verifyCriteria(final Map> param) { + + for(Constraint sc : constraint) { + boolean verified = false; + for(String value : param.get(sc.getField())){ + if (sc.verifyCriteria(value.trim())){ + verified = true; + } + } + if(!verified) + return verified; + } + return true; + } + +} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/NotEqualVerb.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/NotEqualVerb.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/NotEqualVerb.java (revision 58513) @@ -0,0 +1,29 @@ +package eu.dnetlib.data.bulktag.selectioncriteria; + + +@VerbClass("not_equals") +public class NotEqualVerb 
implements Selection { + + private String param; + + + public NotEqualVerb(final String param) { + this.param = param; + } + + public NotEqualVerb() { + } + + public String getParam() { + return param; + } + + public void setParam(String param) { + this.param = param; + } + + @Override + public boolean apply(String value) { + return !value.equalsIgnoreCase(param); + } +} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/VerbResolver.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/VerbResolver.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/VerbResolver.java (revision 58513) @@ -0,0 +1,24 @@ +package eu.dnetlib.data.bulktag.selectioncriteria; + +import org.reflections.Reflections; + +import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.util.Map; +import java.util.stream.Collectors; + +public class VerbResolver implements Serializable { + private final Map> map; + + public VerbResolver(){ + this.map = new Reflections("eu.dnetlib").getTypesAnnotatedWith(VerbClass.class).stream() + .collect(Collectors.toMap(v -> v.getAnnotation(VerbClass.class).value(), v->(Class)v)); + } + + + public Selection getSelectionCriteria(String name, String param) throws NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException { + + return map.get(name).getDeclaredConstructor((String.class)).newInstance(param); + + } +} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/transform/publication.xml 
=================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/transform/publication.xml (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/transform/publication.xml (revision 58513) @@ -0,0 +1,70 @@ + + + + od______1064::fe947e59cf7db2f039b4c8cc25693fb0 + 95168db1-d57e-4b99-855b-993cf91d1283_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==::oai:ora.ouls.ox.ac.uk:uuid:5d8f6cbb-1283-4957-8c55-48a4024bed76 + + + + urn:uuid:5d8f6cbb-1283-4957-8c55-48a4024bed76 + 2a02b271-0756-453c-b2f0-8c472a8806a5_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU= + + 2013-05-10T16:04:02Z + od______1064 + + + Uphoff, S + Holden, SJ + 2011-01-01 + The analysis of structure and dynamics of biomolecules is important for understanding their function. Toward this aim, we introduce a method called 'switchable FRET', which combines single-molecule fluorescence resonance energy transfer (FRET) with reversible photoswitching of fluorophores. Typically, single-molecule FRET is measured within a single donor-acceptor pair and reports on only one distance. Although multipair FRET approaches that monitor multiple distances have been developed, they are technically challenging and difficult to extend, mainly because of their reliance on spectrally distinct acceptors. In contrast, switchable FRET sequentially probes FRET between a single donor and spectrally identical photoswitchable acceptors, dramatically reducing the experimental and analytical complexity and enabling direct monitoring of multiple distances. Our experiments on DNA molecules, a protein-DNA complex and dynamic Holliday junctions demonstrate the potential of switchable FRET for studying dynamic, multicomponent biomolecules. 
+ http://pub.uni-bielefeld.de/publication/2303387 + eng + Monitoring multiple distances within a single molecule using switchable FRET. + Symplectic Elements at Oxford + PubMed (http://www.ncbi.nlm.nih.gov/pubmed/) + Web of Science (Lite) (http://apps.webofknowledge.com/summary.do) + Biotinylation + Computer Simulation + 0001 + urn:uuid:5d8f6cbb-1283-4957-8c55-48a4024bed76 + pii:nmeth.1502 + local:71163 + eissn:1548-7105 + doi:10.1038/nmeth.1502 + issn:1548-7091 + Silver Spring, MD + 2016-12-31 + EMBARGO + issn____::12345678 + + + http://xyz + My favourite journal + My second favourite journal + corda_______::609823 + 10.1038/nmeth.1502 + http://creativecommons.org/licenses/by/3.0/ + http://www.ebi.ac.uk/interpro/entry/IPR004915 + http://www.ebi.ac.uk/interpro/entry/TTTTTTTTT + 10.1038/nmeth.1500 + + + + + https://epublications.vu.lt/oai + oai:elaba:11687676 + 2018-01-19T13:21:15Z + http://www.openarchives.org/OAI/2.0/oai_dc/ + + + + false + false + 0.9 + + + + + Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/transform/author_1000.json.gz =================================================================== Cannot display: file marked as a binary type. 
svn:mime-type = application/octet-stream Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/transform/author_1000.json.gz =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/transform/author_1000.json.gz (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/transform/author_1000.json.gz (revision 58513) Property changes on: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/transform/author_1000.json.gz ___________________________________________________________________ Added: svn:mime-type ## -0,0 +1 ## +application/octet-stream \ No newline at end of property Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/transform/dataset.xml =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/transform/dataset.xml (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/transform/dataset.xml (revision 58513) @@ -0,0 +1,90 @@ + + + + r37b0ad08687::00066f8025c9f9c99eaf3bba41e0ee0b + 10.5281/zenodo.11877 + 2018-04-30T02:17:56.67Z + r37b0ad08687 + oai:zenodo.org:11877 + 2017-05-30T02:33:14Z + user-dighl + openaire_data + user-zenodo + 2018-04-30T02:21:21.245Z + + + + 10.5281/zenodo.11877 + + http://dx.doi.org/10.5281/zenodo.11877 + + + + Johann-Mattis List + + + + Supplementary Material For "Sequence Comparison In Historical Linguistics" + + Zenodo + 2014 + + historical linguistics + phonetic alignment + cognate detection + 
computational linguistics + + + 2014-09-26 + + + + https://github.com/SequenceComparison/SupplementaryMaterial/tree/v1.0 + 10.1038/nmeth.1502 + + + Creative Commons Attribution 4.0 + Open Access + + + This is the official version of the supplementary material which was used as the basis for the study on "Sequence Comparison in Historical Linguistics" (List, Dusseldorf, Dusseldorf University Press). + + + 0021 + 2014-01-01 + OPEN + und + corda_______::609823 + + + + + + + https://zenodo.org/oai2d + oai:zenodo.org:11877 + 2017-05-30T02:33:14Z + + + + + false + false + 0.9 + + + + + Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/bulktag/community_configuration_selcrit.xml =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/bulktag/community_configuration_selcrit.xml (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/bulktag/community_configuration_selcrit.xml (revision 58513) @@ -0,0 +1,193 @@ + + + + + + + + + + + + + + + + + + + + SDG13 - Climate action + SDG8 - Decent work and economic growth + SDG15 - Life on land + SDG2 - Zero hunger + SDG17 - Partnerships for the goals + SDG10 - Reduced inequalities + SDG5 - Gender equality + SDG12 - Responsible consumption and production + SDG14 - Life below water + SDG6 - Clean water and sanitation + SDG11 - Sustainable cities and communities + SDG1 - No poverty + SDG3 - Good health and well being + SDG7 - Affordable and clean energy + SDG4 - Quality education + SDG9 - Industry innovation and infrastructure + SDG16 - Peace justice and strong institutions + + + + + 123 + + + + + + + + + + + + + + + + + brain mapping + brain imaging + electroencephalography + arterial spin labelling + brain fingerprinting + brain + neuroimaging + 
Multimodal Brain Image Analysis + fMRI + neuroinformatics + fetal brain + brain ultrasonic imaging + topographic brain mapping + diffusion tensor imaging + computerized knowledge assessment + connectome mapping + brain magnetic resonance imaging + brain abnormalities + + + + re3data_____::5b9bf9171d92df854cf3c520692e9122 + + + + doajarticles::c7d3de67dc77af72f6747157441252ec + + + + re3data_____::8515794670370f49c1d176c399c714f5 + + + + doajarticles::d640648c84b10d425f96f11c3de468f3 + + + + doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a + + + + + + + + marine + ocean + fish + aqua + sea + + + + re3data_____::9633d1e8c4309c833c2c442abeb0cfeb + + + + + + + + animal production and health + fisheries and aquaculture + food safety and human nutrition + information management + food technology + agri-food education and extension + natural resources and environment + food system + engineering technology and Research + agriculture + food safety risk assessment + food security + farming practices and systems + plant production and protection + agri-food economics and policy + food distribution + forestry + + + + opendoar____::1a551829d50f1400b0dab21fdd969c04 + + + + opendoar____::49af6c4e558a7569d80eee2e035e2bd7 + + + + opendoar____::0266e33d3f546cb5436a10798e657d97 + + + + opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06 + + + + opendoar____::41bfd20a38bb1b0bec75acf0845530a7 + + + + opendoar____::87ae6fb631f7c8a627e8e28785d9992d + + + + + + + oac_clarin + + + + re3data_____::a507cdacc5bbcc08761c92185dee5cab + + + + + + + oaa_dariah + + + + openaire____::1cfdb2e14977f31a98e0118283401f32 + {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} + + + + + + dimpo + + + + + \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/bulktag/community_configuration.xml =================================================================== --- 
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/bulktag/community_configuration.xml (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/bulktag/community_configuration.xml (revision 58513) @@ -0,0 +1,176 @@ + + + + + + + + + + + + + + + + + + + + SDG13 - Climate action + SDG8 - Decent work and economic growth + SDG15 - Life on land + SDG2 - Zero hunger + SDG17 - Partnerships for the goals + SDG10 - Reduced inequalities + SDG5 - Gender equality + SDG12 - Responsible consumption and production + SDG14 - Life below water + SDG6 - Clean water and sanitation + SDG11 - Sustainable cities and communities + SDG1 - No poverty + SDG3 - Good health and well being + SDG7 - Affordable and clean energy + SDG4 - Quality education + SDG9 - Industry innovation and infrastructure + SDG16 - Peace justice and strong institutions + + + + + 123 + + + + + + + + + + + + + + + + + brain mapping + brain imaging + electroencephalography + arterial spin labelling + brain fingerprinting + brain + neuroimaging + Multimodal Brain Image Analysis + fMRI + neuroinformatics + fetal brain + brain ultrasonic imaging + topographic brain mapping + diffusion tensor imaging + computerized knowledge assessment + connectome mapping + brain magnetic resonance imaging + brain abnormalities + + + + re3data_____::5b9bf9171d92df854cf3c520692e9122 + + + + doajarticles::c7d3de67dc77af72f6747157441252ec + + + + re3data_____::8515794670370f49c1d176c399c714f5 + + + + doajarticles::d640648c84b10d425f96f11c3de468f3 + + + + doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a + + + + + + + + marine + ocean + fish + aqua + sea + + + + re3data_____::9633d1e8c4309c833c2c442abeb0cfeb + + + + + + + + animal production and health + fisheries and aquaculture + food safety and human nutrition + information management + food technology + agri-food education and extension + natural 
resources and environment + food system + engineering technology and Research + agriculture + food safety risk assessment + food security + farming practices and systems + plant production and protection + agri-food economics and policy + food distribution + forestry + + + + opendoar____::1a551829d50f1400b0dab21fdd969c04 + + + + opendoar____::49af6c4e558a7569d80eee2e035e2bd7 + + + + opendoar____::0266e33d3f546cb5436a10798e657d97 + + + + opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06 + + + + opendoar____::41bfd20a38bb1b0bec75acf0845530a7 + + + + opendoar____::87ae6fb631f7c8a627e8e28785d9992d + + + + + + + oac_clarin + + + + re3data_____::a507cdacc5bbcc08761c92185dee5cab + + + + + + \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Constraint.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Constraint.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/bulktag/Constraint.java (revision 58513) @@ -0,0 +1,61 @@ +package eu.dnetlib.data.bulktag; + +import eu.dnetlib.data.bulktag.selectioncriteria.Selection; +import eu.dnetlib.data.bulktag.selectioncriteria.VerbResolver; +import org.springframework.beans.factory.annotation.Autowired; + +import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; + + +public class Constraint implements Serializable { + private String verb; + private String field; + private String value; + private Selection selection; + + public Constraint() { + } + + public String getVerb() { + return verb; + } + + public void setVerb(String verb) { + this.verb = verb; + } + + public String getField() { + return field; + } + + public void setField(String field) { + this.field = field; + } + + 
public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } + + + + public void setSelection(Selection sel){ + selection = sel; + } + + public void setSelection(VerbResolver resolver) throws InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException { + selection = resolver.getSelectionCriteria(verb,value); + } + + + public boolean verifyCriteria(String metadata){ + return selection.apply(metadata); + } + + + +} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/mapreduce/util/RelDescriptor.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/mapreduce/util/RelDescriptor.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/mapreduce/util/RelDescriptor.java (revision 58513) @@ -0,0 +1,77 @@ +package eu.dnetlib.data.mapreduce.util; + +import eu.dnetlib.data.proto.RelTypeProtos.RelType; +import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; + +public class RelDescriptor { + + public static final String SEPARATOR = "_"; + + private final String it; + + // relType also corresponds to the Ontology code + private final RelType relType; + + private final SubRelType subRelType; + + private final String relClass; + + + // + public RelDescriptor(final String value) { + super(); + this.it = value; + + String[] s = value.split(SEPARATOR); + + this.relType = RelType.valueOf(s[0]); + this.subRelType = SubRelType.valueOf(s[1]); + this.relClass = s[2]; + + } + + public SubRelType getSubRelType() { + return subRelType; + } + + public RelType getRelType() { + return relType; + } + + public String getRelClass() { + return relClass; + } + + public String getIt() { + return it; + } 
+ + + @Override + public String toString() { + return getIt(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = (prime * result) + ((it == null) ? 0 : it.hashCode()); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + RelDescriptor other = (RelDescriptor) obj; + if (it == null) { + if (other.it != null) return false; + } else if (!it.equals(other.it)) return false; + return true; + } + +} Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/bulktag/test.xml =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/bulktag/test.xml (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/data/bulktag/test.xml (revision 58513) @@ -0,0 +1,17 @@ + + + + + + opendoar____::7e7757b1e12abcb736ab9a754ffb617a + {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} + + + + + dimpo + + + + + \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/deploy.info =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/deploy.info (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/deploy.info (revision 58513) @@ -0,0 +1,2 @@ +{"type_source": "SVN", "goal": "package -U source:jar", +"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-mapping-utils/trunk/", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": 
"sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-openaireplus-mapping-utils"} \ No newline at end of file Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/openaire/exporter/model/projectDetails.csv =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/openaire/exporter/model/projectDetails.csv (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/test/resources/eu/dnetlib/openaire/exporter/model/projectDetails.csv (revision 58513) @@ -0,0 +1,5 @@ +nih_________::3R01GM073898-02S1,,3R01GM073898-02S1,23188 $,,"{""orgname"":""UNIVERSITY OF CALIFORNIA SAN DIEGO"", ""activity"":""R01"", ""administeringic"":""GM"", ""serialnumber"":""73898"", ""coreprojectnum"":""R01GM073898""}","[""\u003cfundingtree\u003e\n \u003cfunder\u003e\n \u003cid\u003enih_________::NIH\u003c/id\u003e\n \u003cshortname\u003eNIH\u003c/shortname\u003e\n \u003cname\u003eNational Institutes of Health\u003c/name\u003e\n \u003cjurisdiction\u003eUS\u003c/jurisdiction\u003e\n \u003c/funder\u003e\n \u003cfunding_level_0\u003e\n \u003cid\u003enih_________::NIH::NATIONAL_INSTITUTE_OF_GENERAL_MEDICAL_SCIENCES\u003c/id\u003e\n \u003cname\u003eNATIONAL INSTITUTE OF GENERAL MEDICAL SCIENCES\u003c/name\u003e\n \u003cdescription\u003eNATIONAL INSTITUTE OF GENERAL MEDICAL SCIENCES\u003c/description\u003e\n \u003cparent/\u003e\n \u003cclass\u003enih:fundingStream\u003c/class\u003e\n \u003c/funding_level_0\u003e\n \u003c/fundingtree\u003e""]" 
+corda_______::100202,RECOMP,100202,JTI-CP-ARTEMIS,http://cordis.europa.eu/fp7/home_en.html,{},"[""\u003cfundingtree\u003e\u003cfunder\u003e\u003cid\u003eec__________::EC\u003c/id\u003e\u003cshortname\u003eEC\u003c/shortname\u003e\u003cname\u003eEuropean Commission\u003c/name\u003e\u003cjurisdiction\u003eEU\u003c/jurisdiction\u003e\u003c/funder\u003e\u003cfunding_level_2\u003e\u003cid\u003eec__________::EC::FP7::SP1::SP1-JTI\u003c/id\u003e\u003cdescription\u003eJoint Technology Initiatives (Annex IV-SP1)\u003c/description\u003e\u003cname\u003eSP1-JTI\u003c/name\u003e\u003cclass\u003eec:program\u003c/class\u003e\u003cparent\u003e\u003cfunding_level_1\u003e\u003cid\u003eec__________::EC::FP7::SP1\u003c/id\u003e\u003cdescription\u003eSP1-Cooperation\u003c/description\u003e\u003cname\u003eSP1\u003c/name\u003e\u003cclass\u003eec:specificprogram\u003c/class\u003e\u003cparent\u003e\u003cfunding_level_0\u003e\u003cid\u003eec__________::EC::FP7\u003c/id\u003e\u003cdescription\u003eSEVENTH FRAMEWORK PROGRAMME\u003c/description\u003e\u003cname\u003eFP7\u003c/name\u003e\u003cparent/\u003e\u003cclass\u003eec:frameworkprogram\u003c/class\u003e\u003c/funding_level_0\u003e\u003c/parent\u003e\u003c/funding_level_1\u003e\u003c/parent\u003e\u003c/funding_level_2\u003e\u003c/fundingtree\u003e""]" +corda__h2020::633080,MACC-III,633080,SPACE,SPACE,{},"[""\u003cfundingtree\u003e\u003cfunder\u003e\u003cid\u003eec__________::EC\u003c/id\u003e\u003cshortname\u003eEC\u003c/shortname\u003e\u003cname\u003eEuropean Commission\u003c/name\u003e\u003cjurisdiction\u003eEU\u003c/jurisdiction\u003e\u003c/funder\u003e\u003cfunding_level_1\u003e\u003cid\u003eec__________::EC::H2020::CSA\u003c/id\u003e\u003cdescription\u003eCoordination and support 
action\u003c/description\u003e\u003cname\u003eCSA\u003c/name\u003e\u003cclass\u003eec:h2020toas\u003c/class\u003e\u003cparent\u003e\u003cfunding_level_0\u003e\u003cid\u003eec__________::EC::H2020\u003c/id\u003e\u003cname\u003eH2020\u003c/name\u003e\u003cdescription\u003eHorizon 2020 Framework Programme\u003c/description\u003e\u003cparent/\u003e\u003cclass\u003eec:h2020fundings\u003c/class\u003e\u003c/funding_level_0\u003e\u003c/parent\u003e\u003c/funding_level_1\u003e\u003c/fundingtree\u003e""]" +nsf_________::0000096,,0000096,,,{},"[""\u003cfundingtree\u003e\u003cfunder\u003e\u003cid\u003ensf_________::NSF\u003c/id\u003e\u003cshortname\u003eNSF\u003c/shortname\u003e\u003cname\u003eNational Science Foundation\u003c/name\u003e\u003cjurisdiction\u003eUS\u003c/jurisdiction\u003e\u003c/funder\u003e\u003cfunding_level_1\u003e\u003cid\u003ensf_________::NSF::OD::OD/OIA\u003c/id\u003e\u003cdescription\u003eOffice of Integrative Activities\u003c/description\u003e\u003cname\u003eOffice of Integrative Activities\u003c/name\u003e\u003cparent\u003e\u003cfunding_level_0\u003e\u003cid\u003ensf_________::NSF::OD\u003c/id\u003e\u003cdescription\u003eOffice of the Director\u003c/description\u003e\u003cname\u003eOffice of the Director\u003c/name\u003e\u003cparent/\u003e\u003cclass\u003ensf:fundingStream\u003c/class\u003e\u003c/funding_level_0\u003e\u003c/parent\u003e\u003c/funding_level_1\u003e\u003c/fundingtree\u003e""]" +fct_________::100107,PTDC/SAU-ESA/100107/2008,100107,,,{},"[""\u003cfundingtree\u003e\u003cfunder\u003e\u003cid\u003efct_________::FCT\u003c/id\u003e\u003cshortname\u003eFCT\u003c/shortname\u003e\u003cname\u003eFundação para a Ciência e a Tecnologia, 
I.P.\u003c/name\u003e\u003cjurisdiction\u003ePT\u003c/jurisdiction\u003e\u003c/funder\u003e\u003cfunding_level_0\u003e\u003cid\u003efct_________::FCT::5876-PPCDTI\u003c/id\u003e\u003cdescription\u003e5876-PPCDTI\u003c/description\u003e\u003cname\u003e5876-PPCDTI\u003c/name\u003e\u003cparent/\u003e\u003cclass\u003efct:program\u003c/class\u003e\u003c/funding_level_0\u003e\u003c/fundingtree\u003e""]" Index: modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/OafEntityMerger.java =================================================================== --- modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/OafEntityMerger.java (nonexistent) +++ modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.43/src/main/java/eu/dnetlib/data/transform/OafEntityMerger.java (revision 58513) @@ -0,0 +1,360 @@ +package eu.dnetlib.data.transform; + +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.google.protobuf.Descriptors.FieldDescriptor; +import com.google.protobuf.Message.Builder; +import eu.dnetlib.data.proto.FieldTypeProtos; +import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; +import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; +import eu.dnetlib.data.proto.FieldTypeProtos.StringField; +import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; +import eu.dnetlib.data.proto.KindProtos.Kind; +import eu.dnetlib.data.proto.OafProtos.Oaf; +import eu.dnetlib.data.proto.OafProtos.OafEntity; +import eu.dnetlib.data.proto.OrganizationProtos.Organization; +import eu.dnetlib.data.proto.ResultProtos.Result; +import eu.dnetlib.data.proto.ResultProtos.Result.Instance; +import eu.dnetlib.data.proto.SpecialTrustProtos.SpecialTrust; +import 
eu.dnetlib.data.proto.TypeProtos.Type; +import eu.dnetlib.pace.config.DedupConfig; +import org.apache.commons.lang.StringUtils; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static java.util.stream.Collectors.toMap; + +public class OafEntityMerger { + + private static final String DEDUP_CLASSID = "sysimport:dedup"; + + private static final String DNET_PROVENANCE_SCHEME = "dnet:provenanceActions"; + + private final Predicate skipEmptyStringField = s -> (s != null) && (s.getValue() != null) && !s.getValue().isEmpty(); + + private final Predicate skipEmptyString = s -> StringUtils.isNotBlank(s); + + public static Oaf.Builder merge(final String id, final Iterable entities) { + return merge(null, id, entities); + } + + public static Oaf.Builder merge(final DedupConfig dedupConf, final String id, final Iterable entities) { + return new OafEntityMerger().mergeEntities(dedupConf, id, entities); + } + + public static Oaf.Builder merge(final Oaf.Builder builder) { + return new OafEntityMerger().doMergeEntities(builder); + } + + public Oaf.Builder mergeEntities(final DedupConfig dedupConf, final String id, final Iterable entities) { + + Oaf.Builder builder = Oaf.newBuilder(); + String trust = "0.0"; + + final Collection dateofacceptance = Lists.newArrayList(); + final Collection> authors = Lists.newArrayList(); + final Collection alternativenames = Lists.newArrayList(); + + for (final Oaf oaf : TrustOrdering.sort(entities)) { + // doublecheck we're dealing only with main entities + if (!oaf.getKind().equals(Kind.entity)) throw new IllegalArgumentException("expected OafEntity!"); + + final String currentTrust = oaf.getDataInfo().getTrust(); + if (!currentTrust.equals(SpecialTrust.NEUTRAL.toString())) { + trust = currentTrust; + } + + builder.mergeFrom(oaf); + + if (oaf.getEntity().getType().equals(Type.result)) { + final Result.Metadata mb = oaf.getEntity().getResult().getMetadata(); + + //gather the 
dateofacceptance(s) + dateofacceptance.add(mb.getDateofacceptance().getValue()); + + //gather the authors + authors.add(mb.getAuthorList()); + } + + if (oaf.getEntity().getType().equals(Type.organization)) { + + final Organization.Metadata mb = oaf.getEntity().getOrganization().getMetadata(); + + // gather the organization names + alternativenames.add(mb.getLegalname()); + alternativenames.add(mb.getLegalshortname()); + if (mb.getAlternativeNamesCount() > 0) { + alternativenames.addAll(mb.getAlternativeNamesList()); + } + } + } + + builder = doMergeEntities(builder); + builder.getEntityBuilder().setId(id); + builder.getDataInfoBuilder() + .setInvisible(false) + .setInferred(true) + .setDeletedbyinference(false) + .setTrust(trust) + .setInferenceprovenance(dedupConf != null ? dedupConf.getWf().getConfigurationId() : "") + .setProvenanceaction(getProvenanceAction()); + + if (builder.getEntity().getType().equals(Type.result)) { + Result.Metadata.Builder mb = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder(); + mb.setDateofacceptance(DatePicker.pick(dateofacceptance)); + mb.clearAuthor().addAllAuthor(AuthorMerger.merge(authors)); + } + + if (builder.getEntity().getType().equals(Type.organization)) { + if (!alternativenames.isEmpty()) { + Organization.Metadata.Builder mb = builder.getEntityBuilder().getOrganizationBuilder().getMetadataBuilder(); + mb.clearAlternativeNames(); + mb.addAllAlternativeNames( + alternativenames.stream() + .filter(sf -> StringUtils.isNotBlank(sf.getValue())) + .collect(toMap(StringField::getValue, sf -> sf, (s1, s2) -> s2)) + .values()); + } + } + + if ((dedupConf != null) && dedupConf.getWf().isIncludeChildren()) { + for (final Oaf oaf : Iterables.limit(entities, dedupConf.getWf().getMaxChildren())) { + builder.getEntityBuilder().addChildren(oaf.getEntity()); + } + } + + return builder; + } + + private Qualifier.Builder getProvenanceAction() { + return 
Qualifier.newBuilder().setClassid(DEDUP_CLASSID).setClassname(DEDUP_CLASSID).setSchemeid(DNET_PROVENANCE_SCHEME) + .setSchemename(DNET_PROVENANCE_SCHEME); + } + + public Oaf.Builder doMergeEntities(final Oaf.Builder builder) { + + for (final String field : OafUtils.getFieldNames(OafEntity.getDescriptor(), OafEntity.COLLECTEDFROM_FIELD_NUMBER)) { + setKeyValues(builder.getEntityBuilder(), field); + } + for (final String field : OafUtils.getFieldNames(OafEntity.getDescriptor(), OafEntity.PID_FIELD_NUMBER)) { + setStructuredProperty(builder.getEntityBuilder(), field); + } + for (final String field : OafUtils.getFieldNames(OafEntity.getDescriptor(), OafEntity.ORIGINALID_FIELD_NUMBER)) { + setUniqueString(builder.getEntityBuilder(), field); + } + + switch (builder.getEntity().getType()) { + case datasource: + break; + case organization: + break; + case project: + break; + case result: + final Result.Metadata.Builder result = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder(); + setTitle(result); + mergeInstances(builder.getEntityBuilder().getResultBuilder()); + + // for (String field : Lists.newArrayList("subject", "relevantdate")) { + for (final String field : OafUtils.getFieldNames(Result.Metadata.getDescriptor(), Result.Metadata.SUBJECT_FIELD_NUMBER, + Result.Metadata.RELEVANTDATE_FIELD_NUMBER)) { + setStructuredProperty(result, field); + } + for (final String field : OafUtils.getFieldNames(Result.Metadata.getDescriptor(), Result.Metadata.DESCRIPTION_FIELD_NUMBER)) { + setLongestStringField(result, field); + } + for (final String field : OafUtils.getFieldNames(Result.Metadata.getDescriptor(), Result.Metadata.SOURCE_FIELD_NUMBER)) { + setUniqueStringField(result, field); + } + + mergeContexts(result); + mergeCountries(result); + + break; + default: + break; + } + return builder; + } + + private void mergeCountries(Result.Metadata.Builder result) { + final Map cMap = Maps.newHashMap(); + for(Qualifier country : result.getCountryList()) { + if 
(!cMap.containsKey(country.getClassid())) { + cMap.put(country.getClassid(), Qualifier.newBuilder(country)); + } + } + if (!cMap.isEmpty()) { + result.clearCountry(); + for(Qualifier.Builder country : cMap.values()) { + result.addCountry(country.build()); + } + } + } + + private void mergeContexts(Result.Metadata.Builder result) { + final Map cMap = Maps.newHashMap(); + for(Result.Context c : result.getContextList()) { + if (!cMap.containsKey(c.getId())) { + //TODO merge DataInfo + cMap.put(c.getId(), Result.Context.newBuilder(c)); + } + } + if (!cMap.isEmpty()) { + result.clearContext(); + for (Result.Context.Builder b : cMap.values()) { + result.addContext(b.build()); + } + } + } + + private void mergeInstances(final Result.Builder builder) { + final Map map = Maps.newHashMap(); + for(Instance i : builder.getInstanceList()) { + + final String key = i.getHostedby().getKey() + i.getAccessright().getClassid() + i.getInstancetype().getClassid(); + if (!map.containsKey(key)) { + map.put(key, Instance.newBuilder().mergeFrom(i)); + continue; + } + + map.get(key).addAllUrl(i.getUrlList()); + } + + for(Instance.Builder i : map.values()) { + final Set urls = Sets.newHashSet(); + urls.addAll(i.getUrlList()); + i.clearUrl().addAllUrl(urls); + } + builder.clearInstance(); + builder.addAllInstance(Iterables.transform(map.values(), b -> b.build())); + } + + /** + * Helper method, avoid duplicated StructuredProperties in the given builder for the given fieldName + * + * @param builder + * @param fieldName + */ + @SuppressWarnings("unchecked") + private void setStructuredProperty(final Builder builder, final String fieldName) { + final Map
This is the official version of the supplementary material which was used as the basis for the study on "Sequence Comparison in Historical Linguistics" (List, Dusseldorf, Dusseldorf University Press).