Project

General

Profile

« Previous | Next » 

Revision 50818

Implemented incremental transform and resolving

View differences:

modules/dnet-pid-resolver/trunk/pom.xml
42 42
            <version>1.8.5</version>
43 43
            <scope>test</scope>
44 44
        </dependency>
45
        <dependency>
46
            <groupId>eu.dnetlib</groupId>
47
            <artifactId>dnet-msro-service</artifactId>
48
            <version>5.0.0-SNAPSHOT</version>
49
        </dependency>
45 50
    </dependencies>
46 51
</project>
webapps/dnet-dli-container/trunk/src/main/resources/eu/dnetlib/cnr-site.properties
66 66
#dnet.logger.mongo.host=localhost
67 67
dnet.logger.mongo.port=27017
68 68
dnet.modular.ui.authorization.mongo.host=playground-t.dnet.d4science.org
69
services.mdstore.mongodb.host=playground-t.dnet.d4science.org
69
#services.mdstore.mongodb.host=playground-t.dnet.d4science.org
70 70
#dnet.modular.ui.authorization.mongo.host=localhost
71
#services.mdstore.mongodb.host=localhost
72
services.dli.resolver.store.DatabaseName=resolvedStore
71
services.mdstore.mongodb.host=localhost
72
services.dli.resolver.store.DatabaseName=dliResolvedStore
73

  
74
services.dli.resolver.crossRef.dump=ES
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/transform/IncrementalTransformationJobNode.java
1
package eu.dnetlib.msro.workflows.nodes.transform;
2

  
3
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
4
import eu.dnetlib.msro.logging.DnetLogger;
5
import eu.dnetlib.msro.workflows.graph.Arc;
6
import eu.dnetlib.msro.workflows.nodes.DateProcessUtils;
7
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
8
import eu.dnetlib.msro.workflows.procs.Env;
9
import eu.dnetlib.msro.workflows.procs.ProcessAware;
10
import eu.dnetlib.msro.workflows.procs.WorkflowProcess;
11
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14
import org.springframework.beans.factory.annotation.Autowired;
15

  
16
import java.util.HashMap;
17
import java.util.Map;
18

  
19
public class IncrementalTransformationJobNode extends SimpleJobNode implements ProcessAware {
20

  
21
    enum TransformationType {
22
        INCREMENTAL,
23
        REFRESH
24
    }
25

  
26
    private static final Log log = LogFactory.getLog(IncrementalTransformationJobNode.class);
27

  
28
    private String transformationType;
29

  
30
    private WorkflowProcess process;
31

  
32
    @Autowired
33
    private DnetLogger dnetLogger;
34

  
35
    @Autowired
36
    private UniqueServiceLocator locator;
37

  
38
    @Override
39
    protected String execute(Env env) throws Exception {
40

  
41
        if (TransformationType.INCREMENTAL.toString().equalsIgnoreCase(getTransformationType())) {
42
            final String endDate = DateProcessUtils.getEndDate(process, dnetLogger);
43
            if (endDate == null )
44
            {
45
                env.setAttribute("collectionMode", TransformationType.REFRESH.toString());
46
            }
47
            else {
48
                env.setAttribute("collectionMode", TransformationType.INCREMENTAL.toString());
49
                env.setAttribute("incrementalDateFrom", endDate);
50
            }
51
        }
52
        return Arc.DEFAULT_ARC;
53
    }
54

  
55

  
56

  
57

  
58

  
59
    public void setTransformationType(String transformationType) {
60
        this.transformationType = transformationType;
61
    }
62

  
63
    public String getTransformationType() {
64
        return transformationType;
65
    }
66

  
67
    @Override
68
    public void setProcess(WorkflowProcess process) {
69
        this.process = process;
70
    }
71
}
modules/dnet-graph-domain/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMinDistGraphJob.xml
33 33
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
34 34
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
35 35

  
36
				<PROPERTY key="mapred.reduce.tasks" value="1"/>
36
				<PROPERTY key="mapred.reduce.tasks" value="100"/>
37 37
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
38 38

  
39 39
				<!--  	Uncomment to override the default lib path -->
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/mdstore/StoreMDStoreRecordsJobNode.java
39 39
		final ResultSet<?> rs = token.getEnv().getAttribute(getEprParam(), ResultSet.class);
40 40

  
41 41
		token.setProgressProvider(new ResultsetProgressProvider(rs, this.resultSetClient));
42
		final String collectionMode = token.getEnv().getAttribute("collectionMode", String.class);
42 43

  
43 44
		job.setAction("FEED");
44 45
		job.getParameters().put("epr", rs.toJson());
45
		job.getParameters().put("storingType", getStoringType());
46
		job.getParameters().put("storingType",collectionMode!=null?collectionMode:getStoringType());
46 47
		job.getParameters().put("mdId", getMdId());
47 48
	}
48 49

  
modules/dnet-dli/trunk/src/test/java/eu/dnetlib/dli/transform/DLITransformTest.java
104 104

  
105 105
    }
106 106

  
107

  
107 108
    @Test
109
    public void testENATransform() throws Exception {
110

  
111
        final InputStream xsltAsStream =
112
                getClass().getResourceAsStream("/eu/dnetlib/dli/transform/ena.xslt");
113
        final TransformerFactory factory = TransformerFactory.newInstance();
114
        factory.newTransformer();
115
        final StreamSource xsltSource = new StreamSource(xsltAsStream);
116
        final InputStream recordStream = this.getClass().getResourceAsStream("/eu/dnetlib/dli/transform/input_ena.xml");
117
        final Transformer transformer = factory.newTransformer(xsltSource);
118
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
119
        final StringWriter output = new StringWriter();
120
        transformer.transform(new StreamSource(recordStream), new StreamResult(output));
121
        System.out.println(output.toString());
122

  
123
    }
124

  
125
    @Test
108 126
    public void testIEEETransform() throws Exception {
109 127

  
110 128
        final InputStream xsltAsStream =
modules/dnet-dli/trunk/src/test/resources/eu/dnetlib/dli/transform/input_ena.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<oai:record xmlns:oai="http://www.openarchives.org/OAI/2.0/"
3
            xmlns:dri="http://www.driver-repository.eu/namespace/dri"
4
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
5
    <oai:header>
6
        <dri:objIdentifier xmlns:oaf="http://namespace.dnet.eu/oaf"
7
                           xmlns:datacite="http://datacite.org/schema/kernel-3">r3d100010527::00000648e410cb638823564cf5fdc885</dri:objIdentifier>
8
        <dri:recordIdentifier xmlns:oaf="http://namespace.dnet.eu/oaf"
9
                              xmlns:datacite="http://datacite.org/schema/kernel-3">http://www.ebi.ac.uk/ena/data/search/?query=11118631&amp;result=sequence_release::url</dri:recordIdentifier>
10
        <dri:dateOfCollection xmlns:oaf="http://namespace.dnet.eu/oaf"
11
                              xmlns:datacite="http://datacite.org/schema/kernel-3">2017-09-18T14:07:46.84+02:00</dri:dateOfCollection>
12
        <dri:repositoryId xmlns:oaf="http://namespace.dnet.eu/oaf"
13
                          xmlns:datacite="http://datacite.org/schema/kernel-3">dli_________::r3d100010527</dri:repositoryId>
14
        <dri:datasourceprefix xmlns:oaf="http://namespace.dnet.eu/oaf"
15
                              xmlns:datacite="http://datacite.org/schema/kernel-3">r3d100010527</dri:datasourceprefix>
16
    </oai:header>
17
    <metadata xmlns:oaf="http://namespace.dnet.eu/oaf"
18
              xmlns:datacite="http://datacite.org/schema/kernel-3"
19
              xmlns:dc="http://purl.org/dc/elements/1.1/">
20
        <resource xmlns="http://datacite.org/schema/kernel-3"
21
                  xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd">
22
            <identifier xmlns="" identifierType="url">http://www.ebi.ac.uk/ena/data/search/?query=11118631&amp;result=sequence_release</identifier>
23
            <titles xmlns="">
24
                <title>Nucleotide sequences (Release) (1/2) of "Molecular analysis of a t(7;14)(q35;q32) chromosome translocation in a T cell leukemia of a patient with ataxia telangiectasia." </title>
25
            </titles>
26
            <dates xmlns="">
27
                <date dateType="Collected">2000-04-21</date>
28
            </dates>
29
            <descriptions xmlns="">
30
                <description>Catharanthus roseus 2C-methyl-D-erythritol 2,4-cyclodiphosphate synthase (MECS) mRNA, complete cds.</description>
31
            </descriptions>
32
            <subjects xmlns="">
33
                <subject subjectScheme="organism">Catharanthus roseus</subject>
34
                <subject subjectScheme="codon_start">1</subject>
35
                <subject subjectScheme="gene">MECS</subject>
36
                <subject subjectScheme="product">2C-methyl-D-erythritol 2,4-cyclodiphosphate synthase</subject>
37
                <subject subjectScheme="note">YgbB protein</subject>
38
                <subject subjectScheme="protein_id">AAF65155.1</subject>
39
                <subject subjectScheme="translation">MAMATSFYCSTAIPSKKTNQNRENFLCSPVGGSKTTPSYIRLSTRQSRTLSLVVSAAASGAAVEAEPKFAAVTPSKILSF
40
                    RVGHGFDLHRLEPGYPLIIGGINIPHDRGCEAHSDGDVLLHCVVDAILGALGLPDIGQIFPDTDPKWKGAPSSVFIKEAV
41
                    RLMDEAGYELGNLDATLILQRPKVSPHKEAIRQNLCQLLGADPCVVNLKAKTHEKVDSLGENRSIAAHTVVLLMRK</subject>
42

  
43
            </subjects>
44
            <resourceType xmlns="" resourceTypeGeneral="dataset">dataset</resourceType>
45
            <oaf:relatedIdentifier xmlns="" entityType="publication" inverseRelationType="isRelatedTo"
46
                                   relatedIdentifierType="dnet"
47
                                   relationType="isRelatedTo">r3d100010527::f89cc26f592efddee8437a6a73a6ab5a</oaf:relatedIdentifier>
48
        </resource>
49
    </metadata>
50
    <oai:about>
51
        <provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance"
52
                    xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
53
            <originDescription xmlns="" altered="true" harvestDate="2018-01-24T16:52:33.227+01:00">
54
                <baseURL>%2Fsrv%2Fmongo%2Fdli_import%2Fena.gz</baseURL>
55
                <identifier/>
56
                <datestamp/>
57
                <metadataNamespace/>
58
            </originDescription>
59
        </provenance>
60
    </oai:about>
61
</oai:record>
modules/dnet-msro-service/trunk/src/main/resources/eu/dnetlib/msro/service/applicationContext-msro-nodes.xml
102 102
		class="eu.dnetlib.msro.workflows.nodes.transform.TransformJobNode"
103 103
		scope="prototype"/>
104 104

  
105
	<bean id="wfNodeIncrementalTransformation"
106
		  class="eu.dnetlib.msro.workflows.nodes.transform.IncrementalTransformationJobNode"
107
		  scope="prototype"/>
108

  
109

  
105 110
	<bean id="wfNodeApplyXslt"
106 111
		class="eu.dnetlib.msro.workflows.nodes.transform.ApplyXsltJobNode"
107 112
		scope="prototype"/>
modules/dnet-dli/trunk/src/test/resources/eu/dnetlib/dli/transform/ena.xslt
1
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"                
2
                xmlns:dri="http://www.driver-repository.eu/namespace/dri"
3
                xmlns:dc="http://purl.org/dc/elements/1.1/"
4
                xmlns:oaf="http://namespace.dnet.eu/oaf" version="2.0" exclude-result-prefixes="xsl">
5
    <xsl:template match="/">
6
        <xsl:apply-templates mode="dli"></xsl:apply-templates>
7
    </xsl:template>
8

  
9
    <xsl:template match="@*|node()" mode="dli">
10
        <xsl:copy>
11
            <xsl:apply-templates select="@*|node()" mode="dli"/>
12
        </xsl:copy>
13
    </xsl:template>
14

  
15
    <xsl:template match="*[local-name()='about']" mode="dli">
16
        <oaf:about>
17
            <oaf:datainfo>
18
                <oaf:completionStatus>complete</oaf:completionStatus>
19
                <xsl:choose>
20
                    <xsl:when test="//dc:type='publication'">
21
                        <oaf:collectedFrom completionStatus="incomplete" id="r3d100010527" name="European Nucleotide Archive" />
22
                        <oaf:resolvedFrom completionStatus="complete" id="dli_________::pubmed" name="PubMed"/>
23
                        <oaf:provisionMode>resolved</oaf:provisionMode>
24
                    </xsl:when>
25
                    <xsl:otherwise>
26
                        <oaf:collectedFrom completionStatus="complete" id="r3d100010527" name="European Nucleotide Archive" />
27
                        <oaf:provisionMode>collected</oaf:provisionMode>
28
                    </xsl:otherwise>
29
                </xsl:choose>
30
            </oaf:datainfo>
31
        </oaf:about>
32
    </xsl:template>
33

  
34
</xsl:stylesheet>
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/dli/resolver/CrossrefResolver.java
17 17
import org.bson.Document;
18 18
import org.bson.conversions.Bson;
19 19
import org.springframework.beans.factory.annotation.Autowired;
20
import org.springframework.beans.factory.annotation.Required;
20 21

  
21 22
// TODO: Auto-generated Javadoc
22 23

  
......
42 43
	@Autowired
43 44
	private CrossRefParserJSON parser;
44 45

  
46
	@Autowired
47
	private MongoClient mongoClient;
45 48

  
49
	private MongoDatabase db;
50

  
51
	private String dumpType;
52

  
53

  
46 54
	@Override
47 55
	protected boolean canResolvePid(final String pidType) {
48 56
		return (pidType != null) && ("doi".equals(pidType.toLowerCase().trim()) || "handle".equals(pidType.toLowerCase().trim()));
......
68 76
            if (record.getPid() == null) return null;
69 77
			return record;
70 78
		} catch (Throwable e) {
71
			log.error("Error on resolve pid  " + pid, e);
79
			log.error("¯\\_(ツ)_/¯ Error on resolve pid  " + pid, e);
72 80
		}
73 81
		return null;
74 82
	}
75 83

  
76 84

  
77 85
	private String retrieveCrossRefFromDump(final String pid) {
86
		if (dumpType.equalsIgnoreCase("mongo")){
87
			return retrieveCrossRefFromDumpMongo(pid);
88
		}
89
		else if (dumpType.equalsIgnoreCase("ES")){
90
			return retrieveCrossRefFromDumpES(pid);
91
		}
92
		throw new  RuntimeException("incorrect dump Type expected [mongo, ES] found: "+dumpType );
78 93

  
94
	}
79 95

  
80
		final String response = requestURL("http://ip-90-147-167-137.ct1.garrservices.it:9200/crossref/item/" + pid.replaceAll("/","%2F"));
81

  
82
		return response;
96
	private String retrieveCrossRefFromDumpES(final String pid) {
97
		return  requestURL("http://ip-90-147-167-137.ct1.garrservices.it:9200/crossref/item/" + pid.replaceAll("/","%2F"));
83 98
	}
84 99

  
85 100

  
101
	private String retrieveCrossRefFromDumpMongo(final String pid) {
102
		if (db == null) {
103
			db = mongoClient.getDatabase("crossRef");
86 104

  
105
		}
106
		final MongoCollection<Document> crossRef = db.getCollection("dump");
87 107

  
108
		DBObject query = QueryBuilder.start("_id").is(pid).get();
109
		FindIterable<Document> documents = crossRef.find((Bson) query).limit(1);
110
		MongoCursor<Document> iterator = documents.iterator();
111
		if (iterator.hasNext()){
112
			return iterator.next().toJson();
113
		}
114
		return null;
115
	}
88 116

  
117
	public String getDumpType() {
118
		return dumpType;
119
	}
89 120

  
90

  
121
	@Required
122
	public void setDumpType(String dumpType) {
123
		this.dumpType = dumpType;
124
	}
91 125
}
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/dli/resolver/CrossRefParserJSON.java
38 38
		if (record == null) return null;
39 39
		JsonElement jElement = new JsonParser().parse(record);
40 40

  
41
		JsonElement source = jElement.getAsJsonObject().get("_source");
42
		if (source== null || !source.isJsonObject())
41
		JsonElement source = null;
42

  
43
		if (jElement.getAsJsonObject().has("_source")) {
44
			source = jElement.getAsJsonObject().get("_source");
45
			if (source == null || !source.isJsonObject())
46
				return null;
47
		}
48
		else if(jElement.getAsJsonObject().has("DOI")){
49
			source = jElement;
50
		} else {
43 51
			return null;
52
		}
44 53

  
45 54
		final JsonObject message = source.getAsJsonObject();
46 55
        DLIResolvedObject currentObject = new DLIResolvedObject();
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/dli/resolver/applicationContext-dli-resolver.properties
1 1
services.dli.resolver.store.DatabaseName=resolverStore
2
services.dli.resolver.store.CollectionName=resolverCollection
2
services.dli.resolver.store.CollectionName=resolverCollection
3
services.dli.resolver.crossRef.dump=mongo
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/dli/resolver/applicationContext-dli-resolver.xml
34 34
		<property name="cache" ref="dliResolverCache"/>
35 35
	</bean>
36 36

  
37
    <bean id="crossrefResolver" class="eu.dnetlib.dli.resolver.CrossrefResolver" p:order="0" p:availableOffline="true">
38
		<property name="cache" ref="dliResolverCache"/>
37
    <bean id="crossrefResolver" class="eu.dnetlib.dli.resolver.CrossrefResolver" p:order="0" p:availableOffline="true"
38
		p:dumpType="${services.dli.resolver.crossRef.dump}" p:cache-ref="dliResolverCache">
39 39
	</bean>
40 40

  
41 41
	<bean id="dliOfflineResolver" class="eu.dnetlib.dli.resolver.DLIOfflineResolver" p:order="0" p:availableOffline="true">
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/dli/workflows/repo-hi/dli_aggregation_wf.xml.st
23 23
            	<PARAM name="unknownMdstoreId"                  description="Store for unknown records"             required="true" managedBy="system" category="MDSTORE_ID"/>
24 24
            	<PARAM name="cleanTransformationRuleId"         description="Transformation Rule Identifier"        required="true" managedBy="user"   category="TRANSFORMATION_RULE_ID" type="string" function="listProfiles('TransformationRuleDSResourceType', '//TITLE', 'DLI:')"/>
25 25
            	<PARAM description="Type of Transformation" function="validValues(['simpleTransform', 'transformAndUnpack'])" managedBy="user" name="typeOfTransform" required="true" type="string">simpleTransform</PARAM>
26
            	<PARAM description="Incremental Transformation" function="validValues(['INCREMENTAL', 'REFRESH'])" managedBy="user" name="collectionMode" required="false" type="string">REFRESH</PARAM>
26 27
            	<PARAM description="Resolving offline" function="validValues(['false', 'true'])" managedBy="user" name="offlineResolving" required="true" type="string">false</PARAM>
28
            	<PARAM name="from_date"                         description="Start Date of Harvesting"              required="false" managedBy="user"   category="COLLECTION" type="string"/>
27 29
            </PARAMETERS>
28 30
            <WORKFLOW>
29 31
                <NODE isStart="true" name="collection" type="LaunchWorkflowTemplate">
......
35 37
                    			<ENTRY key="dsId"           value="$dsId$" />
36 38
 								<ENTRY key="interface"      value="$interface$" />
37 39
	 							<ENTRY key="collMdstoreId"  ref="collMdstoreId" />
40
	 							<ENTRY key="collectionMode"  ref="collectionMode" />
41
	 							<ENTRY key="from_date"      ref="from_date" />
38 42
                    		</MAP>
39 43
                    	</PARAM>
40 44
                    </PARAMETERS>
......
53 57
 								<ENTRY key="collMdstoreId"          ref="collMdstoreId" />
54 58
 								<ENTRY key="cleanMdstoreId"         ref="cleanMdstoreId" />
55 59
 								<ENTRY key="cleanRuleId"            ref="cleanTransformationRuleId" />
56
 								<ENTRY key="typeOfTransform"        ref="typeOfTransform" />
60
 								<ENTRY key="collectionMode"         ref="collectionMode" />
61
 								<ENTRY key="typeOfTransform"         ref="typeOfTransform" />
57 62

  
58 63
 							</MAP>
59 64
 						</PARAM>
......
73 78
                                    <ENTRY key="collMdstoreId"          ref="collMdstoreId" />
74 79
                                    <ENTRY key="cleanMdstoreId"         ref="cleanMdstoreId" />
75 80
                                    <ENTRY key="offlineResolving"       ref="offlineResolving" />
81
                                    <ENTRY key="collectionMode"         ref="collectionMode" />
76 82
                                </MAP>
77 83
                            </PARAM>
78 84
                        </PARAMETERS>
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/TransformationRuleDSResources/ena_transform.xml
12 12
            <IMPORTED/>
13 13
            <SCRIPT>
14 14
                <TITLE>DLI: ENA transform</TITLE>
15
                <CODE><![CDATA[<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
16
    <xsl:template match="@*|node()">
15
                <CODE><![CDATA[<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
16
                xmlns:dri="http://www.driver-repository.eu/namespace/dri"
17
                xmlns:dc="http://purl.org/dc/elements/1.1/"
18
                xmlns:oaf="http://namespace.dnet.eu/oaf" version="2.0" exclude-result-prefixes="xsl">
19
    <xsl:template match="/">
20
        <xsl:apply-templates mode="dli"></xsl:apply-templates>
21
    </xsl:template>
22

  
23
    <xsl:template match="@*|node()" mode="dli">
17 24
        <xsl:copy>
18
            <xsl:apply-templates select="@*|node()"/>
19
        </xsl:copy>
25
            <xsl:apply-templates select="@*|node()" mode="dli"/>
26
        </xsl:copy>elasticsearchFeedDliSummaryDataJob
20 27
    </xsl:template>
28

  
29
    <xsl:template match="*[local-name()='about']" mode="dli">
30
        <oaf:about>
31
            <oaf:datainfo>
32
                <oaf:completionStatus>complete</oaf:completionStatus>
33
                <xsl:choose>
34
                    <xsl:when test="//dc:type='publication'">
35
                        <oaf:collectedFrom completionStatus="incomplete" id="r3d100010527" name="European Nucleotide Archive" />
36
                        <oaf:resolvedFrom completionStatus="complete" id="dli_________::pubmed" name="PubMed"/>
37
                        <oaf:provisionMode>resolved</oaf:provisionMode>
38
                    </xsl:when>
39
                    <xsl:otherwise>
40
                        <oaf:collectedFrom completionStatus="complete" id="r3d100010527" name="European Nucleotide Archive" />
41
                        <oaf:provisionMode>collected</oaf:provisionMode>
42
                    </xsl:otherwise>
43
                </xsl:choose>
44
            </oaf:datainfo>
45
        </oaf:about>
46
    </xsl:template>
47

  
21 48
</xsl:stylesheet>
22 49
]]></CODE>
23 50
            </SCRIPT>
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/RepositoryServiceResources/ccdc.xml
37 37
				<INTERFACE active="true" compliance="dli" contentDescription="metadata" id="api_________::opendoar____::2367::0"
38 38
				           label="Links provider" typology="links::provider" removable="false">
39 39
					<ACCESS_PROTOCOL format="oai_datacite" set="BL.CCDC">oai</ACCESS_PROTOCOL>
40
					<BASE_URL>http://oai.datacite.org/oai</BASE_URL>
40
					<BASE_URL>https://oai.datacite.org/oai</BASE_URL>
41 41
					<INTERFACE_EXTRA_FIELD name="last_collection_date"/>
42 42
					<INTERFACE_EXTRA_FIELD name="metadata_identifier_path">//*[local-name()='header']/*[local-name()='identifier']
43 43
					</INTERFACE_EXTRA_FIELD>
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/RepositoryServiceResources/datacite.xml
36 36
			<INTERFACES>
37 37
				<INTERFACE active="true" compliance="dli" contentDescription="metadata" id="api_________::datacite::0"
38 38
				           label="Links provider" typology="links::provider" removable="false">
39
					<ACCESS_PROTOCOL>mongoDump</ACCESS_PROTOCOL>
40
					<BASE_URL>/home/sandro/mongo_backup/datacite3000.json</BASE_URL>
39
					<ACCESS_PROTOCOL format="oai_datacite">oai</ACCESS_PROTOCOL>
40
					<BASE_URL>https://oai.datacite.org/oai</BASE_URL>
41 41
					<INTERFACE_EXTRA_FIELD name="last_collection_date"/>
42 42
					<INTERFACE_EXTRA_FIELD name="metadata_identifier_path">//*[local-name()='header']/*[local-name()='identifier']
43 43
					</INTERFACE_EXTRA_FIELD>
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/link_provider_resolver_template.xml
16 16
				<PARAM name="cleanMdstoreId" description="Store for cleaned records" required="true" type="string"/>
17 17
                <PARAM name="numberOfThreads" description="number of threads for resolve PID" required="true"   type="string" default="4"/>
18 18
				<PARAM name="offlineResolving" description="decide if you want resolve offline using only the resolved item" required="true" type="boolean" default="false"/>
19
				<PARAM name="collectionMode" description="Define the type of collection mode [INCREMENTAL, REFRESH]" required="true" default="REFRESH" type="string"/>
19 20
			</PARAMETERS>
20 21
			<WORKFLOW>
21 22

  
......
26 27
						<PARAM name="pluginName" value="dliResolverPlugin"/>
27 28
                        <PARAM name="numberOfThreads" ref="numberOfThreads"/>
28 29
                        <PARAM name="offline" ref="offlineResolving"/>
30
                        <PARAM name="collectionMode" ref="collectionMode"/>
29 31
                    </PARAMETERS>
30 32
					<ARCS>
31 33
						<ARC to="UPDATE_INFO"/>
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/link_provider_collection_template.xml
13 13
				<PARAM name="dsId" description="Datasource Id" required="true" type="string"/>
14 14
				<PARAM name="interface" description="Datasource Interface" required="true" type="string"/>
15 15
				<PARAM name="collMdstoreId" description="Store for collected records" required="true" type="string"/>
16
				<PARAM name="from_date" description="From Date" required="false" type="string"/>
17
				<PARAM name="collectionMode" description="Define the type of collection mode [INCREMENTAL, REFRESH]" required="true" default="REFRESH" type="string"/>
16 18
			</PARAMETERS>
17 19
			<WORKFLOW>
18
				<NODE name="COLLECT_REFRESH" type="CollectRecords" isStart="true">
20
				<NODE name="FindDateRangeForIncrementalHarvesting" type="FindDateRangeForIncrementalHarvesting"
21
					  isStart="true">
22
					<DESCRIPTION>Find Last execution</DESCRIPTION>
23
					<PARAMETERS>
24
						<PARAM name="fromDateParam" ref="fromDateParam"/>
25
						<PARAM name="collectionMode" ref="collectionMode"/>
26
					</PARAMETERS>
27
					<ARCS>
28
						<ARC to="COLLECT"/>
29
					</ARCS>
30
				</NODE>
31
				<NODE name="COLLECT" type="DateRangeCollectRecords">
19 32
					<DESCRIPTION>Start Harvesting</DESCRIPTION>
20 33
					<PARAMETERS>
21 34
						<PARAM name="datasourceId" ref="dsId"/>
22 35
						<PARAM name="datasourceInterface" ref="interface"/>
23 36
						<PARAM name="eprParam" value="collected_epr"/>
37
						<PARAM name="from" ref="from_date"/>
38
						<PARAM name="fromDateParam" ref="fromDateParam"/>
24 39
					</PARAMETERS>
25 40
					<ARCS>
26 41
						<ARC to="MD_BUILDER"/>
......
35 50
						<PARAM name="datasourceInterface" ref="interface"/>
36 51
					</PARAMETERS>
37 52
					<ARCS>
38
						<ARC to="STORE_REFRESH"/>
53
						<ARC to="STORE"/>
39 54
					</ARCS>
40 55
				</NODE>
41
				<NODE name="STORE_REFRESH" type="StoreMDStoreRecords">
56
				<NODE name="STORE" type="StoreMDStoreRecords">
42 57
					<DESCRIPTION>Store mdstore records</DESCRIPTION>
43 58
					<PARAMETERS>
44 59
						<PARAM name="mdId" ref="collMdstoreId"/>
45
						<PARAM name="storingType" value="REFRESH"/>
60
						<PARAM name="storingType" ref="collectionMode"/>
46 61
						<PARAM name="eprParam" value="store_epr"/>
47 62
					</PARAMETERS>
48 63
					<ARCS>
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/link_provider_transform_template.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2 1
<RESOURCE_PROFILE>
3 2
	<HEADER>
4 3
		<RESOURCE_IDENTIFIER value="da3531c6-2bf6-48ab-848b-bd4c6379fd65_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
5 4
		<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/>
6 5
		<RESOURCE_KIND value="WorkflowTemplateDSResources"/>
7 6
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
7
		<DATE_OF_CREATION value="2018-02-12T12:49:19+01:00"/>
9 8
	</HEADER>
10 9
	<BODY>
11 10
		<CONFIGURATION>
12 11
			<PARAMETERS>
13
				<PARAM name="dsId" description="Datasource Id" required="true" type="string"/>
14
				<PARAM name="interface" description="Datasource Interface" required="true" type="string"/>
15
				<PARAM name="collMdstoreId" description="Store for collected records" required="true" type="string"/>
16
				<PARAM name="cleanMdstoreId" description="Store for cleaned records" required="true" type="string"/>
17
				<PARAM name="cleanRuleId" description="Transformation Rule Id" required="true" type="string"/>
18
                <PARAM name="typeOfTransform" description="Type of transformation" required="true" type="string"/>
12
				<PARAM description="Datasource Id" name="dsId" required="true" type="string"/>
13
				<PARAM description="Datasource Interface" name="interface" required="true" type="string"/>
14
				<PARAM description="Store for collected records" name="collMdstoreId" required="true" type="string"/>
15
				<PARAM description="Store for cleaned records" name="cleanMdstoreId" required="true" type="string"/>
16
				<PARAM description="Transformation Rule Id" name="cleanRuleId" required="true" type="string"/>
17
				<PARAM description="Type of transformation" name="typeOfTransform" required="true" type="string"/>
18
				<PARAM name="collectionMode" description="Define the type of collection mode [INCREMENTAL, REFRESH]" required="true" default="REFRESH" type="string"/>
19 19
			</PARAMETERS>
20 20
			<WORKFLOW>
21
				<NODE name="fetchOriginals" type="FetchMDStoreRecords" isStart="true">
21
				<NODE isStart="true" name="setIncrementalTransformation" type="IncrementalTransformation">
22
					<DESCRIPTION>Set incremental transformation</DESCRIPTION>
23
					<PARAMETERS>
24
						<PARAM name="transformationType" ref="collectionMode"/>
25
					</PARAMETERS>
26
					<ARCS>
27
						<ARC to="fetchOriginals"/>
28
					</ARCS>
29
				</NODE>
30
				<NODE name="fetchOriginals" type="FetchMDStoreRecords">
22 31
					<DESCRIPTION>Fetch records from MDStore</DESCRIPTION>
23 32
					<PARAMETERS>
24 33
						<PARAM name="mdId" ref="collMdstoreId"/>
25 34
						<PARAM name="eprParam" value="orig_epr"/>
26 35
					</PARAMETERS>
27 36
					<ARCS>
28
                        <ARC to="transformType"/>
37
						<ARC to="transformType"/>
29 38
					</ARCS>
30 39
				</NODE>
31

  
32
                <NODE name="transformType" type="Selection">
33
                    <DESCRIPTION>Clean original records</DESCRIPTION>
34
                    <PARAMETERS>
35
                        <PARAM name="selection" ref="typeOfTransform"/>
36
                    </PARAMETERS>
37
                    <ARCS>
38
                        <ARC name="simpleTransform" to="simpleTransform"/>
39
                        <ARC name="transformAndUnpack" to="transformAndUnpack"/>
40
                    </ARCS>
41
                </NODE>
42

  
43

  
44
                <NODE name="simpleTransform" type="Transform">
40
				<NODE name="transformType" type="Selection">
45 41
					<DESCRIPTION>Clean original records</DESCRIPTION>
46 42
					<PARAMETERS>
43
						<PARAM name="selection" ref="typeOfTransform"/>
44
					</PARAMETERS>
45
					<ARCS>
46
						<ARC name="simpleTransform" to="simpleTransform"/>
47
						<ARC name="transformAndUnpack" to="transformAndUnpack"/>
48
					</ARCS>
49
				</NODE>
50
				<NODE name="simpleTransform" type="Transform">
51
					<DESCRIPTION>Clean original records</DESCRIPTION>
52
					<PARAMETERS>
47 53
						<PARAM name="ruleId" ref="cleanRuleId"/>
48 54
						<PARAM name="inputEprParam" value="orig_epr"/>
49 55
						<PARAM name="outputEprParam" value="clean_epr"/>
50 56
					</PARAMETERS>
51 57
					<ARCS>
52
                        <ARC to="storeSimpleRecords"/>
58
						<ARC to="storeSimpleRecords"/>
53 59
					</ARCS>
54 60
				</NODE>
55

  
56

  
57
                <NODE name="storeSimpleRecords" type="StoreMDStoreRecords">
58
                    <DESCRIPTION>Store mdtore records</DESCRIPTION>
59
                    <PARAMETERS>
60
                        <PARAM name="mdId" ref="cleanMdstoreId"/>
61
                        <PARAM name="storingType" value="REFRESH"/>
62
                        <PARAM name="eprParam" value="clean_epr"/>
63
                    </PARAMETERS>
64
                    <ARCS>
65
                        <ARC to="UPDATE_INFO"/>
66
                    </ARCS>
67
                </NODE>
68

  
69

  
70
                <NODE name="transformAndUnpack" type="Transform">
71
                    <DESCRIPTION>Clean original records</DESCRIPTION>
72
                    <PARAMETERS>
73
                        <PARAM name="ruleId" ref="cleanRuleId"/>
74
                        <PARAM name="inputEprParam" value="orig_epr"/>
75
                        <PARAM name="outputEprParam" value="pack_epr"/>
76
                    </PARAMETERS>
77
                    <ARCS>
78
                        <ARC to="unpackRecord"/>
79
                    </ARCS>
80
                </NODE>
81

  
82
                <NODE name="unpackRecord" type="Unpack">
83
                    <DESCRIPTION>Unpack transformed records</DESCRIPTION>
84
                    <PARAMETERS>
85
                        <PARAM name="xpath" value="//*[local-name()='record']"/>
86
                        <PARAM name="inputEprParam" value="pack_epr"/>
87
                        <PARAM name="outputEprParam" value="clean_epr"/>
88
                    </PARAMETERS>
89
                    <ARCS>
90
                        <ARC to="storeUnpackedRecords"/>
91
                    </ARCS>
92
                </NODE>
93

  
94
                <NODE name="storeUnpackedRecords" type="StoreMDStoreRecords">
61
				<NODE name="storeSimpleRecords" type="StoreMDStoreRecords">
95 62
					<DESCRIPTION>Store mdtore records</DESCRIPTION>
96 63
					<PARAMETERS>
97 64
						<PARAM name="mdId" ref="cleanMdstoreId"/>
......
102 69
						<ARC to="UPDATE_INFO"/>
103 70
					</ARCS>
104 71
				</NODE>
105

  
72
				<NODE name="transformAndUnpack" type="Transform">
73
					<DESCRIPTION>Clean original records</DESCRIPTION>
74
					<PARAMETERS>
75
						<PARAM name="ruleId" ref="cleanRuleId"/>
76
						<PARAM name="inputEprParam" value="orig_epr"/>
77
						<PARAM name="outputEprParam" value="pack_epr"/>
78
					</PARAMETERS>
79
					<ARCS>
80
						<ARC to="unpackRecord"/>
81
					</ARCS>
82
				</NODE>
83
				<NODE name="unpackRecord" type="Unpack">
84
					<DESCRIPTION>Unpack transformed records</DESCRIPTION>
85
					<PARAMETERS>
86
						<PARAM name="xpath" value="//*[local-name()='record']"/>
87
						<PARAM name="inputEprParam" value="pack_epr"/>
88
						<PARAM name="outputEprParam" value="clean_epr"/>
89
					</PARAMETERS>
90
					<ARCS>
91
						<ARC to="storeUnpackedRecords"/>
92
					</ARCS>
93
				</NODE>
94
				<NODE name="storeUnpackedRecords" type="StoreMDStoreRecords">
95
					<DESCRIPTION>Store mdtore records</DESCRIPTION>
96
					<PARAMETERS>
97
						<PARAM name="mdId" ref="cleanMdstoreId"/>
98
						<PARAM name="storingType" value="REFRESH"/>
99
						<PARAM name="eprParam" value="clean_epr"/>
100
					</PARAMETERS>
101
					<ARCS>
102
						<ARC to="UPDATE_INFO"/>
103
					</ARCS>
104
				</NODE>
106 105
				<NODE name="UPDATE_INFO" type="MDStoreToApiExtraField">
107 106
					<DESCRIPTION>Update datasouce API extra fields</DESCRIPTION>
108 107
					<PARAMETERS>
......
120 119
			</WORKFLOW>
121 120
		</CONFIGURATION>
122 121
	</BODY>
123
</RESOURCE_PROFILE>
122
</RESOURCE_PROFILE>
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/record_dli_dmf.xml
1 1
<?xml version="1.0" encoding="UTF-8"?>
2
<oai:record xmlns:oai="http://www.openarchives.org/OAI/2.0/"
3
            xmlns="http://namespace.openaire.eu/">
4
    <oai:header xmlns="">
5
        <dri:objIdentifier xmlns:dri="http://www.driver-repository.eu/namespace/dri">
6
            r3d100010464::a57f57564a6f58554f6dad6a954bbf55
7
        </dri:objIdentifier>
8
        <dri:recordIdentifier xmlns:dri="http://www.driver-repository.eu/namespace/dri">1959.1/468446::hdl
9
        </dri:recordIdentifier>
10
        <dri:dateOfCollection xmlns:dri="http://www.driver-repository.eu/namespace/dri">2017-09-22T08:57:59.857+02:00
11
        </dri:dateOfCollection>
12
        <dri:repositoryId xmlns:dri="http://www.driver-repository.eu/namespace/dri">
13
            ands_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=
14
        </dri:repositoryId>
15
        <dri:datasourceprefix xmlns:dri="http://www.driver-repository.eu/namespace/dri">r3d100010464
16
        </dri:datasourceprefix>
2
<oai:record xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
            xmlns:oai="http://www.openarchives.org/OAI/2.0/"
4
            xmlns:dri="http://www.driver-repository.eu/namespace/dri">
5
    <oai:header>
6
        <dri:objIdentifier xmlns:oaf="http://namespace.dnet.eu/oaf"
7
                           xmlns:datacite="http://datacite.org/schema/kernel-3">r3d100010527::00000648e410cb638823564cf5fdc885</dri:objIdentifier>
8
        <dri:recordIdentifier xmlns:oaf="http://namespace.dnet.eu/oaf"
9
                              xmlns:datacite="http://datacite.org/schema/kernel-3">http://www.ebi.ac.uk/ena/data/search/?query=11118631&amp;result=sequence_release::url</dri:recordIdentifier>
10
        <dri:dateOfCollection xmlns:oaf="http://namespace.dnet.eu/oaf"
11
                              xmlns:datacite="http://datacite.org/schema/kernel-3">2017-09-18T14:07:46.84+02:00</dri:dateOfCollection>
12
        <dri:repositoryId xmlns:oaf="http://namespace.dnet.eu/oaf"
13
                          xmlns:datacite="http://datacite.org/schema/kernel-3">dli_________::r3d100010527</dri:repositoryId>
14
        <dri:datasourceprefix xmlns:oaf="http://namespace.dnet.eu/oaf"
15
                              xmlns:datacite="http://datacite.org/schema/kernel-3">r3d100010527</dri:datasourceprefix>
17 16
    </oai:header>
18
    <metadata xmlns="">
17
    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/"
18
              xmlns:oaf="http://namespace.dnet.eu/oaf"
19
              xmlns:datacite="http://datacite.org/schema/kernel-3">
19 20
        <resource xmlns="http://datacite.org/schema/kernel-3"
20
                  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
21 21
                  xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd">
22
            <identifier xmlns="" identifierType="hdl">1959.1/468446</identifier>
23
            <titles xmlns="">
24
                <title>Clinician practices while using health information system security in Australian public
25
                    hospitals: data
26
                </title>
22
            <identifier identifierType="url">http://www.ebi.ac.uk/ena/data/search/?query=11118631&amp;result=sequence_release</identifier>
23
            <titles>
24
                <title>Nucleotide sequences (Release) (1/2) of "Molecular analysis of a t(7;14)(q35;q32) chromosome translocation in a T cell leukemia of a patient with ataxia telangiectasia." </title>
27 25
            </titles>
28
            <publisher xmlns="">Monash University</publisher>
29
            <dates xmlns="">
30
                <date dateType="Collected">2012-11-28</date>
26
            <dates>
27
                <date dateType="Collected">2000-04-21</date>
31 28
            </dates>
32
            <creators xmlns="">
33
                <creator>
34
                    <creatorName>Dr Juanita Fernando</creatorName>
35
                </creator>
36
            </creators>
37
            <resourceType xmlns="" resourceTypeGeneral="Dataset">Dataset</resourceType>
38
            <!--<relatedIdentifiers xmlns="">-->
39
                <!--<relatedIdentifier entityType="                     publication                 "-->
40
                                   <!--inverseRelationType="hasAssociationWith"-->
41
                                   <!--relatedIdentifierType="dnet"-->
42
                                   <!--relationType="hasAssociationWith">r3d100010255::d5ad02b122901f0d9d59f8348768d0f9-->
43
                <!--</relatedIdentifier>-->
44
            <!--</relatedIdentifiers>-->
29
            <descriptions>
30
                <description>Catharanthus roseus 2C-methyl-D-erythritol 2,4-cyclodiphosphate synthase (MECS) mRNA, complete cds.</description>
31
            </descriptions>
32
            <subjects>
33
                <subject subjectScheme="organism">Catharanthus roseus</subject>
34
                <subject subjectScheme="codon_start">1</subject>
35
                <subject subjectScheme="gene">MECS</subject>
36
                <subject subjectScheme="product">2C-methyl-D-erythritol 2,4-cyclodiphosphate synthase</subject>
37
                <subject subjectScheme="note">YgbB protein</subject>
38
                <subject subjectScheme="protein_id">AAF65155.1</subject>
39
                <subject subjectScheme="translation">MAMATSFYCSTAIPSKKTNQNRENFLCSPVGGSKTTPSYIRLSTRQSRTLSLVVSAAASGAAVEAEPKFAAVTPSKILSF
40
                    RVGHGFDLHRLEPGYPLIIGGINIPHDRGCEAHSDGDVLLHCVVDAILGALGLPDIGQIFPDTDPKWKGAPSSVFIKEAV
41
                    RLMDEAGYELGNLDATLILQRPKVSPHKEAIRQNLCQLLGADPCVVNLKAKTHEKVDSLGENRSIAAHTVVLLMRK</subject>
42

  
43
            </subjects>
44
            <resourceType xmlns="" resourceTypeGeneral="dataset">dataset</resourceType>
45
            <oaf:relatedIdentifier entityType="publication" inverseRelationType="isRelatedTo"
46
                                   relatedIdentifierType="dnet"
47
                                   relationType="isRelatedTo">r3d100010527::f89cc26f592efddee8437a6a73a6ab5a</oaf:relatedIdentifier>
45 48
        </resource>
46 49
    </metadata>
47
    <oaf:about xmlns:oaf="http://namespace.dnet.eu/oaf" xmlns="">
50
    <oaf:about xmlns:oaf="http://namespace.dnet.eu/oaf">
48 51
        <oaf:datainfo>
49
            <oaf:collectedFrom completionStatus="complete" id="dli_________::r3d100010464"
50
                               name="Australian National Data Service"/>
51 52
            <oaf:completionStatus>complete</oaf:completionStatus>
52
            <oaf:provisionMode>collected</oaf:provisionMode>
53

  
54
            <oaf:collectedFrom completionStatus="complete" id="dli_________::datacite"
55
                               name="Datasets in Datacite"/>
56

  
53 57
        </oaf:datainfo>
54 58
    </oaf:about>
59

  
60

  
55 61
</oai:record>
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/record_dli_pmf.xml
1 1
<?xml version="1.0" encoding="UTF-8"?>
2
<record xmlns:oaf="http://namespace.dnet.eu/oaf"
3
        xmlns:dri="http://www.driver-repository.eu/namespace/dri"
4
        xmlns:dc="http://purl.org/dc/elements/1.1/">
5
    <oai:header xmlns:oai="http://www.openarchives.org/OAI/2.0/"
6
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
7
        <dri:objIdentifier>pubmed______::000087d2ea077b8526bbc3c1436737ac</dri:objIdentifier>
8
        <dri:resolvedDate>2017-10-31T16:52:57.209</dri:resolvedDate>
9
        <dri:recordIdentifier>EuropePMC:26375944</dri:recordIdentifier>
10
        <dri:dateOfCollection>2017-10-31T16:51:58.293+01:00</dri:dateOfCollection>
11
        <dri:repositoryId>pbm_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId>
12
        <dri:datasourceprefix>pubmed______</dri:datasourceprefix>
2
<oai:record xmlns:oaf="http://namespace.dnet.eu/oaf"
3
            xmlns:datacite="http://datacite.org/schema/kernel-3"
4
            xmlns:oai="http://www.openarchives.org/OAI/2.0/"
5
            xmlns:dri="http://www.driver-repository.eu/namespace/dri">
6
    <oai:header>
7
        <dri:objIdentifier>dli_resolver::b65cd86a86f67af94a3a25452b8ab30d</dri:objIdentifier>
8
        <dri:recordIdentifier>b65cd86a86f67af94a3a25452b8ab30d</dri:recordIdentifier>
9
        <dri:dateOfCollection>2017-12-20T15:37:59.445+01:00</dri:dateOfCollection>
10
        <dri:repositoryId>dli_resolver</dri:repositoryId>
11
        <dri:datasourceprefix>dli_resolver</dri:datasourceprefix>
13 12
    </oai:header>
14
    <metadata>
15
        <oaf:pid type="pmid">26375944</oaf:pid>
16
        <dc:identifier>http://www.ncbi.nlm.nih.gov/pubmed/26375944</dc:identifier>
17
        <dc:title>Reclassification of Saccharomycodes sinensis, Proposal of Yueomyces sinensis gen. nov., comb. nov. within Saccharomycetaceae (Saccharomycetales, Saccharomycotina).</dc:title>
18
        <dc:creator>Long Wang</dc:creator>
19
        <dc:creator>Marizeth Groenewald</dc:creator>
20
        <dc:creator>Qi-Ming Wang</dc:creator>
21
        <dc:creator>Teun Boekhout</dc:creator>
22
        <dc:date>2015-09-16</dc:date>
13
    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
14
        <oaf:pid type="doi">10.1002/jame.20038</oaf:pid>
15
        <dc:identifier>http://dx.doi.org/10.1002/jame.20038</dc:identifier>
16
        <dc:title>"Climate and carbon cycle changes from 1850 to 2100 in MPI-ESM simulations for the Coupled Model Intercomparison Project phase 5"</dc:title>
17
        <dc:creator>Giorgetta Marco A.</dc:creator>
18
        <dc:creator>Jungclaus Johann</dc:creator>
19
        <dc:creator>Reick Christian H.</dc:creator>
20
        <dc:creator>Legutke Stephanie</dc:creator>
21
        <dc:creator>Bader Jürgen</dc:creator>
22
        <dc:creator>Böttinger Michael</dc:creator>
23
        <dc:creator>Brovkin Victor</dc:creator>
24
        <dc:creator>Crueger Traute</dc:creator>
25
        <dc:creator>Esch Monika</dc:creator>
26
        <dc:creator>Fieg Kerstin</dc:creator>
27
        <dc:creator>Glushak Ksenia</dc:creator>
28
        <dc:creator>Gayler Veronika</dc:creator>
29
        <dc:creator>Haak Helmuth</dc:creator>
30
        <dc:creator>Hollweg Heinz-Dieter</dc:creator>
31
        <dc:creator>Ilyina Tatiana</dc:creator>
32
        <dc:creator>Kinne Stefan</dc:creator>
33
        <dc:creator>Kornblueh Luis</dc:creator>
34
        <dc:creator>Matei Daniela</dc:creator>
35
        <dc:creator>Mauritsen Thorsten</dc:creator>
36
        <dc:creator>Mikolajewicz Uwe</dc:creator>
37
        <dc:creator>Mueller Wolfgang</dc:creator>
38
        <dc:creator>Notz Dirk</dc:creator>
39
        <dc:creator>Pithan Felix</dc:creator>
40
        <dc:creator>Raddatz Thomas</dc:creator>
41
        <dc:creator>Rast Sebastian</dc:creator>
42
        <dc:creator>Redler Rene</dc:creator>
43
        <dc:creator>Roeckner Erich</dc:creator>
44
        <dc:creator>Schmidt Hauke</dc:creator>
45
        <dc:creator>Schnur Reiner</dc:creator>
46
        <dc:creator>Segschneider Joachim</dc:creator>
47
        <dc:creator>Six Katharina D.</dc:creator>
48
        <dc:creator>Stockhause Martina</dc:creator>
49
        <dc:creator>Timmreck Claudia</dc:creator>
50
        <dc:creator>Wegner Jörg</dc:creator>
51
        <dc:creator>Widmann Heinrich</dc:creator>
52
        <dc:creator>Wieners Karl-H.</dc:creator>
53
        <dc:creator>Claussen Martin</dc:creator>
54
        <dc:creator>Marotzke Jochem</dc:creator>
55
        <dc:creator>Stevens Bjorn</dc:creator>
56
        <dc:date/>
23 57
        <dc:description/>
24 58
        <dc:type>publication</dc:type>
25

  
26
        <oaf:relatedIdentifier entityType="dataset" inverseRelationType="isRelatedTo"
27
                               relatedIdentifierType="dnet"
28
                               relationType="isRelatedTo">dli_resolver::d90bf4e9a54d2dffc53b5ec6ce7c2dd6</oaf:relatedIdentifier>
29
        <oaf:relatedIdentifier entityType="dataset" inverseRelationType="isRelatedTo"
30
                               relatedIdentifierType="dnet"
31
                               relationType="isRelatedTo">dli_resolver::17a9492cc1632b6b95c6e7067d145162</oaf:relatedIdentifier>
32
        <oaf:relatedIdentifier entityType="dataset" inverseRelationType="isRelatedTo"
33
                               relatedIdentifierType="dnet"
34
                               relationType="isRelatedTo">dli_resolver::12e836f393a607172c54013f7a4d9816</oaf:relatedIdentifier>
35
        <oaf:relatedIdentifier entityType="dataset" inverseRelationType="isRelatedTo"
36
                               relatedIdentifierType="dnet"
37
                               relationType="isRelatedTo">dli_resolver::c7edca93b5d2524986f2dc1b2c55f600</oaf:relatedIdentifier>
59
        <dc:publisher>Wiley-Blackwell</dc:publisher>
38 60
    </metadata>
39
    <oaf:about>
61
    <oaf:about xmlns:dc="http://purl.org/dc/elements/1.1/">
40 62
        <oaf:datainfo>
41 63
            <oaf:completionStatus>complete</oaf:completionStatus>
42 64

  
43
            <oaf:collectedFrom completionStatus="complete" id="pubmed______" name="PubMed"/>
65
            <oaf:resolvedFrom id="dli_________::crossref" name="Crossref" completionStatus="complete"/>
44 66

  
45 67
        </oaf:datainfo>
46 68
    </oaf:about>
47

  
48
</record>
69
</oai:record>
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dedup/cc/MindistSearchMapper.java
2 2

  
3 3
import java.io.IOException;
4 4

  
5
import org.apache.commons.logging.Log;
6
import org.apache.commons.logging.LogFactory;
5 7
import org.apache.hadoop.io.Text;
6 8
import org.apache.hadoop.mapreduce.Mapper;
7 9

  
......
10 12
 */
11 13
public class MindistSearchMapper extends Mapper<Text, VertexWritable, Text, VertexWritable> {
12 14

  
15
	private static final Log log = LogFactory.getLog(MindistSearchMapper.class);
16

  
17
	private boolean debug = false;
18

  
13 19
	@Override
20
	protected void setup(Mapper.Context context) throws IOException, InterruptedException {
21
		super.setup(context);
22

  
23
		debug = context.getConfiguration().getBoolean("mindist_DEBUG", false);
24
		log.info("debug mode: " + debug);
25
	}
26

  
27
	@Override
14 28
	protected void map(Text key, VertexWritable value, Context context) throws IOException, InterruptedException {
15 29

  
16
		context.write(key, value);
30
		emit(key, value, context);
17 31
		if (value.isActivated()) {
18
			VertexWritable writable = new VertexWritable();
19
			for (Text neighborVertex : value.getEdges()) {
20
				if (!neighborVertex.toString().equals(value.getVertexId().toString())) {
21
					writable.setVertexId(value.getVertexId());
22
					writable.setEdges(null);
23
					context.write(neighborVertex, writable);
32
			VertexWritable vertex = new VertexWritable();
33
			for (Text edge : value.getEdges()) {
34
				if (!edge.toString().equals(value.getVertexId().toString())) {
35
					vertex.setVertexId(value.getVertexId());
36
					vertex.setEdges(null);
37
					emit(edge, vertex, context);
24 38
				}
25 39
			}
26 40
		}
27 41
	}
28 42

  
43
	private void emit(final Text key, final VertexWritable vertex, final Context context) throws IOException, InterruptedException {
44
		context.write(key, vertex);
45
		if (debug) {
46
			log.info(vertex.toJSON());
47
		}
48
	}
49

  
29 50
}
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dedup/cc/MindistSearchReducer.java
15 15

  
16 16
	private static final Log log = LogFactory.getLog(MindistSearchReducer.class);
17 17

  
18
	public static final String UPDATE_COUNTER = "UpdateCounter";
19
	public static final String SKIPPED = "SKIPPED";
20
	public static final String UPDATED = "UPDATED";
21

  
18 22
	private boolean depthOne;
19 23

  
20 24
	private boolean debug = false;
......
28 32
			depthOne = true;
29 33
		}
30 34

  
31
		try {
32
			debug = Boolean.valueOf(context.getConfiguration().get("mindist_DEBUG"));
33
		} catch(Throwable e) {
34
			debug = false;
35
		}
35
		debug = context.getConfiguration().getBoolean("mindist_DEBUG", false);
36 36
		log.info("debug mode: " + debug);
37 37
	}
38 38

  
......
41 41

  
42 42
		VertexWritable realVertex = null;
43 43
		Text currentMinimalKey = null;
44
		boolean foundEdges = false;
44
		//boolean foundEdges = false;
45 45

  
46 46
		if (depthOne) {
47 47
			for (VertexWritable vertex : values) {
48 48
				if (!vertex.isMessage()) {
49 49
					//log.info(String.format("found vertex with edges: %s", key.toString()));
50 50
					realVertex = vertex.clone();
51
					foundEdges = true;
52 51
				}
53 52
			}
54 53

  
55 54
			if (realVertex == null) {
56
				throw new IllegalStateException(String.format("foundEdges: %s, invalid input, key: '%s'", foundEdges, key.toString()));
55
				context.getCounter(UPDATE_COUNTER, SKIPPED).increment(1);
56
				return;
57 57
			}
58 58

  
59 59
			realVertex.setActivated(true);
......
63 63
				realVertex.setVertexId(key);
64 64
			}
65 65

  
66
			context.getCounter("UpdateCounter", "UPDATED").increment(1);
66
			context.getCounter(UPDATE_COUNTER, UPDATED).increment(1);
67 67
		} else {
68 68
			for (VertexWritable vertex : values) {
69 69
				if (!vertex.isMessage()) {
......
82 82
				}
83 83
			}
84 84

  
85
			if (realVertex == null) {
86
				context.getCounter(UPDATE_COUNTER, SKIPPED).increment(1);
87
				return;
88
			}
89

  
85 90
			if (currentMinimalKey != null && currentMinimalKey.compareTo(realVertex.getVertexId()) < 0) {
86 91
				realVertex.setVertexId(currentMinimalKey);
87 92
				realVertex.setActivated(true);
88
				context.getCounter("UpdateCounter", "UPDATED").increment(1);
93
				context.getCounter(UPDATE_COUNTER, UPDATED).increment(1);
89 94
			} else {
90 95
				realVertex.setActivated(false);
91 96
			}
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dedup/DedupBuildRootsReducer.java
2 2

  
3 3
import java.io.IOException;
4 4
import java.util.List;
5
import java.util.stream.Collectors;
6
import java.util.stream.StreamSupport;
5 7

  
6 8
import com.google.common.collect.Iterables;
7 9
import com.google.common.collect.Lists;
......
29 31
    private DedupConfig dedupConf;
30 32
    private Ontologies ontologies;
31 33

  
32
	@Override
33
	protected void setup(final Context context) throws IOException, InterruptedException {
34
		super.setup(context);
35
		dedupConf = DedupConfig.load(context.getConfiguration().get(JobParams.DEDUP_CONF));
36
		System.out.println("dedup buildRoots reducer\n\nwf conf: " + dedupConf.toString());
34
    @Override
35
    protected void setup(final Context context) throws IOException, InterruptedException {
36
        super.setup(context);
37
        dedupConf = DedupConfig.load(context.getConfiguration().get(JobParams.DEDUP_CONF));
38
        System.out.println("dedup buildRoots reducer\n\nwf conf: " + dedupConf.toString());
37 39

  
38
		ontologies = OntologyLoader.loadOntologies(context.getConfiguration().get(JobParams.ONTOLOGIES));
39
		System.out.println("ontologies: " + ontologies.toJson(true));
40
        ontologies = OntologyLoader.loadOntologies(context.getConfiguration().get(JobParams.ONTOLOGIES));
41
        System.out.println("ontologies: " + ontologies.toJson(true));
40 42

  
41
	}
43
    }
42 44

  
43
	@Override
44
	protected void reduce(final Text key, final Iterable<ImmutableBytesWritable> values, final Context context) throws IOException, InterruptedException {
45
    @Override
46
    protected void reduce(final Text key, final Iterable<ImmutableBytesWritable> values, final Context context) throws IOException, InterruptedException {
47
        // ensures we're dealing with a root, otherwise returns
48
        if (!isRoot(key.toString())) {
49
            System.err.println("aborting DedupBuildRootsReducer, found non-root key: " + key);
50
            context.getCounter("DedupBuildRootsReducer", "aborted").increment(1);
51
            return;
52
        }
45 53

  
46
		// ensures we're dealing with a root, otherwise returns
47
		if (!isRoot(key.toString())) {
48
			System.err.println("aborting DedupBuildRootsReducer, found non-root key: " + key);
49
			context.getCounter("DedupBuildRootsReducer", "aborted").increment(1);
50
			return;
51
		}
52 54

  
53
		final byte[] rowkey = Bytes.toBytes(key.toString());
54
		final List<DNGF> entities = Lists.newArrayList();
55
        final byte[] rowkey = Bytes.toBytes(key.toString());
56
        final List<DNGF> entities = Lists.newArrayList();
55 57

  
56
		for (final DNGF oaf : toDNGF(values)) {
57
			switch (oaf.getKind()) {
58
			case entity:
59
				entities.add(oaf);
60
				break;
61
			case relation:
62
				handleRels(context, rowkey, oaf, true);
63
				break;
64
			default:
65
				break;
66
			}
67
		}
68
		// build and emit the root body
69
		final DNGF.Builder builder = DNGFEntityMerger.merge(dedupConf, key.toString(), entities);
70
		if (entities.size() < JobParams.MAX_COUNTERS) {
71
			context.getCounter(dedupConf.getWf().getEntityType() + " root group size", lpad(entities.size())).increment(1);
72
		} else {
73
			context.getCounter(dedupConf.getWf().getEntityType() + " root group size", "> " + JobParams.MAX_COUNTERS).increment(1);
74
		}
58
        for (final DNGF oaf : toDNGF(values)) {
59
            switch (oaf.getKind()) {
60
                case entity:
61
                    entities.add(oaf);
62
                    break;
63
                case relation:
64
                    handleRels(context, rowkey, oaf, true);
65
                    break;
66
                default:
67
                    break;
68
            }
69
        }
70
        // build and emit the root body
71
        final DNGF.Builder builder = DNGFEntityMerger.merge(dedupConf, key.toString(), entities);
72
        if (entities.size() < JobParams.MAX_COUNTERS) {
73
            context.getCounter(dedupConf.getWf().getEntityType() + " root group size", lpad(entities.size())).increment(1);
74
        } else {
75
            context.getCounter(dedupConf.getWf().getEntityType() + " root group size", "> " + JobParams.MAX_COUNTERS).increment(1);
76
        }
75 77

  
76
		final DNGF dngf = builder.build();
77
		final DNGFEntity entity = dngf.getEntity();
78 78

  
79
		try {
79
        final DNGF dngf = builder.build();
80
        final DNGFEntity entity = dngf.getEntity();
80 81

  
81
			context.write(new ImmutableBytesWritable(Bytes.toBytes(entity.getId())), asPut(dngf));
82
		} catch (Throwable e) {
83
			System.out.println("Exception dngf = " + dngf.toString());
84
			context.getCounter(entity.getType().toString(), e.getClass().getName()).increment(1);
85
		}
82
        try {
86 83

  
87
		context.getCounter(entity.getType().toString(), "root").increment(1);
84
            context.write(new ImmutableBytesWritable(Bytes.toBytes(entity.getId())), asPut(dngf));
85
        } catch (Throwable e) {
86
            System.out.println("Exception dngf = " + dngf.toString());
87
            context.getCounter(entity.getType().toString(), e.getClass().getName()).increment(1);
88
        }
88 89

  
89
		// add person rels TODO: remove this hack
90
		// context.getCounter("hack", "personResult out").increment(personMap.size());
90
        context.getCounter(entity.getType().toString(), "root").increment(1);
91 91

  
92
	}
92
        // add person rels TODO: remove this hack
93
        // context.getCounter("hack", "personResult out").increment(personMap.size());
93 94

  
94
	private Iterable<DNGF> toDNGF(final Iterable<ImmutableBytesWritable> values) {
95
		return Iterables.transform(values, ibw -> DNGFDecoder.decode(ibw.copyBytes()).getDNGF());
96
	}
95
    }
97 96

  
98
	private void handleRels(final Context context, final byte[] rowkey, final DNGF rel, final boolean hack) throws IOException, InterruptedException {
97
    private Iterable<DNGF> toDNGF(final Iterable<ImmutableBytesWritable> values) {
98
        return StreamSupport.stream(values.spliterator(), false).map(ibw -> DNGFDecoder.decode(ibw.copyBytes()).getDNGF()).collect(Collectors.toList());
99
    }
99 100

  
100
		if (hack && checkHack(new String(rowkey), rel)) {
101
			context.getCounter("hack", "personResult in").increment(1);
102
		} else {
101
    private void handleRels(final Context context, final byte[] rowkey, final DNGF rel, final boolean hack) throws IOException, InterruptedException {
102
        // emit relation from the root to the related entities
103
        DNGFDecoder decoder = rootToEntity(rowkey, rel, context);
103 104

  
104
			// emit relation from the root to the related entities
105
			DNGFDecoder decoder = rootToEntity(rowkey, rel, context);
106
			context.write(new ImmutableBytesWritable(rowkey), asPutByCollectedFrom(decoder.getDNGF()));
107 105

  
108
			// emit relation from the related entities to the root
109
			decoder = entityToRoot(rowkey, rel, context);
110
			final byte[] revKey = Bytes.toBytes(decoder.relSourceId());
111
			context.write(new ImmutableBytesWritable(revKey), asPutByCollectedFrom(decoder.getDNGF()));
112 106

  
113
            //context.getCounter(FIXED_RELATION, decoder.getRelDescriptor().shortQualifier() + " [entity <-> root]").increment(2);
107
        if ("50|dedup_wf_001::cddddb031b9f6c85046067c0cc9ad147".equals(decoder.getDNGFRel().getSource()))
108
        {
109
            System.out.println(String.format("Writing relation %s    %s     %s ", decoder.getDNGFRel().getSource(),decoder.getDNGFRel().getRelType().getClassname(),decoder.getDNGFRel().getTarget()));
114 110
        }
111
        context.write(new ImmutableBytesWritable(rowkey), asPutByCollectedFrom(decoder.getDNGF()));
112
        // emit relation from the related entities to the root
113
        decoder = entityToRoot(rowkey, rel, context);
114
        if ("50|dedup_wf_001::cddddb031b9f6c85046067c0cc9ad147".equals(decoder.getDNGFRel().getTarget()))
115
        {
116
            System.out.println(String.format("Writing relation %s    %s     %s ", decoder.getDNGFRel().getSource(),decoder.getDNGFRel().getRelType().getClassname(),decoder.getDNGFRel().getTarget()));
117
        }
118
        byte[] revKey = Bytes.toBytes(decoder.relSourceId());
119
        context.write(new ImmutableBytesWritable(revKey), asPutByCollectedFrom(decoder.getDNGF()));
120

  
121
        //context.getCounter(FIXED_RELATION, decoder.getRelDescriptor().shortQualifier() + " [entity <-> root]").increment(2);
122

  
115 123
        // mark relation from the related entities to the duplicate as deleted
116
		DNGFDecoder decoder = markDeleted(rel, true, context);
117
		byte[] revKey = Bytes.toBytes(decoder.relSourceId());
118
		context.write(new ImmutableBytesWritable(revKey), asPut(decoder.getDNGF()));
124
        decoder = markDeleted(rel, true, context);
125
        revKey = Bytes.toBytes(decoder.relSourceId());
126
        context.write(new ImmutableBytesWritable(revKey), asPut(decoder.getDNGF()));
119 127

  
120
		// mark relation from the related entities to the duplicate as deleted
121
		decoder = markDeleted(rel, false, context);
122
		revKey = Bytes.toBytes(decoder.relSourceId());
123
		context.write(new ImmutableBytesWritable(revKey), asPut(decoder.getDNGF()));
128
        // mark relation from the related entities to the duplicate as deleted
129
        decoder = markDeleted(rel, false, context);
130
        revKey = Bytes.toBytes(decoder.relSourceId());
131
        context.write(new ImmutableBytesWritable(revKey), asPut(decoder.getDNGF()));
124 132

  
125 133
        context.getCounter(FIXED_RELATION, decoder.getRelDescriptor().shortQualifier() + " mark deleted [dup <-> entity]").increment(2);
126 134
    }
127 135

  
128
	public boolean checkHack(final String root, final DNGF oaf) {
136
    private boolean md5matches(final String id1, final String id2) {
137
        return id1.replaceAll("^.*\\:\\:", "").equals(id2.replaceAll("^.*\\:\\:", ""));
138
    }
129 139

  
130
		boolean res;
131
		final String type = dedupConf.getWf().getEntityType();
140
    private DNGFDecoder rootToEntity(final byte[] rootRowkey, final DNGF rel, final Context context) {
141
        return patchRelations(rootRowkey, rel, DNGFPatch.rootToEntity, context);
142
    }
132 143

  
133
		if ((type.equals(Type.publication.toString()) || type.equals(Type.dataset.toString())) &&
134
				oaf.getRel().getTargetType().equals(Type.person) && !md5matches(root, oaf.getRel().getSource())) {
144
    private DNGFDecoder entityToRoot(final byte[] rootRowkey, final DNGF rel, final Context context) {
145
        return patchRelations(rootRowkey, rel, DNGFPatch.entityToRoot, context);
146
    }
135 147

  
136
			res = true;
137
		} else {
138
			res = false;
139
		}
148
    private DNGFDecoder markDeleted(final DNGF rel, final boolean reverse, final Context context) {
149
        return deleteRelations(rel, reverse, context);
150
    }
140 151

  
141
		// if (root.equals("50|dedup_wf_001::92f6197ea6f16ae554755aced832fb6f")) {
142
		// System.out.println("##################");
143
		// System.out.println("root  : " + root);
144
		// System.out.println("source: " + oaf.getRel().getSource());
145
		// System.out.println("ckeck:  " + res);
146
		// }
152
    // patches relation objects setting the source field with the root id
153
    private DNGFDecoder patchRelations(final byte[] rootRowkey, final DNGF rel, final DNGFPatch patchKind, final Context context) {
154
        final String id = new String(rootRowkey);
147 155

  
148
		return res;
149
	}
156
        if ("50|dedup_wf_001::cddddb031b9f6c85046067c0cc9ad147".equals(id)) {
157
            System.out.println("#########################\n\n");
158
        }
150 159

  
151
	private boolean md5matches(final String id1, final String id2) {
152
		return id1.replaceAll("^.*\\:\\:", "").equals(id2.replaceAll("^.*\\:\\:", ""));
153
	}
154 160

  
155
	private DNGFDecoder rootToEntity(final byte[] rootRowkey, final DNGF rel, final Context context) {
156
		return patchRelations(rootRowkey, rel, DNGFPatch.rootToEntity, context);
157
	}
161
        final DNGFRelDecoder decoder = DNGFRelDecoder.decode(rel.getRel());
162
        final DNGF.Builder builder = DNGF.newBuilder(rel);
163
        builder.getDataInfoBuilder().setInferred(true).setDeletedbyinference(false);
164
        switch (patchKind) {
165
            case rootToEntity:
166
                // builder.getDataInfoBuilder().setInferenceprovenance("dedup (BuildRoots p:rootToEntity)");
167
                builder.getRelBuilder().setSource(new String(rootRowkey));
168
                break;
158 169

  
159
	private DNGFDecoder entityToRoot(final byte[] rootRowkey, final DNGF rel, final Context context) {
160
		return patchRelations(rootRowkey, rel, DNGFPatch.entityToRoot, context);
161
	}
170
            case entityToRoot:
171
                builder.setRel(decoder.setClassId(getInverse(decoder, context)));
172
                // builder.getDataInfoBuilder().setInferenceprovenance("dedup (BuildRoots p:entityToRoot)");
173
                builder.getRelBuilder().setSource(builder.getRel().getTarget());
174
                builder.getRelBuilder().setTarget(new String(rootRowkey));
175
                final Type sourceType = builder.getRelBuilder().getSourceType();
176
                builder.getRelBuilder().setSourceType(builder.getRel().getTargetType());
177
                builder.getRelBuilder().setTargetType(sourceType);
178
                break;
162 179

  
163
	private DNGFDecoder markDeleted(final DNGF rel, final boolean reverse, final Context context) {
164
		return deleteRelations(rel, reverse, context);
165
	}
180
            default:
181
                break;
182
        }
166 183

  
167
	// patches relation objects setting the source field with the root id
168
	private DNGFDecoder patchRelations(final byte[] rootRowkey, final DNGF rel, final DNGFPatch patchKind, final Context context) {
169
		final DNGFRelDecoder decoder = DNGFRelDecoder.decode(rel.getRel());
170
		final DNGF.Builder builder = DNGF.newBuilder(rel);
171
		builder.getDataInfoBuilder().setInferred(true).setDeletedbyinference(false);
172
		switch (patchKind) {
173
		case rootToEntity:
174
			// builder.getDataInfoBuilder().setInferenceprovenance("dedup (BuildRoots p:rootToEntity)");
175
			builder.getRelBuilder().setSource(new String(rootRowkey));
176
			break;
184
        return DNGFDecoder.decode(builder.build());
185
    }
177 186

  
178
		case entityToRoot:
179
			builder.setRel(decoder.setClassId(getInverse(decoder, context)));
180
			// builder.getDataInfoBuilder().setInferenceprovenance("dedup (BuildRoots p:entityToRoot)");
181
			builder.getRelBuilder().setSource(builder.getRel().getTarget());
182
			builder.getRelBuilder().setTarget(new String(rootRowkey));
183
            final Type sourceType = builder.getRelBuilder().getSourceType();
184
            builder.getRelBuilder().setSourceType(builder.getRel().getTargetType());
185
            builder.getRelBuilder().setTargetType(sourceType);
186
            break;
187

  
188
		default:
189
			break;
190
		}
191

  
192
		return DNGFDecoder.decode(builder.build());
193
	}
194

  
195
	private String getInverse(final DNGFRelDecoder decoder, final Context context) {
196
		final String inverse = ontologies.inverseOf(decoder.getRelDescriptor());
197
		if (StringUtils.isBlank(inverse)) {
187
    private String getInverse(final DNGFRelDecoder decoder, final Context context) {
188
        final String inverse = ontologies.inverseOf(decoder.getRelDescriptor());
189
        if (StringUtils.isBlank(inverse)) {
198 190
            //context.getCounter("unmapped relationship", decoder.getRelDescriptor().shortQualifier()).increment(1);
199 191
            return "unknown";
200
		}
201
		return inverse;
202
	}
192
        }
193
        return inverse;
194
    }
203 195

  
204
	private DNGFDecoder deleteRelations(final DNGF rel, final boolean reverse, final Context context) {
205
		final DNGF.Builder builder = DNGF.newBuilder(rel);
206
		// builder.getDataInfoBuilder().setInferenceprovenance("dedup (BuildRoots d: " + reverse + ")");
207
		builder.getDataInfoBuilder().setDeletedbyinference(true);
196
    private DNGFDecoder deleteRelations(final DNGF rel, final boolean reverse, final Context context) {
197
        final DNGF.Builder builder = DNGF.newBuilder(rel);
198
        // builder.getDataInfoBuilder().setInferenceprovenance("dedup (BuildRoots d: " + reverse + ")");
199
        builder.getDataInfoBuilder().setDeletedbyinference(true);
208 200

  
209
		if (reverse) {
210
			final DNGFRelDecoder decoder = DNGFRelDecoder.decode(rel.getRel());
201
        if (reverse) {
202
            final DNGFRelDecoder decoder = DNGFRelDecoder.decode(rel.getRel());
211 203

  
212
			builder.setRel(decoder.setClassId(getInverse(decoder, context)));
213
			// swap source and target
214
			final String tmp = builder.getRel().getSource();
204
            builder.setRel(decoder.setClassId(getInverse(decoder, context)));
205
            // swap source and target
206
            final String tmp = builder.getRel().getSource();
215 207
            final Type sType = builder.getRel().getSourceType();
216 208
            builder.getRelBuilder().setSource(builder.getRel().getTarget());
217
			builder.getRelBuilder().setTarget(tmp);
209
            builder.getRelBuilder().setTarget(tmp);
218 210
            builder.getRelBuilder().setSourceType(builder.getRel().getTargetType());
219 211
            builder.getRelBuilder().setTargetType(sType);
220 212
        }
221 213

  
222
		return DNGFDecoder.decode(builder.build());
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff