Revision 40247
Added by Claudio Atzori almost 9 years ago
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/dm/dedup.closeMesh.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value="adaa9b08-a5df-4fd3-ad00-37153eac0010_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
5 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<WORKFLOW_NAME>Close Similarity Mesh</WORKFLOW_NAME> |
|
12 |
<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE> |
|
13 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
14 |
<CONFIGURATION start="manual"> |
|
15 |
|
|
16 |
|
|
17 |
<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true"> |
|
18 |
<DESCRIPTION /> |
|
19 |
<PARAMETERS> |
|
20 |
<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM> |
|
21 |
<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM> |
|
22 |
</PARAMETERS> |
|
23 |
<ARCS> |
|
24 |
<ARC to="waitConf" /> |
|
25 |
</ARCS> |
|
26 |
</NODE> |
|
27 |
<NODE name="hadoopConfig" type="SetClusterAndTable" isStart="true"> |
|
28 |
<DESCRIPTION>Set table name</DESCRIPTION> |
|
29 |
<PARAMETERS> |
|
30 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
|
31 |
<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM> |
|
32 |
</PARAMETERS> |
|
33 |
<ARCS> |
|
34 |
<ARC to="waitConf" /> |
|
35 |
</ARCS> |
|
36 |
</NODE> |
|
37 |
|
|
38 |
<NODE name="waitConf" isJoin="true"> |
|
39 |
<DESCRIPTION/> |
|
40 |
<PARAMETERS/> |
|
41 |
<ARCS> |
|
42 |
<ARC to="entitySequence" /> |
|
43 |
</ARCS> |
|
44 |
</NODE> |
|
45 |
|
|
46 |
<NODE name="entitySequence" type="CheckEntitySequence"> |
|
47 |
<DESCRIPTION>Check entity sequence</DESCRIPTION> |
|
48 |
<PARAMETERS> |
|
49 |
<PARAM required="true" type="string" name="entitySequence" managedBy="system">organization,result</PARAM> |
|
50 |
<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM> |
|
51 |
</PARAMETERS> |
|
52 |
<ARCS> |
|
53 |
<ARC to="dedupGrouper" /> |
|
54 |
<ARC name="done" to="success" /> |
|
55 |
</ARCS> |
|
56 |
</NODE> |
|
57 |
|
|
58 |
|
|
59 |
<NODE name="dedupGrouper" type="DedupGrouperJob"> |
|
60 |
<DESCRIPTION>dedup grouper</DESCRIPTION> |
|
61 |
<PARAMETERS> |
|
62 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupGrouperJob</PARAM> |
|
63 |
<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM> |
|
64 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
65 |
{ |
|
66 |
'dedup.conf' : 'dedup.conf', |
|
67 |
'entityTypeId' : 'entityTypeId', |
|
68 |
'entityType' : 'entityType', |
|
69 |
'cluster' : 'cluster', |
|
70 |
'hbase.mapred.inputtable' : 'tableName', |
|
71 |
'hbase.mapred.outputtable' : 'tableName', |
|
72 |
'hbase.mapreduce.inputtable' : 'tableName', |
|
73 |
'hbase.mapreduce.outputtable' : 'tableName' |
|
74 |
} |
|
75 |
</PARAM> |
|
76 |
</PARAMETERS> |
|
77 |
<ARCS> |
|
78 |
<ARC to="dedupGrouper" /> |
|
79 |
<ARC name="done" to="findRoots" /> |
|
80 |
</ARCS> |
|
81 |
</NODE> |
|
82 |
<NODE name="findRoots" type="SubmitHadoopJob"> |
|
83 |
<DESCRIPTION>find roots</DESCRIPTION> |
|
84 |
<PARAMETERS> |
|
85 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupFindRootsJob</PARAM> |
|
86 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
87 |
{ |
|
88 |
'dedup.conf' : 'dedup.conf', |
|
89 |
'entityTypeId' : 'entityTypeId', |
|
90 |
'entityType' : 'entityType', |
|
91 |
'cluster' : 'cluster', |
|
92 |
'hbase.mapred.inputtable' : 'tableName', |
|
93 |
'hbase.mapred.outputtable' : 'tableName', |
|
94 |
'hbase.mapreduce.inputtable' : 'tableName', |
|
95 |
'hbase.mapreduce.outputtable' : 'tableName' |
|
96 |
} |
|
97 |
</PARAM> |
|
98 |
</PARAMETERS> |
|
99 |
<ARCS> |
|
100 |
<ARC to="buildRoots" /> |
|
101 |
</ARCS> |
|
102 |
</NODE> |
|
103 |
<NODE name="buildRoots" type="SubmitHadoopJob"> |
|
104 |
<DESCRIPTION>redirect rels</DESCRIPTION> |
|
105 |
<PARAMETERS> |
|
106 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupBuildRootsJob</PARAM> |
|
107 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
108 |
{ |
|
109 |
'dedup.conf' : 'dedup.conf', |
|
110 |
'relClasses' : 'relClasses', |
|
111 |
'entityTypeId' : 'entityTypeId', |
|
112 |
'entityType' : 'entityType', |
|
113 |
'cluster' : 'cluster', |
|
114 |
'hbase.mapred.inputtable' : 'tableName', |
|
115 |
'hbase.mapreduce.inputtable' : 'tableName', |
|
116 |
'hbase.mapred.outputtable' : 'tableName', |
|
117 |
'hbase.mapreduce.outputtable' : 'tableName' |
|
118 |
} |
|
119 |
</PARAM> |
|
120 |
</PARAMETERS> |
|
121 |
<ARCS> |
|
122 |
<ARC to="entitySequence" /> |
|
123 |
</ARCS> |
|
124 |
</NODE> |
|
125 |
|
|
126 |
|
|
127 |
</CONFIGURATION> |
|
128 |
<STATUS /> |
|
129 |
</BODY> |
|
130 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/persons_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions"> |
|
7 |
|
|
8 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
9 |
|
|
10 |
<xsl:template match="/"> |
|
11 |
|
|
12 |
<xsl:variable name="originalid" select="//FIELD[@name = 'personid']" /> |
|
13 |
<xsl:variable name="personid" select="dnet:oafSplitId('person', $originalid)" /> |
|
14 |
<xsl:variable name="dateofcollection" select="//FIELD[@name = 'dateofcollection']" /> |
|
15 |
<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //FIELD[@name = 'collectedfromid'])" /> |
|
16 |
<xsl:variable name="collectedfromname" select="//FIELD[@name = 'collectedfromname']" /> |
|
17 |
|
|
18 |
<xsl:variable name="person" select="dnet:oafEntity('person', $personid, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, //FIELD[not(@isNull)])"/> |
|
19 |
|
|
20 |
<ROWS> |
|
21 |
<xsl:if test="string-length($personid) > 0"> |
|
22 |
<ROW key="{$personid}" columnFamily="person"> |
|
23 |
<QUALIFIER name="body" type="base64"><xsl:value-of select="$person"/></QUALIFIER> |
|
24 |
</ROW> |
|
25 |
</xsl:if> |
|
26 |
</ROWS> |
|
27 |
|
|
28 |
</xsl:template> |
|
29 |
|
|
30 |
</xsl:stylesheet> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/projectcontactperson_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions"> |
|
7 |
|
|
8 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
9 |
|
|
10 |
<xsl:template match="/"> |
|
11 |
|
|
12 |
<xsl:variable name="projectId" select="dnet:oafSplitId('project', //FIELD[@name = 'project'])" /> |
|
13 |
<xsl:variable name="contactpersonId" select="dnet:oafSplitId('person', //FIELD[@name = 'contactperson'])" /> |
|
14 |
|
|
15 |
<ROWS> |
|
16 |
<xsl:if test="string-length($projectId) > 0 and string-length($contactpersonId) > 0"> |
|
17 |
|
|
18 |
<xsl:variable name="projectcontactperson" |
|
19 |
select="dnet:oafRel('projectPerson', $projectId, $contactpersonId, //FIELD[not(@isNull)], 'hasContact', 'dnet:project_person_relations')"/> |
|
20 |
<xsl:variable name="contactpersonproject" |
|
21 |
select="dnet:oafRel('projectPerson', $contactpersonId, $projectId, //FIELD[not(@isNull)], 'isContact', 'dnet:project_person_relations')"/> |
|
22 |
|
|
23 |
<ROW key="{$projectId}" columnFamily="projectPerson_contactPerson_hasContact"> |
|
24 |
<QUALIFIER name="{$contactpersonId}" type="base64"><xsl:value-of select="$projectcontactperson"/></QUALIFIER> |
|
25 |
</ROW> |
|
26 |
<ROW key="{$contactpersonId}" columnFamily="projectPerson_contactPerson_isContact"> |
|
27 |
<QUALIFIER name="{$projectId}" type="base64"><xsl:value-of select="$contactpersonproject"/></QUALIFIER> |
|
28 |
</ROW> |
|
29 |
</xsl:if> |
|
30 |
</ROWS> |
|
31 |
|
|
32 |
</xsl:template> |
|
33 |
|
|
34 |
</xsl:stylesheet> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/claimrels_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DmfToHbaseXsltFunctions" |
|
7 |
xmlns:exslt="http://exslt.org/common" xmlns:action="http://namespace.openaire.eu/action" |
|
8 |
extension-element-prefixes="exslt" |
|
9 |
exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt"> |
|
10 |
|
|
11 |
<xsl:output omit-xml-declaration="yes" indent="yes" /> |
|
12 |
|
|
13 |
<xsl:template match="/*"> |
|
14 |
|
|
15 |
<xsl:variable name="provenance"><xsl:value-of select="//FIELD[@name = 'provenance']"/></xsl:variable> |
|
16 |
<xsl:variable name="trust">0.9</xsl:variable> |
|
17 |
|
|
18 |
<xsl:choose> |
|
19 |
<xsl:when test="count(//RELATION) = 0"> |
|
20 |
<ROWS /> |
|
21 |
</xsl:when> |
|
22 |
<xsl:otherwise> |
|
23 |
<ROWS> |
|
24 |
<xsl:for-each select="//RELATION"> |
|
25 |
<xsl:choose> |
|
26 |
<xsl:when test="./@type = 'resultProject'"> |
|
27 |
<xsl:variable name="resultId" select="./@source" /> |
|
28 |
<xsl:variable name="projectId"> |
|
29 |
<xsl:choose> |
|
30 |
<xsl:when test="starts-with(@target, '40|')"> |
|
31 |
<xsl:value-of select="./@target" /> |
|
32 |
</xsl:when> |
|
33 |
<xsl:otherwise> |
|
34 |
<xsl:value-of select="dnet:oafSplitId('project', normalize-space(@target))"/> |
|
35 |
</xsl:otherwise> |
|
36 |
</xsl:choose> |
|
37 |
</xsl:variable> |
|
38 |
|
|
39 |
<ROW key="{$resultId}" columnFamily="resultProject_outcome_isProducedBy"> |
|
40 |
<QUALIFIER name="{$projectId}" type="base64"><xsl:value-of select="dnet:oafResultProject_Outcome_FromDMF($resultId, $projectId, 'isProducedBy', $provenance, $trust)"/></QUALIFIER> |
|
41 |
</ROW> |
|
42 |
<ROW key="{$projectId}" columnFamily="resultProject_outcome_produces"> |
|
43 |
<QUALIFIER name="{$resultId}" type="base64"><xsl:value-of select="dnet:oafResultProject_Outcome_FromDMF($projectId, $resultId, 'produces', $provenance, $trust)"/></QUALIFIER> |
|
44 |
</ROW> |
|
45 |
</xsl:when> |
|
46 |
<xsl:when test="./@type = 'resultResult_publicationdataset_isRelatedTo'"> |
|
47 |
<xsl:variable name="source" select="./@source" /> |
|
48 |
<xsl:variable name="target" select="./@target" /> |
|
49 |
|
|
50 |
<ROW key="{$source}" columnFamily="resultResult_publicationDataset_isRelatedTo"> |
|
51 |
<QUALIFIER name="{$target}" type="base64"><xsl:value-of select="dnet:oafResultResult_PublicationDataset_FromDMF($source, $target, 'isRelatedTo', $provenance, $trust)"/></QUALIFIER> |
|
52 |
</ROW> |
|
53 |
<ROW key="{$target}" columnFamily="resultResult_publicationDataset_isRelatedTo"> |
|
54 |
<QUALIFIER name="{$source}" type="base64"><xsl:value-of select="dnet:oafResultResult_PublicationDataset_FromDMF($target, $source, 'isRelatedTo', $provenance, $trust)"/></QUALIFIER> |
|
55 |
</ROW> |
|
56 |
</xsl:when> |
|
57 |
</xsl:choose> |
|
58 |
</xsl:for-each> |
|
59 |
</ROWS> |
|
60 |
</xsl:otherwise> |
|
61 |
</xsl:choose> |
|
62 |
</xsl:template> |
|
63 |
|
|
64 |
|
|
65 |
</xsl:stylesheet> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/organizations_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions"> |
|
7 |
|
|
8 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
9 |
|
|
10 |
<xsl:template match="/"> |
|
11 |
|
|
12 |
<xsl:variable name="originalid" select="//FIELD[@name = 'organizationid']" /> |
|
13 |
<xsl:variable name="organizationid" select="dnet:oafSplitId('organization', $originalid)" /> |
|
14 |
<xsl:variable name="dateofcollection" select="//FIELD[@name = 'dateofcollection']" /> |
|
15 |
<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //FIELD[@name = 'collectedfromid'])" /> |
|
16 |
<xsl:variable name="collectedfromname" select="//FIELD[@name = 'collectedfromname']" /> |
|
17 |
|
|
18 |
<xsl:variable name="organization" select="dnet:oafEntity('organization', $organizationid, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, //FIELD[not(@isNull)])"/> |
|
19 |
|
|
20 |
<ROWS> |
|
21 |
<xsl:if test="string-length($organizationid) > 0"> |
|
22 |
<ROW key="{$organizationid}" columnFamily="organization"> |
|
23 |
<QUALIFIER name="body" type="base64"><xsl:value-of select="$organization"/></QUALIFIER> |
|
24 |
</ROW> |
|
25 |
</xsl:if> |
|
26 |
</ROWS> |
|
27 |
|
|
28 |
</xsl:template> |
|
29 |
|
|
30 |
</xsl:stylesheet> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/datasourceorganization_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions"> |
|
7 |
|
|
8 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
9 |
|
|
10 |
<xsl:template match="/"> |
|
11 |
|
|
12 |
<xsl:variable name="datasourceId" select="dnet:oafSplitId('datasource', //FIELD[@name = 'datasource'])" /> |
|
13 |
<xsl:variable name="organizationId" select="dnet:oafSplitId('organization', //FIELD[@name = 'organization'])" /> |
|
14 |
|
|
15 |
<ROWS> |
|
16 |
<xsl:if test="string-length($datasourceId) > 0 and string-length($organizationId) > 0"> |
|
17 |
|
|
18 |
<xsl:variable name="datasourceorganization" select="dnet:oafRel('datasourceOrganization', $datasourceId, $organizationId, //FIELD[not(@isNull)], 'provides', 'dnet:datasources_organizations_typologies')"/> |
|
19 |
<xsl:variable name="organizationdatasource" select="dnet:oafRel('datasourceOrganization', $organizationId, $datasourceId, //FIELD[not(@isNull)], 'isProvidedBy', 'dnet:datasources_organizations_typologies')"/> |
|
20 |
|
|
21 |
<ROW key="{$datasourceId}" columnFamily="datasourceOrganization_provision_provides"> |
|
22 |
<QUALIFIER name="{$organizationId}" type="base64"><xsl:value-of select="$datasourceorganization"/></QUALIFIER> |
|
23 |
</ROW> |
|
24 |
<ROW key="{$organizationId}" columnFamily="datasourceOrganization_provision_isProvidedBy"> |
|
25 |
<QUALIFIER name="{$datasourceId}" type="base64"><xsl:value-of select="$organizationdatasource"/></QUALIFIER> |
|
26 |
</ROW> |
|
27 |
</xsl:if> |
|
28 |
</ROWS> |
|
29 |
|
|
30 |
</xsl:template> |
|
31 |
|
|
32 |
</xsl:stylesheet> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/projectorganization_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions"> |
|
7 |
|
|
8 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
9 |
|
|
10 |
<xsl:template match="/"> |
|
11 |
|
|
12 |
<xsl:variable name="projectId" select="dnet:oafSplitId('project', //FIELD[@name = 'project'])" /> |
|
13 |
<xsl:variable name="organizationId" select="dnet:oafSplitId('organization', //FIELD[@name = 'resporganization'])" /> |
|
14 |
|
|
15 |
<ROWS> |
|
16 |
<xsl:if test="string-length($projectId) > 0 and string-length($organizationId) > 0"> |
|
17 |
|
|
18 |
<xsl:variable name="projectorganization" |
|
19 |
select="dnet:oafRel('projectOrganization', $projectId, $organizationId, //FIELD[not(@isNull)], 'hasParticipant', 'dnet:project_organization_relations')"/> |
|
20 |
<xsl:variable name="organizationproject" |
|
21 |
select="dnet:oafRel('projectOrganization', $organizationId, $projectId, //FIELD[not(@isNull)], 'isParticipant', 'dnet:project_organization_relations')"/> |
|
22 |
|
|
23 |
<ROW key="{$projectId}" columnFamily="projectOrganization_participation_hasParticipant"> |
|
24 |
<QUALIFIER name="{$organizationId}" type="base64"><xsl:value-of select="$projectorganization"/></QUALIFIER> |
|
25 |
</ROW> |
|
26 |
<ROW key="{$organizationId}" columnFamily="projectOrganization_participation_isParticipant"> |
|
27 |
<QUALIFIER name="{$projectId}" type="base64"><xsl:value-of select="$organizationproject"/></QUALIFIER> |
|
28 |
</ROW> |
|
29 |
</xsl:if> |
|
30 |
</ROWS> |
|
31 |
|
|
32 |
</xsl:template> |
|
33 |
|
|
34 |
</xsl:stylesheet> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/projects_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions"> |
|
7 |
|
|
8 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
9 |
|
|
10 |
<xsl:template match="/"> |
|
11 |
|
|
12 |
<xsl:variable name="originalid" select="//FIELD[@name = 'projectid']" /> |
|
13 |
<xsl:variable name="projectid" select="dnet:oafSplitId('project', $originalid)" /> |
|
14 |
<xsl:variable name="dateofcollection" select="//FIELD[@name = 'dateofcollection']" /> |
|
15 |
<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //FIELD[@name = 'collectedfromid'])" /> |
|
16 |
<xsl:variable name="collectedfromname" select="//FIELD[@name = 'collectedfromname']" /> |
|
17 |
|
|
18 |
<xsl:variable name="project" select="dnet:oafEntity('project', $projectid, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, //FIELD[not(@isNull)])"/> |
|
19 |
|
|
20 |
<ROWS> |
|
21 |
<xsl:if test="string-length($projectid) > 0"> |
|
22 |
<ROW key="{$projectid}" columnFamily="project"> |
|
23 |
<QUALIFIER name="body" type="base64"><xsl:value-of select="$project"/></QUALIFIER> |
|
24 |
</ROW> |
|
25 |
</xsl:if> |
|
26 |
</ROWS> |
|
27 |
|
|
28 |
</xsl:template> |
|
29 |
|
|
30 |
</xsl:stylesheet> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/datasources_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions"> |
|
7 |
|
|
8 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
9 |
|
|
10 |
<xsl:template match="/"> |
|
11 |
|
|
12 |
<xsl:variable name="originalid" select="//FIELD[@name = 'datasourceid']" /> |
|
13 |
<xsl:variable name="datasourceid" select="dnet:oafSplitId('datasource', $originalid)" /> |
|
14 |
<xsl:variable name="dateofcollection" select="//FIELD[@name = 'dateofcollection']" /> |
|
15 |
<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //FIELD[@name = 'collectedfromid'])" /> |
|
16 |
<xsl:variable name="collectedfromname" select="//FIELD[@name = 'collectedfromname']" /> |
|
17 |
|
|
18 |
<xsl:variable name="datasource" select="dnet:oafEntity('datasource', $datasourceid, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, //FIELD[not(@isNull)])"/> |
|
19 |
|
|
20 |
<ROWS> |
|
21 |
<xsl:if test="string-length($datasourceid) > 0"> |
|
22 |
<ROW key="{$datasourceid}" columnFamily="datasource"> |
|
23 |
<QUALIFIER name="body" type="base64"><xsl:value-of select="$datasource"/></QUALIFIER> |
|
24 |
</ROW> |
|
25 |
</xsl:if> |
|
26 |
</ROWS> |
|
27 |
|
|
28 |
</xsl:template> |
|
29 |
|
|
30 |
</xsl:stylesheet> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/claimUpdates_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
|
3 |
xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dr="http://www.driver-repository.eu/namespace/dr" |
|
4 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:oaa="http://namespace.openaire.eu/oaa" |
|
5 |
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:dnet="eu.dnetlib.actionmanager.actions.infopackage.DMFInfoPackageToHbaseXsltFunctions" |
|
6 |
xmlns:date="eu.dnetlib.miscutils.datetime.DateUtils" |
|
7 |
xmlns:exslt="http://exslt.org/common" |
|
8 |
version="1.0" |
|
9 |
extension-element-prefixes="exslt" |
|
10 |
exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt date"> |
|
11 |
|
|
12 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
13 |
|
|
14 |
<xsl:param name="trust" select="string('0.9')"/> |
|
15 |
<xsl:param name="provenance" select="string('UNKNOWN')"/> |
|
16 |
<xsl:param name="namespaceprefix" select="string('unknown_____')"/> |
|
17 |
|
|
18 |
<xsl:template match="/*"> |
|
19 |
<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/> |
|
20 |
<xsl:variable name="dateofcollection" select="//dr:dateOfCollection"/> |
|
21 |
|
|
22 |
<xsl:variable name="metadata" select="exslt:node-set(//*[local-name()='metadata']/*)"/> |
|
23 |
<xsl:variable name="collectedDatasourceid"> |
|
24 |
<xsl:choose> |
|
25 |
<xsl:when test="string-length(//oaf:collectedDatasourceid) > 0"> |
|
26 |
<xsl:value-of select="//oaf:collectedDatasourceid"/> |
|
27 |
</xsl:when> |
|
28 |
<xsl:otherwise> |
|
29 |
<!-- Fallback must be a quoted string literal: an unquoted UNKNOWN is an XPath child-element test and evaluates to an empty string. --> <xsl:value-of select="'UNKNOWN'"/> |
|
30 |
</xsl:otherwise> |
|
31 |
</xsl:choose> |
|
32 |
</xsl:variable> |
|
33 |
|
|
34 |
<xsl:choose> |
|
35 |
<xsl:when test="count($metadata) = 0"> |
|
36 |
<ROWS/> |
|
37 |
</xsl:when> |
|
38 |
<xsl:otherwise> |
|
39 |
|
|
40 |
<xsl:variable name="objidentifier" select="//record/*[local-name() = 'header']/*[local-name() = 'objIdentifier']"/> |
|
41 |
|
|
42 |
<xsl:variable name="resultId" select="dnet:oafSimpleId('result', $objidentifier)"/> |
|
43 |
|
|
44 |
<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/> |
|
45 |
<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/> |
|
46 |
|
|
47 |
<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/> |
|
48 |
<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/> |
|
49 |
|
|
50 |
<!-- <xsl:variable name="country" select="substring(//dr:repositoryCountry, 1, 200)" /> --> |
|
51 |
<!-- <xsl:variable name="accessmode" select="substring(//oaf:accessrights, 1, 200)" /> --> |
|
52 |
|
|
53 |
<xsl:variable name="result" |
|
54 |
select="dnet:oafResultFromInfoPackage($resultId, $about, $provenance, $trust, $hostedbyid, $hostedbyname, $collectedfromid, $collectedfromname, $objidentifier, $dateofcollection, $metadata)"/> |
|
55 |
|
|
56 |
<ROWS> |
|
57 |
<ROW key="{$resultId}" columnFamily="result"> |
|
58 |
<QUALIFIER name="{concat('update_', date:now())}" type="base64"> |
|
59 |
<xsl:value-of select="$result"/> |
|
60 |
</QUALIFIER> |
|
61 |
</ROW> |
|
62 |
</ROWS> |
|
63 |
</xsl:otherwise> |
|
64 |
</xsl:choose> |
|
65 |
</xsl:template> |
|
66 |
|
|
67 |
</xsl:stylesheet> |
|
68 |
|
modules/dnet-deduplication/trunk/src/main/resources/eu/dnetlib/msro/workflows/dedup/dissimilarity_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions"> |
|
7 |
|
|
8 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
9 |
|
|
10 |
<xsl:template match="/"> |
|
11 |
|
|
12 |
<xsl:variable name="type" select="/ROW/FIELD[@name = 'entitytype']" /> |
|
13 |
<xsl:variable name="source" select="dnet:oafSimpleId($type, /ROW/FIELD[@name = 'id1'])" /> |
|
14 |
<xsl:variable name="target" select="dnet:oafSimpleId($type, /ROW/FIELD[@name = 'id2'])" /> |
|
15 |
|
|
16 |
<xsl:variable name="columnFamily"> |
<!-- Maps the entity type to the column family used by both ROW elements below; an unmatched type leaves this empty, and the string-length guard then emits no ROW. --> |
<!-- NOTE(review): this is the dissimilarity stylesheet, yet every family is named *_dedupSimilarity_isSimilarTo, exactly as in similarity_2_hbase.xsl; confirm this is intended. --> |
|
17 |
<xsl:choose> |
|
18 |
<xsl:when test="$type = 'result'">resultResult_dedupSimilarity_isSimilarTo</xsl:when> |
|
19 |
<xsl:when test="$type = 'person'">personPerson_dedupSimilarity_isSimilarTo</xsl:when> |
|
20 |
<xsl:when test="$type = 'organization'">organizationOrganization_dedupSimilarity_isSimilarTo</xsl:when> |
|
21 |
</xsl:choose> |
|
22 |
</xsl:variable> |
|
23 |
|
|
24 |
<ROWS> |
|
25 |
<xsl:if test="string-length($source) > 0 and string-length($target) > 0 and string-length($columnFamily) > 0"> |
|
26 |
<ROW key="{$source}" columnFamily="{$columnFamily}"> |
|
27 |
<QUALIFIER name="{$target}" type="base64"></QUALIFIER> |
|
28 |
</ROW> |
|
29 |
<ROW key="{$target}" columnFamily="{$columnFamily}"> |
|
30 |
<QUALIFIER name="{$source}" type="base64"></QUALIFIER> |
|
31 |
</ROW> |
|
32 |
</xsl:if> |
|
33 |
</ROWS> |
|
34 |
|
|
35 |
</xsl:template> |
|
36 |
|
|
37 |
</xsl:stylesheet> |
modules/dnet-deduplication/trunk/src/main/resources/eu/dnetlib/msro/workflows/dedup/similarity_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions"> |
|
7 |
|
|
8 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
9 |
|
|
10 |
<xsl:template match="/"> |
|
11 |
|
|
12 |
<xsl:variable name="source" select="/SIMILARITY/SOURCE/@id" /> |
|
13 |
<xsl:variable name="target" select="/SIMILARITY/TARGET/@id" /> |
|
14 |
<xsl:variable name="type" select="/SIMILARITY/TYPE/@name" /> |
|
15 |
|
|
16 |
<xsl:variable name="columnFamily"> |
|
17 |
<xsl:choose> |
|
18 |
<xsl:when test="$type = 'result'">resultResult_dedupSimilarity_isSimilarTo</xsl:when> |
|
19 |
<xsl:when test="$type = 'person'">personPerson_dedupSimilarity_isSimilarTo</xsl:when> |
|
20 |
<xsl:when test="$type = 'organization'">organizationOrganization_dedupSimilarity_isSimilarTo</xsl:when> |
|
21 |
</xsl:choose> |
|
22 |
</xsl:variable> |
|
23 |
|
|
24 |
<ROWS> |
|
25 |
<xsl:if test="string-length($source) > 0 and string-length($target) > 0 and string-length($columnFamily) > 0"> |
|
26 |
<ROW key="{$source}" columnFamily="{$columnFamily}"> |
|
27 |
<QUALIFIER name="{$target}" type="base64"></QUALIFIER> |
|
28 |
</ROW> |
|
29 |
</xsl:if> |
|
30 |
</ROWS> |
|
31 |
|
|
32 |
</xsl:template> |
|
33 |
|
|
34 |
</xsl:stylesheet> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/test/java/eu/dnetlib/msro/openaireplus/workflows/hbase/ClaimRels2HbaseTest.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.openaireplus.workflows.hbase; |
2 | 2 |
|
3 |
import java.io.InputStream; |
|
3 | 4 |
import java.io.StringReader; |
4 | 5 |
|
6 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
7 |
import eu.dnetlib.miscutils.functional.xml.ApplyXslt; |
|
5 | 8 |
import org.apache.commons.codec.binary.Base64; |
6 | 9 |
import org.apache.commons.io.IOUtils; |
10 |
import org.apache.commons.logging.Log; |
|
11 |
import org.apache.commons.logging.LogFactory; |
|
7 | 12 |
import org.dom4j.Document; |
13 |
import org.dom4j.DocumentException; |
|
8 | 14 |
import org.dom4j.io.SAXReader; |
15 |
import org.junit.Before; |
|
9 | 16 |
import org.junit.Test; |
10 | 17 |
import org.springframework.core.io.ClassPathResource; |
11 | 18 |
import org.springframework.core.io.Resource; |
12 | 19 |
|
13 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
14 |
import eu.dnetlib.miscutils.functional.xml.ApplyXslt; |
|
15 |
|
|
16 | 20 |
public class ClaimRels2HbaseTest { |
17 | 21 |
|
18 |
private ApplyXslt xslt = new ApplyXslt(new ClassPathResource("/eu/dnetlib/msro/openaireplus/workflows/hbase/claimrels_2_hbase.xsl"));
|
|
22 |
private static final Log log = LogFactory.getLog(ClaimRels2HbaseTest.class);
|
|
19 | 23 |
|
24 |
private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/"; |
|
25 |
|
|
26 |
private ApplyXslt xslt = null; |
|
27 |
|
|
20 | 28 |
private SAXReader reader = new SAXReader(); |
21 | 29 |
|
30 |
@Before |
|
31 |
public void setUp() { |
|
32 |
xslt = new ApplyXslt(loadFromTransformationProfile("claimrels_2_hbase.xsl")); |
|
33 |
} |
|
34 |
|
|
22 | 35 |
@Test |
23 | 36 |
public void testXslt() throws Exception { |
24 | 37 |
|
... | ... | |
44 | 57 |
|
45 | 58 |
} |
46 | 59 |
|
60 |
private String loadFromTransformationProfile(final String profilePath) { |
|
61 |
log.info("Loading xslt from: " + basePathProfiles + profilePath); |
|
62 |
InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath); |
|
63 |
SAXReader saxReader = new SAXReader(); |
|
64 |
Document doc = null; |
|
65 |
try { |
|
66 |
doc = saxReader.read(profile); |
|
67 |
} catch (DocumentException e) { |
|
68 |
e.printStackTrace(); |
|
69 |
throw new RuntimeException(e); |
|
70 |
} |
|
71 |
String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML(); |
|
72 |
return xslt; |
|
73 |
} |
|
74 |
|
|
75 |
|
|
47 | 76 |
} |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/test/java/eu/dnetlib/msro/openaireplus/workflows/index/OpenaireLayoutToRecordStylesheetTest.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.openaireplus.workflows.index; |
2 | 2 |
|
3 |
import static org.junit.Assert.assertFalse; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
6 | 3 |
import java.io.IOException; |
7 | 4 |
import java.io.InputStream; |
8 | 5 |
import java.io.StringReader; |
9 | 6 |
import java.io.StringWriter; |
10 |
|
|
7 |
import javax.xml.stream.XMLStreamException; |
|
11 | 8 |
import javax.xml.transform.Transformer; |
12 | 9 |
import javax.xml.transform.TransformerException; |
13 | 10 |
import javax.xml.transform.TransformerFactory; |
14 | 11 |
import javax.xml.transform.stream.StreamResult; |
15 | 12 |
import javax.xml.transform.stream.StreamSource; |
16 | 13 |
|
17 |
import org.apache.commons.io.IOUtils; |
|
18 |
import org.apache.solr.common.SolrInputDocument; |
|
19 |
import org.junit.Test; |
|
20 |
import org.springframework.core.io.ClassPathResource; |
|
21 |
|
|
22 | 14 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
23 | 15 |
import eu.dnetlib.functionality.index.solr.feed.StreamingInputDocumentFactory; |
24 | 16 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
25 | 17 |
import eu.dnetlib.miscutils.functional.xml.ApplyXslt; |
26 | 18 |
import eu.dnetlib.miscutils.functional.xml.IndentXmlString; |
19 |
import org.apache.commons.io.IOUtils; |
|
20 |
import org.apache.solr.common.SolrInputDocument; |
|
21 |
import org.junit.Test; |
|
22 |
import org.springframework.core.io.ClassPathResource; |
|
27 | 23 |
|
24 |
import static org.junit.Assert.assertFalse; |
|
25 |
import static org.junit.Assert.assertNotNull; |
|
26 |
|
|
28 | 27 |
public class OpenaireLayoutToRecordStylesheetTest { |
29 | 28 |
|
30 | 29 |
private static final String OPENAIRE_LAYOUT_TO_RECORD_STYLESHEET_XSL = "/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl"; |
... | ... | |
34 | 33 |
private static final String OAF_RECORD = "/eu/dnetlib/msro/openaireplus/workflows/index/oafRecord.xml"; |
35 | 34 |
|
36 | 35 |
@Test |
37 |
public void test1() throws ISLookUpException, IOException, TransformerException { |
|
36 |
public void test1() throws ISLookUpException, IOException, TransformerException, XMLStreamException {
|
|
38 | 37 |
String xsl = prepareXslt("DMF"); |
39 | 38 |
assertNotNull(xsl); |
40 | 39 |
assertFalse(xsl.isEmpty()); |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/index/PrepareIndexDataJobNode.java | ||
---|---|---|
4 | 4 |
import java.io.InputStream; |
5 | 5 |
import java.io.StringReader; |
6 | 6 |
import java.io.StringWriter; |
7 |
|
|
8 | 7 |
import javax.annotation.Resource; |
9 | 8 |
import javax.xml.transform.Transformer; |
10 | 9 |
import javax.xml.transform.TransformerException; |
... | ... | |
12 | 11 |
import javax.xml.transform.stream.StreamResult; |
13 | 12 |
import javax.xml.transform.stream.StreamSource; |
14 | 13 |
|
15 |
import org.apache.commons.io.IOUtils; |
|
16 |
import org.apache.commons.lang.StringUtils; |
|
17 |
import org.apache.commons.logging.Log; |
|
18 |
import org.apache.commons.logging.LogFactory; |
|
19 |
import org.springframework.beans.factory.annotation.Required; |
|
20 |
import org.springframework.core.io.ClassPathResource; |
|
21 |
|
|
22 | 14 |
import com.googlecode.sarasvati.Arc; |
23 | 15 |
import com.googlecode.sarasvati.NodeToken; |
24 |
|
|
25 | 16 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException; |
26 | 17 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
27 | 18 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
... | ... | |
29 | 20 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
30 | 21 |
import eu.dnetlib.miscutils.functional.hash.Hashing; |
31 | 22 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
23 |
import org.apache.commons.io.IOUtils; |
|
24 |
import org.apache.commons.lang.StringUtils; |
|
25 |
import org.apache.commons.logging.Log; |
|
26 |
import org.apache.commons.logging.LogFactory; |
|
27 |
import org.springframework.beans.factory.annotation.Required; |
|
28 |
import org.springframework.core.io.ClassPathResource; |
|
32 | 29 |
|
33 | 30 |
public class PrepareIndexDataJobNode extends SimpleJobNode { |
34 | 31 |
|
... | ... | |
83 | 80 |
return Arc.DEFAULT_ARC; |
84 | 81 |
} |
85 | 82 |
|
83 |
protected String tableName(final NodeToken token) { |
|
84 |
if (token.getEnv().hasAttribute("hbaseTable")) { |
|
85 |
String table = token.getEnv().getAttribute("hbaseTable"); |
|
86 |
log.debug("found override value in wfEnv for 'hbaseTable' param: " + table); |
|
87 |
return table; |
|
88 |
} |
|
89 |
return getHbaseTable(); |
|
90 |
} |
|
91 |
|
|
86 | 92 |
public String getIndexSolrUrlZk() throws ISLookUpException { |
87 | 93 |
return getServiceConfigValue("for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='IndexServiceResourceType'] return $x//PROTOCOL[./@name='solr']/@address/string()"); |
88 | 94 |
} |
... | ... | |
112 | 118 |
} |
113 | 119 |
|
114 | 120 |
private String getFileName(final NodeToken token, final String fileNamePrefix) { |
115 |
return "/" + fileNamePrefix + "_" + getHbaseTable() + "_" + token.getEnv().getAttribute("format") + ".seq";
|
|
121 |
return "/" + fileNamePrefix + "_" + tableName(token) + "_" + token.getEnv().getAttribute("format") + ".seq";
|
|
116 | 122 |
} |
117 | 123 |
|
118 | 124 |
private String getCollectionName(final NodeToken token) { |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/db2hbase.xml | ||
---|---|---|
51 | 51 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">dsEpr</PARAM> |
52 | 52 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
53 | 53 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
54 |
<PARAM managedBy="system" name="xslt" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/datasources_2_hbase.xsl |
|
55 |
</PARAM> |
|
54 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
56 | 55 |
</PARAMETERS> |
57 | 56 |
<ARCS> |
58 | 57 |
<ARC to="queryDatasourceOrganization"/> |
... | ... | |
77 | 76 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">dsOrgEpr</PARAM> |
78 | 77 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
79 | 78 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
80 |
<PARAM managedBy="system" name="xslt" required="true" type="string"> |
|
81 |
/eu/dnetlib/msro/openaireplus/workflows/hbase/datasourceorganization_2_hbase.xsl |
|
82 |
</PARAM> |
|
79 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
83 | 80 |
</PARAMETERS> |
84 | 81 |
<ARCS> |
85 | 82 |
<ARC to="queryOrganizations"/> |
... | ... | |
103 | 100 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">orgEpr</PARAM> |
104 | 101 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
105 | 102 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
106 |
<PARAM managedBy="system" name="xslt" required="true" type="string"> |
|
107 |
/eu/dnetlib/msro/openaireplus/workflows/hbase/organizations_2_hbase.xsl |
|
108 |
</PARAM> |
|
103 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
109 | 104 |
</PARAMETERS> |
110 | 105 |
<ARCS> |
111 | 106 |
<ARC to="queryPersons"/> |
... | ... | |
128 | 123 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">personsEpr</PARAM> |
129 | 124 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
130 | 125 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
131 |
<PARAM managedBy="system" name="xslt" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/persons_2_hbase.xsl |
|
132 |
</PARAM> |
|
126 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
133 | 127 |
</PARAMETERS> |
134 | 128 |
<ARCS> |
135 | 129 |
<ARC to="queryProjects"/> |
... | ... | |
152 | 146 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">projEpr</PARAM> |
153 | 147 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
154 | 148 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
155 |
<PARAM managedBy="system" name="xslt" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/projects_2_hbase.xsl |
|
156 |
</PARAM> |
|
149 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
157 | 150 |
</PARAMETERS> |
158 | 151 |
<ARCS> |
159 | 152 |
<ARC to="queryProjectOrganization"/> |
... | ... | |
178 | 171 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">projOrgEpr</PARAM> |
179 | 172 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
180 | 173 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
181 |
<PARAM managedBy="system" name="xslt" required="true" type="string"> |
|
182 |
/eu/dnetlib/msro/openaireplus/workflows/hbase/projectorganization_2_hbase.xsl |
|
183 |
</PARAM> |
|
174 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
184 | 175 |
</PARAMETERS> |
185 | 176 |
<ARCS> |
186 | 177 |
<ARC to="queryProjectContactperson"/> |
... | ... | |
205 | 196 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">projPerEpr</PARAM> |
206 | 197 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
207 | 198 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
208 |
<PARAM managedBy="system" name="xslt" required="true" type="string"> |
|
209 |
/eu/dnetlib/msro/openaireplus/workflows/hbase/projectcontactperson_2_hbase.xsl |
|
210 |
</PARAM> |
|
199 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
211 | 200 |
</PARAMETERS> |
212 | 201 |
<ARCS> |
213 | 202 |
<ARC to="queryClaimRels"/> |
... | ... | |
232 | 221 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">claimRelsEpr</PARAM> |
233 | 222 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
234 | 223 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
235 |
<PARAM managedBy="system" name="xslt" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/claimrels_2_hbase.xsl |
|
236 |
</PARAM> |
|
224 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
237 | 225 |
</PARAMETERS> |
238 | 226 |
<ARCS> |
239 | 227 |
<ARC to="queryClaimUpdates"/> |
240 | 228 |
</ARCS> |
241 | 229 |
</NODE> |
242 | 230 |
<NODE name="queryClaimUpdates" type="QueryDb"> |
243 |
<DESCRIPTION>query Claim Rels</DESCRIPTION>
|
|
231 |
<DESCRIPTION>query Claim updates</DESCRIPTION>
|
|
244 | 232 |
<PARAMETERS> |
245 | 233 |
<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM> |
246 | 234 |
<PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/queryClaimsUpdate.sql |
... | ... | |
254 | 242 |
</ARCS> |
255 | 243 |
</NODE> |
256 | 244 |
<NODE name="storeClaimUpdates" type="StoreHBase"> |
257 |
<DESCRIPTION>Store relation Claim Project</DESCRIPTION>
|
|
245 |
<DESCRIPTION>Store claim updates</DESCRIPTION>
|
|
258 | 246 |
<PARAMETERS> |
259 | 247 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">claimRelsEpr</PARAM> |
260 | 248 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
261 | 249 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
262 |
<PARAM managedBy="system" name="xslt" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/claimUpdates_2_hbase.xsl |
|
263 |
</PARAM> |
|
250 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
264 | 251 |
</PARAMETERS> |
265 | 252 |
<ARCS> |
266 | 253 |
<ARC to="finish"/> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/odf2hbase.xml | ||
---|---|---|
13 | 13 |
<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE> |
14 | 14 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
15 | 15 |
<CONFIGURATION start="manual"> |
16 |
<NODE name="setTable" type="SetHBaseTable" isStart="true"> |
|
17 |
<DESCRIPTION>set hbase table</DESCRIPTION> |
|
18 |
<PARAMETERS> |
|
19 |
<PARAM name="table" type="string" managedBy="user" required="true"></PARAM> |
|
20 |
</PARAMETERS> |
|
21 |
<ARCS> |
|
22 |
<ARC to="mapreduce"/> |
|
23 |
</ARCS> |
|
24 |
</NODE> |
|
25 |
|
|
16 | 26 |
<NODE name="prepareImport" type="PrepareMDStoreImport" isStart="true"> |
17 | 27 |
<DESCRIPTION>Configure export to HDFS</DESCRIPTION> |
18 | 28 |
<PARAMETERS> |
19 | 29 |
<PARAM required="true" type="string" name="hdfsPathParam" managedBy="system">hdfsPath</PARAM> |
20 |
<PARAM required="true" type="string" name="hdfsPath" managedBy="system">/tmp/mdstores_odf-store-cleaned.seq</PARAM>
|
|
21 |
<PARAM required="true" type="string" name="xsltParam" managedBy="system">xslt</PARAM>
|
|
22 |
<PARAM required="true" type="string" name="xslt" managedBy="system">/eu/dnetlib/data/transform/datacite_2_hbase.xsl</PARAM>
|
|
30 |
<PARAM required="true" type="string" name="hdfsPath" managedBy="user">/tmp/mdstores_odf-store-cleaned.seq</PARAM>
|
|
31 |
<PARAM required="true" type="string" name="mappingParam" managedBy="system">xslt</PARAM>
|
|
32 |
<PARAM required="true" type="string" name="mapping" managedBy="user" function="obtainValues('odf2hbaseMappings', {})"></PARAM>
|
|
23 | 33 |
</PARAMETERS> |
24 | 34 |
<ARCS> |
25 | 35 |
<ARC to="reuseHdfsRecords" /> |
... | ... | |
31 | 41 |
<PARAM required="true" type="boolean" name="reuseMdRecords" managedBy="user">false</PARAM> |
32 | 42 |
</PARAMETERS> |
33 | 43 |
<ARCS> |
34 |
<ARC name="true" to="mapreduce" />
|
|
44 |
<ARC name="true" to="doneExport"/>
|
|
35 | 45 |
<ARC name="false" to="exportRecords" /> |
36 | 46 |
</ARCS> |
37 | 47 |
</NODE> |
... | ... | |
55 | 65 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
56 | 66 |
</PARAMETERS> |
57 | 67 |
<ARCS> |
68 |
<ARC to="doneExport"/> |
|
69 |
</ARCS> |
|
70 |
</NODE> |
|
71 |
<NODE name="doneExport"> |
|
72 |
<DESCRIPTION></DESCRIPTION> |
|
73 |
<PARAMETERS/> |
|
74 |
<ARCS> |
|
58 | 75 |
<ARC to="mapreduce" /> |
59 | 76 |
</ARCS> |
60 |
</NODE>
|
|
61 |
<NODE name="mapreduce" type="SubmitHadoopJob"> |
|
77 |
</NODE> |
|
78 |
<NODE name="mapreduce" type="SubmitHadoopJob" isJoin="true">
|
|
62 | 79 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
63 | 80 |
<PARAMETERS> |
64 | 81 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
65 | 82 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">mdStoreHdfsImportJob</PARAM> |
83 |
<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM> |
|
66 | 84 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
67 |
{ |
|
68 |
'mapred.input.dir' : 'hdfsPath', |
|
69 |
'hbase.import.xslt' : 'xslt' |
|
85 |
{ |
|
86 |
'mapred.input.dir' : 'hdfsPath', |
|
87 |
'hbase.import.xslt' : 'xslt', |
|
88 |
'hbase.mapred.outputtable' : 'hbaseTable' |
|
70 | 89 |
} |
71 |
</PARAM> |
|
72 |
<PARAM required="true" type="string" name="sysParams" managedBy="system"> |
|
73 |
{ |
|
74 |
'hbase.mapred.outputtable' : 'hbase.mapred.datatable' |
|
75 |
} |
|
76 | 90 |
</PARAM> |
77 |
<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM> |
|
78 | 91 |
</PARAMETERS> |
79 | 92 |
<ARCS> |
80 | 93 |
<ARC to="checkODFResultCount" /> |
81 | 94 |
</ARCS> |
82 | 95 |
</NODE> |
83 |
<NODE name="checkODFResultCount" type="CheckHDFSCount"> |
|
96 |
<NODE name="checkODFResultCount" type="CheckHDFSCounts">
|
|
84 | 97 |
<DESCRIPTION>Checks if the number of publications stored on HBASE is the same as those in the HDFS file</DESCRIPTION> |
85 | 98 |
<PARAMETERS> |
86 | 99 |
<PARAM required="true" type="string" name="numberToVerifyParamName" managedBy="system">mdstore.result</PARAM> |
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/pom.xml | ||
---|---|---|
1 | 1 |
<?xml version="1.0" ?> |
2 |
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
|
2 |
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0" |
|
3 |
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
|
3 | 4 |
<parent> |
4 | 5 |
<groupId>eu.dnetlib</groupId> |
5 | 6 |
<artifactId>dnet-parent</artifactId> |
... | ... | |
103 | 104 |
<version>${javax.servlet.version}</version> |
104 | 105 |
<scope>provided</scope> |
105 | 106 |
</dependency> |
106 |
|
|
107 | 107 |
<dependency> |
108 |
<groupId>eu.dnetlib</groupId> |
|
109 |
<artifactId>dnet-openaireplus-profiles</artifactId> |
|
110 |
<version>[1.0.0,2.0.0)</version> |
|
111 |
<scope>test</scope> |
|
112 |
</dependency> |
|
113 |
<dependency> |
|
108 | 114 |
<groupId>junit</groupId> |
109 | 115 |
<artifactId>junit</artifactId> |
110 | 116 |
<version>${junit.version}</version> |
modules/dnet-deduplication/trunk/src/main/java/eu/dnetlib/msro/workflows/dedup/MinDistSearchHadoopJobNode.java | ||
---|---|---|
21 | 21 |
public class MinDistSearchHadoopJobNode extends DedupConfigurationAwareJobNode { |
22 | 22 |
|
23 | 23 |
private static final Log log = LogFactory.getLog(MinDistSearchHadoopJobNode.class); |
24 |
|
|
24 |
private final static String StatusParam = "MinDistSearchHadoopJobNode.status"; |
|
25 |
private final static String DepthParam = "mindist_recursion_depth"; |
|
26 |
private final static String UpdateCounterParam = "UpdateCounter.UPDATED"; |
|
27 |
private final static String DebugParam = "mindist_DEBUG"; |
|
25 | 28 |
@Autowired |
26 | 29 |
private UniqueServiceLocator serviceLocator; |
27 |
|
|
28 |
enum STATUS {DATALOAD, DEPTH_N} |
|
29 |
|
|
30 | 30 |
private boolean debug = false; |
31 |
|
|
32 | 31 |
private String outPathParam; |
33 | 32 |
|
34 |
private final static String StatusParam = "MinDistSearchHadoopJobNode.status"; |
|
35 |
|
|
36 |
private final static String DepthParam = "mindist_recursion_depth"; |
|
37 |
|
|
38 |
private final static String UpdateCounterParam = "UpdateCounter.UPDATED"; |
|
39 |
|
|
40 |
private final static String DebugParam = "mindist_DEBUG"; |
|
41 |
|
|
42 | 33 |
@Override |
43 | 34 |
protected void prepareJob(final BlackboardJob job, final NodeToken token) throws Exception { |
44 | 35 |
|
... | ... | |
131 | 122 |
|
132 | 123 |
final String counter = job.getParameters().get(UpdateCounterParam); |
133 | 124 |
if (StringUtils.isBlank(counter)) { |
134 |
token.getFullEnv().setAttribute(StatusParam, STATUS.DATALOAD.toString()); |
|
125 |
token.getFullEnv().removeAttribute(StatusParam); |
|
126 |
token.getFullEnv().removeAttribute(DepthParam); |
|
135 | 127 |
log.info(String.format("done iteration %s:%s", UpdateCounterParam, 0)); |
136 | 128 |
engine.complete(token, Arc.DEFAULT_ARC); |
137 | 129 |
} else { |
... | ... | |
161 | 153 |
this.outPathParam = outPathParam; |
162 | 154 |
} |
163 | 155 |
|
156 |
enum STATUS {DATALOAD, DEPTH_N} |
|
157 |
|
|
164 | 158 |
} |
modules/dnet-deduplication/trunk/src/main/java/eu/dnetlib/msro/workflows/hadoop/hbase/StoreHBaseRecordsJobNode.java | ||
---|---|---|
8 | 8 |
import com.googlecode.sarasvati.env.Env; |
9 | 9 |
import eu.dnetlib.data.hadoop.rmi.HadoopBlackboardActions; |
10 | 10 |
import eu.dnetlib.data.hadoop.rmi.HadoopService; |
11 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
|
12 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
13 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
11 | 14 |
import eu.dnetlib.enabling.resultset.rmi.ResultSetException; |
12 | 15 |
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob; |
13 | 16 |
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions; |
17 |
import eu.dnetlib.msro.rmi.MSROException; |
|
14 | 18 |
import eu.dnetlib.msro.workflows.nodes.BlackboardJobNode; |
15 | 19 |
import eu.dnetlib.msro.workflows.nodes.ProgressJobNode; |
16 | 20 |
import eu.dnetlib.msro.workflows.nodes.blackboard.BlackboardWorkflowJobListener; |
... | ... | |
18 | 22 |
import eu.dnetlib.msro.workflows.util.ProgressProvider; |
19 | 23 |
import eu.dnetlib.msro.workflows.util.ResultsetProgressProvider; |
20 | 24 |
import eu.dnetlib.msro.workflows.util.WorkflowsConstants; |
21 |
import org.apache.commons.io.IOUtils;
|
|
25 |
import org.apache.commons.lang.StringUtils;
|
|
22 | 26 |
import org.apache.commons.logging.Log; |
23 | 27 |
import org.apache.commons.logging.LogFactory; |
28 |
import org.springframework.beans.factory.annotation.Autowired; |
|
24 | 29 |
import org.springframework.beans.factory.annotation.Required; |
25 | 30 |
|
26 | 31 |
public class StoreHBaseRecordsJobNode extends BlackboardJobNode implements ProgressJobNode { |
27 | 32 |
|
28 | 33 |
private static final Log log = LogFactory.getLog(StoreHBaseRecordsJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM |
29 | 34 |
|
35 |
@Autowired |
|
36 |
private UniqueServiceLocator serviceLocator; |
|
37 |
|
|
30 | 38 |
private String inputEprParam; |
31 | 39 |
private String hbaseTableProperty; |
32 | 40 |
private String cluster; |
33 |
private String xslt;
|
|
41 |
private String mapping;
|
|
34 | 42 |
|
35 | 43 |
private boolean simulation = false; |
36 | 44 |
|
... | ... | |
49 | 57 |
|
50 | 58 |
job.setAction(HadoopBlackboardActions.IMPORT_EPR_HBASE.toString()); |
51 | 59 |
job.getParameters().put("input_epr", DnetXsltFunctions.encodeBase64(prepareEpr(token))); |
52 |
job.getParameters().put("xslt", DnetXsltFunctions.encodeBase64(prepareXslt()));
|
|
60 |
job.getParameters().put("xslt", DnetXsltFunctions.encodeBase64(readXslt(getMapping())));
|
|
53 | 61 |
job.getParameters().put("table", tableName(token)); |
54 | 62 |
job.getParameters().put("cluster", cluster(token)); |
55 | 63 |
job.getParameters().put("simulation", String.valueOf(isSimulation())); |
... | ... | |
95 | 103 |
return resultsetProgressProvider.getEpr().toString(); |
96 | 104 |
} |
97 | 105 |
|
98 |
private String prepareXslt() throws IOException { |
|
99 |
return (xslt == null) || xslt.isEmpty() ? "" : IOUtils.toString(getClass().getResourceAsStream(xslt)); |
|
106 |
private String readXslt(final String profileId) throws IOException, MSROException, ISLookUpException { |
|
107 |
if (StringUtils.isBlank(profileId)) throw new MSROException("missing profile id"); |
|
108 |
|
|
109 |
log.info("loading mapping from profile id: " + profileId); |
|
110 |
|
|
111 |
final String xquery = |
|
112 |
String.format("/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='%s']/BODY/CONFIGURATION/SCRIPT/CODE/*[local-name()='stylesheet']", profileId); |
|
113 |
return serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xquery); |
|
100 | 114 |
} |
101 | 115 |
|
102 | 116 |
public String getInputEprParam() { |
... | ... | |
115 | 129 |
this.hbaseTableProperty = hbaseTableProperty; |
116 | 130 |
} |
117 | 131 |
|
118 |
@Required |
|
119 |
public void setProcessCountingResultSetFactory(final ProcessCountingResultSetFactory processCountingResultSetFactory) { |
|
120 |
this.processCountingResultSetFactory = processCountingResultSetFactory; |
|
121 |
} |
|
122 |
|
|
123 | 132 |
@Override |
124 | 133 |
public ProgressProvider getProgressProvider() { |
125 | 134 |
return progressProvider; |
... | ... | |
133 | 142 |
return processCountingResultSetFactory; |
134 | 143 |
} |
135 | 144 |
|
136 |
public String getXslt() { |
|
137 |
return xslt; |
|
145 |
@Required |
|
146 |
public void setProcessCountingResultSetFactory(final ProcessCountingResultSetFactory processCountingResultSetFactory) { |
|
147 |
this.processCountingResultSetFactory = processCountingResultSetFactory; |
|
138 | 148 |
} |
139 | 149 |
|
140 |
public void setXslt(final String xslt) {
|
|
141 |
this.xslt = xslt;
|
|
150 |
public String getMapping() {
|
|
151 |
return mapping;
|
|
142 | 152 |
} |
143 | 153 |
|
154 |
public void setMapping(final String mapping) { |
|
155 |
this.mapping = mapping; |
|
156 |
} |
|
157 |
|
|
144 | 158 |
public String getCluster() { |
145 | 159 |
return cluster; |
146 | 160 |
} |
modules/dnet-deduplication/trunk/src/main/java/eu/dnetlib/msro/workflows/hadoop/hbase/DeleteHBaseRecordsJobNode.java | ||
---|---|---|
3 | 3 |
import java.io.IOException; |
4 | 4 |
import java.util.Map; |
5 | 5 |
|
6 |
import org.apache.commons.io.IOUtils; |
|
7 |
import org.apache.commons.logging.Log; |
|
8 |
import org.apache.commons.logging.LogFactory; |
|
9 |
import org.springframework.beans.factory.annotation.Required; |
|
10 |
|
|
11 | 6 |
import com.googlecode.sarasvati.Engine; |
12 | 7 |
import com.googlecode.sarasvati.NodeToken; |
13 | 8 |
import com.googlecode.sarasvati.env.Env; |
14 |
|
|
15 | 9 |
import eu.dnetlib.data.hadoop.rmi.HadoopBlackboardActions; |
16 | 10 |
import eu.dnetlib.data.hadoop.rmi.HadoopService; |
11 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
|
12 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
13 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
17 | 14 |
import eu.dnetlib.enabling.resultset.rmi.ResultSetException; |
18 | 15 |
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob; |
19 | 16 |
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions; |
17 |
import eu.dnetlib.msro.rmi.MSROException; |
|
20 | 18 |
import eu.dnetlib.msro.workflows.nodes.BlackboardJobNode; |
21 | 19 |
import eu.dnetlib.msro.workflows.nodes.ProgressJobNode; |
22 | 20 |
import eu.dnetlib.msro.workflows.nodes.blackboard.BlackboardWorkflowJobListener; |
... | ... | |
24 | 22 |
import eu.dnetlib.msro.workflows.util.ProgressProvider; |
25 | 23 |
import eu.dnetlib.msro.workflows.util.ResultsetProgressProvider; |
26 | 24 |
import eu.dnetlib.msro.workflows.util.WorkflowsConstants; |
25 |
import org.apache.commons.lang.StringUtils; |
|
26 |
import org.apache.commons.logging.Log; |
|
27 |
import org.apache.commons.logging.LogFactory; |
|
28 |
import org.springframework.beans.factory.annotation.Autowired; |
|
29 |
import org.springframework.beans.factory.annotation.Required; |
|
27 | 30 |
|
28 | 31 |
public class DeleteHBaseRecordsJobNode extends BlackboardJobNode implements ProgressJobNode { |
29 | 32 |
|
30 | 33 |
private static final Log log = LogFactory.getLog(DeleteHBaseRecordsJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM |
31 | 34 |
|
35 |
@Autowired |
|
36 |
private UniqueServiceLocator serviceLocator; |
|
37 |
|
|
32 | 38 |
private String inputEprParam; |
33 | 39 |
private String hbaseTableProperty; |
34 | 40 |
private String cluster; |
35 |
private String xslt;
|
|
41 |
private String mapping;
|
|
36 | 42 |
|
37 | 43 |
private boolean simulation = false; |
38 | 44 |
|
... | ... | |
51 | 57 |
|
52 | 58 |
job.setAction(HadoopBlackboardActions.DELETE_EPR_HBASE.toString()); |
53 | 59 |
job.getParameters().put("input_epr", DnetXsltFunctions.encodeBase64(prepareEpr(token))); |
54 |
job.getParameters().put("xslt", DnetXsltFunctions.encodeBase64(prepareXslt()));
|
|
60 |
job.getParameters().put("xslt", DnetXsltFunctions.encodeBase64(readXslt(getMapping())));
|
|
55 | 61 |
job.getParameters().put("table", getPropertyFetcher().getProperty(getHbaseTableProperty())); |
56 | 62 |
job.getParameters().put("cluster", cluster); |
57 | 63 |
job.getParameters().put("simulation", String.valueOf(isSimulation())); |
... | ... | |
80 | 86 |
return resultsetProgressProvider.getEpr().toString(); |
81 | 87 |
} |
82 | 88 |
|
83 |
private String prepareXslt() throws IOException { |
|
84 |
return (xslt == null) || xslt.isEmpty() ? "" : IOUtils.toString(getClass().getResourceAsStream(xslt)); |
|
89 |
private String readXslt(final String profileId) throws IOException, MSROException, ISLookUpException { |
|
90 |
if (StringUtils.isBlank(profileId)) throw new MSROException("missing profile id"); |
|
91 |
|
|
92 |
log.info("loading mapping from profile id: " + profileId); |
|
93 |
|
|
94 |
final String xquery = |
|
95 |
String.format("/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='%s']/BODY/CONFIGURATION/SCRIPT/CODE/*[local-name()='stylesheet']", profileId); |
|
96 |
return serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xquery); |
|
85 | 97 |
} |
86 | 98 |
|
87 | 99 |
public String getInputEprParam() { |
... | ... | |
100 | 112 |
this.hbaseTableProperty = hbaseTableProperty; |
101 | 113 |
} |
102 | 114 |
|
103 |
@Required |
|
104 |
public void setProcessCountingResultSetFactory(final ProcessCountingResultSetFactory processCountingResultSetFactory) { |
|
105 |
this.processCountingResultSetFactory = processCountingResultSetFactory; |
|
106 |
} |
|
107 |
|
|
108 | 115 |
@Override |
109 | 116 |
public ProgressProvider getProgressProvider() { |
110 | 117 |
return progressProvider; |
... | ... | |
118 | 125 |
return processCountingResultSetFactory; |
119 | 126 |
} |
120 | 127 |
|
121 |
public String getXslt() { |
|
122 |
return xslt; |
|
128 |
@Required |
|
129 |
public void setProcessCountingResultSetFactory(final ProcessCountingResultSetFactory processCountingResultSetFactory) { |
|
130 |
this.processCountingResultSetFactory = processCountingResultSetFactory; |
|
123 | 131 |
} |
124 | 132 |
|
125 |
public void setXslt(final String xslt) {
|
|
126 |
this.xslt = xslt;
|
|
133 |
public String getMapping() {
|
|
134 |
return mapping;
|
|
127 | 135 |
} |
128 | 136 |
|
137 |
public void setMapping(final String mapping) { |
|
138 |
this.mapping = mapping; |
|
139 |
} |
|
140 |
|
|
129 | 141 |
public String getCluster() { |
130 | 142 |
return cluster; |
131 | 143 |
} |
modules/dnet-deduplication/trunk/src/main/java/eu/dnetlib/msro/workflows/hadoop/PrepareMDStoreImportJobNode.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.workflows.hadoop; |
2 | 2 |
|
3 | 3 |
import java.io.IOException; |
4 |
import java.lang.reflect.Type; |
|
5 |
import java.util.List; |
|
6 |
import java.util.Map; |
|
7 |
import java.util.Set; |
|
8 | 4 |
|
9 |
import org.apache.commons.lang.StringUtils; |
|
10 |
import org.apache.commons.logging.Log; |
|
11 |
import org.apache.commons.logging.LogFactory; |
|
12 |
import org.springframework.beans.factory.annotation.Autowired; |
|
13 |
import org.springframework.beans.factory.annotation.Value; |
|
14 |
|
|
15 |
import com.google.common.base.Function; |
|
16 |
import com.google.common.base.Joiner; |
|
17 |
import com.google.common.base.Predicate; |
|
18 |
import com.google.common.base.Splitter; |
|
19 |
import com.google.common.collect.Iterables; |
|
20 |
import com.google.common.collect.Maps; |
|
21 |
import com.google.common.collect.Sets; |
|
22 |
import com.google.gson.Gson; |
|
23 |
import com.google.gson.reflect.TypeToken; |
|
24 | 5 |
import com.googlecode.sarasvati.Arc; |
25 | 6 |
import com.googlecode.sarasvati.NodeToken; |
26 |
|
|
27 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException; |
|
28 | 7 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
29 | 8 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
30 | 9 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
31 | 10 |
import eu.dnetlib.msro.rmi.MSROException; |
32 | 11 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
12 |
import org.apache.commons.lang.StringUtils; |
|
13 |
import org.apache.commons.logging.Log; |
|
14 |
import org.apache.commons.logging.LogFactory; |
|
15 |
import org.springframework.beans.factory.annotation.Autowired; |
|
33 | 16 |
|
34 | 17 |
public class PrepareMDStoreImportJobNode extends SimpleJobNode { |
35 | 18 |
|
... | ... | |
46 | 29 |
|
47 | 30 |
private String mapping; |
48 | 31 |
|
49 |
private String dsTypeMapParam; |
|
50 |
|
|
51 |
@Value("${dnet.openaire.dataload.dstypemap.xquery}") |
|
52 |
private String dsTypeXquery; |
|
53 |
|
|
54 |
private String specialDatasourcesCSV; |
|
55 |
|
|
56 | 32 |
@Override |
57 | 33 |
protected String execute(final NodeToken token) throws Exception { |
58 | 34 |
|
59 | 35 |
token.getEnv().setAttribute(getHdfsPathParam(), getHdfsPath()); |
60 | 36 |
token.getEnv().setAttribute(getMappingParam(), readXslt(getMapping())); |
61 |
token.getEnv().setAttribute(getDsTypeMapParam(), getDatasourceTypeMap()); |
|
62 | 37 |
|
63 | 38 |
return Arc.DEFAULT_ARC; |
64 | 39 |
} |
65 | 40 |
|
66 |
/** |
|
67 |
* lookups IS for namespacePrefix -> datasource type, and build a map such that: for institutional repositories associate "false" to the |
|
68 |
* namespace prefix key, "true" otherwise and for a set of special prefixes |
|
69 |
* |
|
70 |
* @return json serialisation of the map |
|
71 |
* @throws ISLookUpException |
|
72 |
*/ |
|
73 |
private String getDatasourceTypeMap() throws ISLookUpException { |
|
74 |
|
|
75 |
final Set<String> specialSet = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().split(getSpecialDatasourcesCSV())); |
|
76 |
log.info("loaded special namespaceprefixes: " + specialSet.toString()); |
|
77 |
|
|
78 |
final List<String> lookupResult = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(getDsTypeXquery()); |
|
79 |
final Set<String> nsPrefixSet = Sets.newHashSet(); |
|
80 |
|
|
81 |
final Iterable<String> dstypeList = |
|
82 |
Iterables.filter(lookupResult, new Predicate<String>() { |
|
83 |
|
|
84 |
@Override |
|
85 |
public boolean apply(final String s) { |
|
86 |
final String nsPrefix = asNsPrefix(s); |
|
87 |
if (!nsPrefixSet.contains(nsPrefix)) { |
|
88 |
nsPrefixSet.add(nsPrefix); |
|
89 |
return true; |
|
90 |
} else return false; |
|
91 |
} |
|
92 |
}); |
|
93 |
// log.info("datasource type list size: " + dstypeList.size()); |
|
94 |
|
|
95 |
final Gson gson = new Gson(); |
|
96 |
final Type token = new TypeToken<Map<String, String>>() {}.getType(); |
|
97 |
final Map<String, String> dsMap = gson.fromJson(asJsonMap(dstypeList), token); |
|
98 |
log.debug("datasource type map size: " + dsMap.size()); |
|
99 |
final Map<String, String> dsTypeMap = Maps.transformValues(dsMap, new Function<String, String>() { |
|
100 |
|
|
101 |
@Override |
|
102 |
public String apply(final String type) { |
|
103 |
if ("pubsrepository::unknown".equals(type)) { |
|
104 |
if (specialSet.contains(type)) return "false"; |
|
105 |
return "true"; |
|
106 |
} else return "false"; |
|
107 |
} |
|
108 |
}); |
|
109 |
log.debug("transformed datasource type map size: " + dsTypeMap.size()); |
|
110 |
|
|
111 |
final String out = gson.toJson(dsTypeMap); |
|
112 |
|
|
113 |
return out; |
|
114 |
} |
|
115 |
|
|
116 |
private String asJsonMap(final Iterable<String> entries) { |
|
117 |
return "{" + Joiner.on(",\n").skipNulls().join(entries) + "}"; |
|
118 |
} |
|
119 |
|
|
120 |
private String readXslt(final String profileId) throws IOException, MSROException, ISLookUpDocumentNotFoundException, ISLookUpException { |
|
41 |
private String readXslt(final String profileId) throws IOException, MSROException, ISLookUpException { |
|
121 | 42 |
if (StringUtils.isBlank(profileId)) throw new MSROException("missing profile id"); |
122 | 43 |
|
123 | 44 |
log.info("loading mapping from profile id: " + profileId); |
... | ... | |
127 | 48 |
return serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xquery); |
128 | 49 |
} |
129 | 50 |
|
130 |
private String asNsPrefix(final String s) { |
|
131 |
return StringUtils.substringBefore(s, ":").replaceAll("\"", "").trim(); |
|
132 |
} |
|
133 |
|
|
134 | 51 |
public String getHdfsPathParam() { |
135 | 52 |
return hdfsPathParam; |
136 | 53 |
} |
... | ... | |
163 | 80 |
this.mappingParam = mappingParam; |
164 | 81 |
} |
165 | 82 |
|
166 |
public String getDsTypeMapParam() { |
|
167 |
return dsTypeMapParam; |
|
168 |
} |
|
169 |
|
|
170 |
public void setDsTypeMapParam(final String dsTypeMapParam) { |
|
171 |
this.dsTypeMapParam = dsTypeMapParam; |
|
172 |
} |
|
173 |
|
|
174 |
public String getDsTypeXquery() { |
|
175 |
return dsTypeXquery; |
|
176 |
} |
|
177 |
|
|
178 |
public void setDsTypeXquery(final String dsTypeXquery) { |
|
179 |
this.dsTypeXquery = dsTypeXquery; |
|
180 |
} |
|
181 |
|
|
182 |
public String getSpecialDatasourcesCSV() { |
|
183 |
return specialDatasourcesCSV; |
|
184 |
} |
|
185 |
|
|
186 |
public void setSpecialDatasourcesCSV(final String specialDatasourcesCSV) { |
|
187 |
this.specialDatasourcesCSV = specialDatasourcesCSV; |
|
188 |
} |
|
189 |
|
|
190 | 83 |
} |
modules/dnet-deduplication/trunk/src/main/java/eu/dnetlib/functionality/modular/ui/workflows/values/ListHBaseMappingTitleValues.java | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import java.util.List; |
4 | 4 |
import java.util.Map; |
5 |
|
|
6 | 5 |
import javax.annotation.Resource; |
7 | 6 |
|
8 | 7 |
import com.google.common.collect.Lists; |
9 |
|
|
10 | 8 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
11 | 9 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
12 | 10 |
import eu.dnetlib.msro.workflows.util.ValidNodeValuesFetcher; |
11 |
import org.springframework.beans.factory.annotation.Required; |
|
13 | 12 |
|
14 | 13 |
public class ListHBaseMappingTitleValues extends ValidNodeValuesFetcher { |
15 | 14 |
|
16 | 15 |
@Resource |
17 | 16 |
private UniqueServiceLocator serviceLocator; |
18 | 17 |
|
18 |
private String sourceFormat; |
|
19 |
|
|
19 | 20 |
@Override |
20 | 21 |
protected List<DnetParamValue> obtainValues(final Map<String, String> params) throws Exception { |
21 | 22 |
|
22 | 23 |
final String xquery = |
23 | 24 |
"for $x in /RESOURCE_PROFILE[" |
24 | 25 |
+ ".//RESOURCE_TYPE/@value='TransformationRuleDSResourceType' and " |
25 |
+ ".//SOURCE_METADATA_FORMAT/@name = 'oaf' and " |
Also available in: Unified diff
dynamic mapping in workflow definition, refactor, introduced new dedup workflows