Project

General

Profile

« Previous | Next » 

Revision 40247

dynamic mapping in workflow definition, refactor, introduced new dedup workflows

View differences:

modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/dm/dedup.closeMesh.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
    <HEADER>
4
        <RESOURCE_IDENTIFIER value="adaa9b08-a5df-4fd3-ad00-37153eac0010_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
        <RESOURCE_KIND value="WorkflowDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
9
    </HEADER>
10
    <BODY>
11
        <WORKFLOW_NAME>Close Similarity Mesh</WORKFLOW_NAME>
12
        <WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
13
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
        <CONFIGURATION start="manual">
15

  
16

  
17
			<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true">
18
				<DESCRIPTION />
19
				<PARAMETERS>
20
					<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM>
21
					<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM>
22
				</PARAMETERS>
23
				<ARCS>
24
					<ARC to="waitConf" />
25
				</ARCS>
26
			</NODE>  
27
			<NODE name="hadoopConfig" type="SetClusterAndTable" isStart="true">
28
				<DESCRIPTION>Set table name</DESCRIPTION>
29
				<PARAMETERS>
30
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
31
					<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM>
32
				</PARAMETERS>
33
				<ARCS>
34
					<ARC to="waitConf" />
35
				</ARCS>
36
			</NODE>
37
			
38
        	<NODE name="waitConf" isJoin="true">
39
				<DESCRIPTION/>
40
				<PARAMETERS/>
41
				<ARCS>
42
					<ARC to="entitySequence" />
43
				</ARCS>
44
			</NODE>			
45
			
46
			<NODE name="entitySequence" type="CheckEntitySequence">
47
				<DESCRIPTION>Check entity sequence</DESCRIPTION>
48
				<PARAMETERS>
49
					<PARAM required="true" type="string" name="entitySequence" managedBy="system">organization,result</PARAM>
50
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
51
				</PARAMETERS>
52
				<ARCS>
53
					<ARC to="dedupGrouper" />
54
					<ARC name="done" to="success" />
55
				</ARCS>
56
			</NODE>			
57

  
58

  
59
			<NODE name="dedupGrouper" type="DedupGrouperJob">
60
				<DESCRIPTION>dedup grouper</DESCRIPTION>
61
				<PARAMETERS>
62
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupGrouperJob</PARAM>
63
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
64
					<PARAM required="true" type="string" name="envParams" managedBy="system">
65
						{
66
							'dedup.conf' : 'dedup.conf',
67
							'entityTypeId' : 'entityTypeId',
68
							'entityType' : 'entityType',
69
							'cluster' : 'cluster',
70
							'hbase.mapred.inputtable' : 'tableName', 
71
							'hbase.mapred.outputtable' : 'tableName', 
72
							'hbase.mapreduce.inputtable' : 'tableName', 
73
							'hbase.mapreduce.outputtable' : 'tableName'													
74
						}
75
					</PARAM>					
76
				</PARAMETERS>
77
				<ARCS>
78
					<ARC to="dedupGrouper" />
79
					<ARC name="done" to="findRoots" />
80
				</ARCS>
81
			</NODE>
82
			<NODE name="findRoots" type="SubmitHadoopJob">
83
				<DESCRIPTION>find roots</DESCRIPTION>
84
				<PARAMETERS>
85
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupFindRootsJob</PARAM>
86
					<PARAM required="true" type="string" name="envParams" managedBy="system">
87
						{ 
88
							'dedup.conf' : 'dedup.conf',
89
							'entityTypeId' : 'entityTypeId',
90
							'entityType' : 'entityType',
91
							'cluster' : 'cluster',
92
							'hbase.mapred.inputtable' : 'tableName', 
93
							'hbase.mapred.outputtable' : 'tableName', 
94
							'hbase.mapreduce.inputtable' : 'tableName', 
95
							'hbase.mapreduce.outputtable' : 'tableName'												
96
						}
97
					</PARAM>					
98
				</PARAMETERS>
99
				<ARCS>
100
					<ARC to="buildRoots" />
101
				</ARCS>
102
			</NODE>
103
			<NODE name="buildRoots" type="SubmitHadoopJob">
104
				<DESCRIPTION>redirect rels</DESCRIPTION>
105
				<PARAMETERS>
106
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupBuildRootsJob</PARAM>
107
					<PARAM required="true" type="string" name="envParams" managedBy="system">
108
						{ 	
109
							'dedup.conf' : 'dedup.conf',
110
							'relClasses' : 'relClasses',
111
							'entityTypeId' : 'entityTypeId',
112
							'entityType' : 'entityType',
113
							'cluster' : 'cluster',
114
							'hbase.mapred.inputtable' : 'tableName', 
115
							'hbase.mapreduce.inputtable' : 'tableName', 
116
							'hbase.mapred.outputtable' : 'tableName', 
117
							'hbase.mapreduce.outputtable' : 'tableName'														
118
						}
119
					</PARAM>					
120
				</PARAMETERS>
121
				<ARCS>
122
					<ARC to="entitySequence" />
123
				</ARCS>
124
			</NODE>			
125

  
126

  
127
        </CONFIGURATION>
128
        <STATUS />
129
    </BODY>
130
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/persons_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet version="1.0"
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" 
5
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
6
	xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions">
7
	
8
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
9

  
10
	<xsl:template match="/">
11
	
12
		<xsl:variable name="originalid" select="//FIELD[@name = 'personid']" />
13
		<xsl:variable name="personid" select="dnet:oafSplitId('person', $originalid)" />
14
		<xsl:variable name="dateofcollection" select="//FIELD[@name = 'dateofcollection']" />		
15
		<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //FIELD[@name = 'collectedfromid'])" />
16
		<xsl:variable name="collectedfromname" select="//FIELD[@name = 'collectedfromname']" />
17
		
18
		<xsl:variable name="person" select="dnet:oafEntity('person', $personid, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, //FIELD[not(@isNull)])"/>
19

  
20
		<ROWS>
21
			<xsl:if test="string-length($personid) &gt; 0">
22
				<ROW key="{$personid}" columnFamily="person">
23
					<QUALIFIER name="body" type="base64"><xsl:value-of select="$person"/></QUALIFIER>
24
				</ROW>
25
			</xsl:if>
26
		</ROWS>
27
	
28
	</xsl:template>
29

  
30
</xsl:stylesheet>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/projectcontactperson_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet version="1.0"
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" 
5
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
6
	xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions">
7
	
8
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
9

  
10
	<xsl:template match="/">
11
	
12
		<xsl:variable name="projectId" select="dnet:oafSplitId('project', //FIELD[@name = 'project'])" />
13
		<xsl:variable name="contactpersonId" select="dnet:oafSplitId('person', //FIELD[@name = 'contactperson'])" />
14

  
15
		<ROWS>
16
			<xsl:if test="string-length($projectId) &gt; 0 and string-length($contactpersonId) &gt; 0">
17
			
18
				<xsl:variable name="projectcontactperson" 
19
						select="dnet:oafRel('projectPerson', $projectId, $contactpersonId, //FIELD[not(@isNull)], 'hasContact', 'dnet:project_person_relations')"/>
20
				<xsl:variable name="contactpersonproject" 
21
						select="dnet:oafRel('projectPerson', $contactpersonId, $projectId, //FIELD[not(@isNull)], 'isContact', 'dnet:project_person_relations')"/>											
22
			
23
				<ROW key="{$projectId}" columnFamily="projectPerson_contactPerson_hasContact">
24
					<QUALIFIER name="{$contactpersonId}" type="base64"><xsl:value-of select="$projectcontactperson"/></QUALIFIER>
25
				</ROW>
26
				<ROW key="{$contactpersonId}" columnFamily="projectPerson_contactPerson_isContact">
27
					<QUALIFIER name="{$projectId}" type="base64"><xsl:value-of select="$contactpersonproject"/></QUALIFIER>
28
				</ROW>
29
			</xsl:if>
30
		</ROWS>
31
	
32
	</xsl:template>
33

  
34
</xsl:stylesheet>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/claimrels_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet version="1.0"
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri"
5
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
6
	xmlns:dnet="eu.dnetlib.data.transform.xml.DmfToHbaseXsltFunctions"
7
	xmlns:exslt="http://exslt.org/common" xmlns:action="http://namespace.openaire.eu/action" 
8
	extension-element-prefixes="exslt"
9
	exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt">
10

  
11
	<xsl:output omit-xml-declaration="yes" indent="yes" />
12

  
13
	<xsl:template match="/*">
14
	
15
		<xsl:variable name="provenance"><xsl:value-of select="//FIELD[@name = 'provenance']"/></xsl:variable>
16
		<xsl:variable name="trust">0.9</xsl:variable>
17

  
18
		<xsl:choose>
19
			<xsl:when test="count(//RELATION) = 0">
20
				<ROWS />
21
			</xsl:when>
22
			<xsl:otherwise>
23
				<ROWS>
24
					<xsl:for-each select="//RELATION">
25
						<xsl:choose>
26
							<xsl:when test="./@type = 'resultProject'">
27
								<xsl:variable name="resultId" select="./@source" />
28
								<xsl:variable name="projectId">
29
									<xsl:choose>
30
										<xsl:when test="starts-with(@target, '40|')">
31
											<xsl:value-of select="./@target" />
32
										</xsl:when>
33
										<xsl:otherwise>
34
											<xsl:value-of select="dnet:oafSplitId('project', normalize-space(@target))"/>
35
										</xsl:otherwise>
36
									</xsl:choose>
37
								</xsl:variable>
38
								
39
								<ROW key="{$resultId}" columnFamily="resultProject_outcome_isProducedBy">
40
									<QUALIFIER name="{$projectId}" type="base64"><xsl:value-of select="dnet:oafResultProject_Outcome_FromDMF($resultId, $projectId, 'isProducedBy', $provenance, $trust)"/></QUALIFIER>
41
								</ROW>
42
								<ROW key="{$projectId}" columnFamily="resultProject_outcome_produces">
43
									<QUALIFIER name="{$resultId}" type="base64"><xsl:value-of select="dnet:oafResultProject_Outcome_FromDMF($projectId, $resultId, 'produces', $provenance, $trust)"/></QUALIFIER>
44
								</ROW>
45
							</xsl:when>
46
							<xsl:when test="./@type = 'resultResult_publicationdataset_isRelatedTo'">
47
								<xsl:variable name="source" select="./@source" />
48
								<xsl:variable name="target" select="./@target" />
49

  
50
								<ROW key="{$source}" columnFamily="resultResult_publicationDataset_isRelatedTo">
51
									<QUALIFIER name="{$target}" type="base64"><xsl:value-of select="dnet:oafResultResult_PublicationDataset_FromDMF($source, $target, 'isRelatedTo', $provenance, $trust)"/></QUALIFIER>
52
								</ROW>
53
								<ROW key="{$target}" columnFamily="resultResult_publicationDataset_isRelatedTo">
54
									<QUALIFIER name="{$source}" type="base64"><xsl:value-of select="dnet:oafResultResult_PublicationDataset_FromDMF($target, $source, 'isRelatedTo', $provenance, $trust)"/></QUALIFIER>
55
								</ROW>
56
							</xsl:when>							
57
						</xsl:choose>
58
					</xsl:for-each>
59
				</ROWS>
60
			</xsl:otherwise>
61
		</xsl:choose>
62
	</xsl:template>
63
	
64

  
65
</xsl:stylesheet>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/organizations_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet version="1.0"
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" 
5
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
6
	xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions">
7
	
8
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
9

  
10
	<xsl:template match="/">
11
		
12
		<xsl:variable name="originalid" select="//FIELD[@name = 'organizationid']" />
13
		<xsl:variable name="organizationid" select="dnet:oafSplitId('organization', $originalid)" />
14
		<xsl:variable name="dateofcollection" select="//FIELD[@name = 'dateofcollection']" />
15
		<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //FIELD[@name = 'collectedfromid'])" />
16
		<xsl:variable name="collectedfromname" select="//FIELD[@name = 'collectedfromname']" />		
17

  
18
		<xsl:variable name="organization" select="dnet:oafEntity('organization', $organizationid, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, //FIELD[not(@isNull)])"/>
19
		
20
		<ROWS>
21
			<xsl:if test="string-length($organizationid) &gt; 0">
22
				<ROW key="{$organizationid}" columnFamily="organization">
23
					<QUALIFIER name="body" type="base64"><xsl:value-of select="$organization"/></QUALIFIER>
24
				</ROW>
25
			</xsl:if>
26
		</ROWS>
27
	
28
	</xsl:template>
29

  
30
</xsl:stylesheet>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/datasourceorganization_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet version="1.0"
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" 
5
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
6
	xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions">
7
	
8
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
9

  
10
	<xsl:template match="/">
11
	
12
		<xsl:variable name="datasourceId" select="dnet:oafSplitId('datasource', //FIELD[@name = 'datasource'])" />
13
		<xsl:variable name="organizationId" select="dnet:oafSplitId('organization', //FIELD[@name = 'organization'])" />
14

  
15
		<ROWS>
16
			<xsl:if test="string-length($datasourceId) &gt; 0 and string-length($organizationId) &gt; 0">
17

  
18
				<xsl:variable name="datasourceorganization" select="dnet:oafRel('datasourceOrganization', $datasourceId, $organizationId, //FIELD[not(@isNull)], 'provides', 'dnet:datasources_organizations_typologies')"/>
19
				<xsl:variable name="organizationdatasource" select="dnet:oafRel('datasourceOrganization', $organizationId, $datasourceId, //FIELD[not(@isNull)], 'isProvidedBy', 'dnet:datasources_organizations_typologies')"/>
20

  
21
				<ROW key="{$datasourceId}" columnFamily="datasourceOrganization_provision_provides">
22
					<QUALIFIER name="{$organizationId}" type="base64"><xsl:value-of select="$datasourceorganization"/></QUALIFIER>
23
				</ROW>
24
				<ROW key="{$organizationId}" columnFamily="datasourceOrganization_provision_isProvidedBy">
25
					<QUALIFIER name="{$datasourceId}" type="base64"><xsl:value-of select="$organizationdatasource"/></QUALIFIER>
26
				</ROW>
27
			</xsl:if>			
28
		</ROWS>
29
	
30
	</xsl:template>
31

  
32
</xsl:stylesheet>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/projectorganization_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet version="1.0"
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" 
5
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
6
	xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions">
7
	
8
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
9

  
10
	<xsl:template match="/">
11
	
12
		<xsl:variable name="projectId" select="dnet:oafSplitId('project', //FIELD[@name = 'project'])" />
13
		<xsl:variable name="organizationId" select="dnet:oafSplitId('organization', //FIELD[@name = 'resporganization'])" />
14

  
15
		<ROWS>
16
			<xsl:if test="string-length($projectId) &gt; 0 and string-length($organizationId) &gt; 0">
17
			
18
				<xsl:variable name="projectorganization" 
19
						select="dnet:oafRel('projectOrganization', $projectId, $organizationId, //FIELD[not(@isNull)], 'hasParticipant', 'dnet:project_organization_relations')"/>
20
				<xsl:variable name="organizationproject" 
21
						select="dnet:oafRel('projectOrganization', $organizationId, $projectId, //FIELD[not(@isNull)], 'isParticipant', 'dnet:project_organization_relations')"/>	
22
			
23
				<ROW key="{$projectId}" columnFamily="projectOrganization_participation_hasParticipant">
24
					<QUALIFIER name="{$organizationId}" type="base64"><xsl:value-of select="$projectorganization"/></QUALIFIER>
25
				</ROW>
26
				<ROW key="{$organizationId}" columnFamily="projectOrganization_participation_isParticipant">
27
					<QUALIFIER name="{$projectId}" type="base64"><xsl:value-of select="$organizationproject"/></QUALIFIER>
28
				</ROW>
29
			</xsl:if>
30
		</ROWS>
31
	
32
	</xsl:template>
33

  
34
</xsl:stylesheet>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/projects_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet version="1.0"
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" 
5
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
6
	xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions">
7
	
8
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
9

  
10
	<xsl:template match="/">
11
	
12
		<xsl:variable name="originalid" select="//FIELD[@name = 'projectid']" />
13
		<xsl:variable name="projectid" select="dnet:oafSplitId('project', $originalid)" />
14
		<xsl:variable name="dateofcollection" select="//FIELD[@name = 'dateofcollection']" />		
15
		<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //FIELD[@name = 'collectedfromid'])" />
16
		<xsl:variable name="collectedfromname" select="//FIELD[@name = 'collectedfromname']" />			
17

  
18
		<xsl:variable name="project" select="dnet:oafEntity('project', $projectid, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, //FIELD[not(@isNull)])"/>
19

  
20
		<ROWS>
21
			<xsl:if test="string-length($projectid) &gt; 0">
22
				<ROW key="{$projectid}" columnFamily="project">
23
					<QUALIFIER name="body" type="base64"><xsl:value-of select="$project"/></QUALIFIER>
24
				</ROW>
25
			</xsl:if>
26
		</ROWS>
27
	
28
	</xsl:template>
29

  
30
</xsl:stylesheet>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/datasources_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet version="1.0"
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" 
5
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
6
	xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions">
7
	
8
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
9

  
10
	<xsl:template match="/">
11

  
12
		<xsl:variable name="originalid" select="//FIELD[@name = 'datasourceid']" />
13
		<xsl:variable name="datasourceid" select="dnet:oafSplitId('datasource', $originalid)" />		
14
		<xsl:variable name="dateofcollection" select="//FIELD[@name = 'dateofcollection']" />
15
		<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //FIELD[@name = 'collectedfromid'])" />
16
		<xsl:variable name="collectedfromname" select="//FIELD[@name = 'collectedfromname']" />
17
		
18
		<xsl:variable name="datasource" select="dnet:oafEntity('datasource', $datasourceid, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, //FIELD[not(@isNull)])"/>
19
		
20
		<ROWS>
21
			<xsl:if test="string-length($datasourceid) &gt; 0">
22
				<ROW key="{$datasourceid}" columnFamily="datasource">
23
					<QUALIFIER name="body" type="base64"><xsl:value-of select="$datasource"/></QUALIFIER>
24
				</ROW>
25
			</xsl:if>
26
		</ROWS>
27
	
28
	</xsl:template>
29

  
30
</xsl:stylesheet>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/claimUpdates_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3
                xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dr="http://www.driver-repository.eu/namespace/dr"
4
                xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:oaa="http://namespace.openaire.eu/oaa"
5
                xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:dnet="eu.dnetlib.actionmanager.actions.infopackage.DMFInfoPackageToHbaseXsltFunctions"
6
                xmlns:date="eu.dnetlib.miscutils.datetime.DateUtils"
7
                xmlns:exslt="http://exslt.org/common"
8
                version="1.0"
9
                extension-element-prefixes="exslt"
10
                exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt date">
11

  
12
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
13

  
14
	<xsl:param name="trust" select="string('0.9')"/>
15
	<xsl:param name="provenance" select="string('UNKNOWN')"/>
16
	<xsl:param name="namespaceprefix" select="string('unknown_____')"/>
17

  
18
	<xsl:template match="/*">
19
		<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/>
20
		<xsl:variable name="dateofcollection" select="//dr:dateOfCollection"/>
21

  
22
		<xsl:variable name="metadata" select="exslt:node-set(//*[local-name()='metadata']/*)"/>
23
		<xsl:variable name="collectedDatasourceid">
24
			<xsl:choose>
25
				<xsl:when test="string-length(//oaf:collectedDatasourceid) &gt; 0">
26
					<xsl:value-of select="//oaf:collectedDatasourceid"/>
27
				</xsl:when>
28
				<xsl:otherwise>
29
					<!-- Quoted string literal: an unquoted UNKNOWN is an XPath step selecting child elements named UNKNOWN, which yields an empty value. -->
					<xsl:value-of select="'UNKNOWN'"/>
30
				</xsl:otherwise>
31
			</xsl:choose>
32
		</xsl:variable>
33

  
34
		<xsl:choose>
35
			<xsl:when test="count($metadata) = 0">
36
				<ROWS/>
37
			</xsl:when>
38
			<xsl:otherwise>
39

  
40
				<xsl:variable name="objidentifier" select="//record/*[local-name() = 'header']/*[local-name() = 'objIdentifier']"/>
41

  
42
				<xsl:variable name="resultId" select="dnet:oafSimpleId('result', $objidentifier)"/>
43

  
44
				<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/>
45
				<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/>
46

  
47
				<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/>
48
				<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/>
49

  
50
				<!-- 				<xsl:variable name="country" select="substring(//dr:repositoryCountry,  1, 200)" /> -->
51
				<!-- 				<xsl:variable name="accessmode" select="substring(//oaf:accessrights,   1, 200)" /> -->
52

  
53
				<xsl:variable name="result"
54
				              select="dnet:oafResultFromInfoPackage($resultId, $about, $provenance, $trust, $hostedbyid, $hostedbyname, $collectedfromid, $collectedfromname, $objidentifier, $dateofcollection, $metadata)"/>
55

  
56
				<ROWS>
57
					<ROW key="{$resultId}" columnFamily="result">
58
						<QUALIFIER name="{concat('update_', date:now())}" type="base64">
59
							<xsl:value-of select="$result"/>
60
						</QUALIFIER>
61
					</ROW>
62
				</ROWS>
63
			</xsl:otherwise>
64
		</xsl:choose>
65
	</xsl:template>
66

  
67
</xsl:stylesheet>
68

  
modules/dnet-deduplication/trunk/src/main/resources/eu/dnetlib/msro/workflows/dedup/dissimilarity_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet version="1.0"
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" 
5
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
6
	xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions">
7
	
8
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
9

  
10
	<xsl:template match="/">
11

  
12
		<xsl:variable name="type" select="/ROW/FIELD[@name = 'entitytype']" />
13
		<xsl:variable name="source" select="dnet:oafSimpleId($type, /ROW/FIELD[@name = 'id1'])" />
14
		<xsl:variable name="target" select="dnet:oafSimpleId($type, /ROW/FIELD[@name = 'id2'])" />
15
		
16
		<xsl:variable name="columnFamily">
17
			<xsl:choose>
18
				<xsl:when test="$type = 'result'">resultResult_dedupSimilarity_isSimilarTo</xsl:when>
19
				<xsl:when test="$type = 'person'">personPerson_dedupSimilarity_isSimilarTo</xsl:when>
20
				<xsl:when test="$type = 'organization'">organizationOrganization_dedupSimilarity_isSimilarTo</xsl:when>
21
			</xsl:choose>
22
		</xsl:variable>
23

  
24
		<ROWS>
25
			<xsl:if test="string-length($source) &gt; 0 and string-length($target) &gt; 0 and string-length($columnFamily) &gt; 0">
26
				<ROW key="{$source}" columnFamily="{$columnFamily}">
27
					<QUALIFIER name="{$target}" type="base64"></QUALIFIER>
28
				</ROW>
29
				<ROW key="{$target}" columnFamily="{$columnFamily}">
30
					<QUALIFIER name="{$source}" type="base64"></QUALIFIER>
31
				</ROW>				
32
			</xsl:if>
33
		</ROWS>
34
	
35
	</xsl:template>
36

  
37
</xsl:stylesheet>
modules/dnet-deduplication/trunk/src/main/resources/eu/dnetlib/msro/workflows/dedup/similarity_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet version="1.0"
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" 
5
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
6
	xmlns:dnet="eu.dnetlib.data.transform.xml.DNetDbToHbaseXsltFunctions">
7
	
8
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
9

  
10
	<xsl:template match="/">
11
	
12
		<xsl:variable name="source" select="/SIMILARITY/SOURCE/@id" />
13
		<xsl:variable name="target" select="/SIMILARITY/TARGET/@id" />
14
		<xsl:variable name="type" select="/SIMILARITY/TYPE/@name" />
15

  
16
		<xsl:variable name="columnFamily">
17
			<xsl:choose>
18
				<xsl:when test="$type = 'result'">resultResult_dedupSimilarity_isSimilarTo</xsl:when>
19
				<xsl:when test="$type = 'person'">personPerson_dedupSimilarity_isSimilarTo</xsl:when>
20
				<xsl:when test="$type = 'organization'">organizationOrganization_dedupSimilarity_isSimilarTo</xsl:when>
21
			</xsl:choose>
22
		</xsl:variable>
23

  
24
		<ROWS>
25
			<xsl:if test="string-length($source) &gt; 0 and string-length($target) &gt; 0 and string-length($columnFamily) &gt; 0">
26
				<ROW key="{$source}" columnFamily="{$columnFamily}">
27
					<QUALIFIER name="{$target}" type="base64"></QUALIFIER>
28
				</ROW>
29
			</xsl:if>
30
		</ROWS>
31
	
32
	</xsl:template>
33

  
34
</xsl:stylesheet>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/test/java/eu/dnetlib/msro/openaireplus/workflows/hbase/ClaimRels2HbaseTest.java
1 1
package eu.dnetlib.msro.openaireplus.workflows.hbase;
2 2

  
3
import java.io.InputStream;
3 4
import java.io.StringReader;
4 5

  
6
import eu.dnetlib.data.proto.OafProtos.Oaf;
7
import eu.dnetlib.miscutils.functional.xml.ApplyXslt;
5 8
import org.apache.commons.codec.binary.Base64;
6 9
import org.apache.commons.io.IOUtils;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
7 12
import org.dom4j.Document;
13
import org.dom4j.DocumentException;
8 14
import org.dom4j.io.SAXReader;
15
import org.junit.Before;
9 16
import org.junit.Test;
10 17
import org.springframework.core.io.ClassPathResource;
11 18
import org.springframework.core.io.Resource;
12 19

  
13
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.miscutils.functional.xml.ApplyXslt;
15

  
16 20
public class ClaimRels2HbaseTest {
17 21

  
18
	private ApplyXslt xslt = new ApplyXslt(new ClassPathResource("/eu/dnetlib/msro/openaireplus/workflows/hbase/claimrels_2_hbase.xsl"));
22
	private static final Log log = LogFactory.getLog(ClaimRels2HbaseTest.class);
19 23

  
24
	private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/";
25

  
26
	private ApplyXslt xslt = null;
27

  
20 28
	private SAXReader reader = new SAXReader();
21 29

  
30
	@Before
31
	public void setUp() {
32
		xslt = new ApplyXslt(loadFromTransformationProfile("claimrels_2_hbase.xsl"));
33
	}
34

  
22 35
	@Test
23 36
	public void testXslt() throws Exception {
24 37

  
......
44 57

  
45 58
	}
46 59

  
60
	private String loadFromTransformationProfile(final String profilePath) {
61
		log.info("Loading xslt from: " + basePathProfiles + profilePath);
62
		InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath);
63
		SAXReader saxReader = new SAXReader();
64
		Document doc = null;
65
		try {
66
			doc = saxReader.read(profile);
67
		} catch (DocumentException e) {
68
			e.printStackTrace();
69
			throw new RuntimeException(e);
70
		}
71
		String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML();
72
		return xslt;
73
	}
74

  
75

  
47 76
}
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/test/java/eu/dnetlib/msro/openaireplus/workflows/index/OpenaireLayoutToRecordStylesheetTest.java
1 1
package eu.dnetlib.msro.openaireplus.workflows.index;
2 2

  
3
import static org.junit.Assert.assertFalse;
4
import static org.junit.Assert.assertNotNull;
5

  
6 3
import java.io.IOException;
7 4
import java.io.InputStream;
8 5
import java.io.StringReader;
9 6
import java.io.StringWriter;
10

  
7
import javax.xml.stream.XMLStreamException;
11 8
import javax.xml.transform.Transformer;
12 9
import javax.xml.transform.TransformerException;
13 10
import javax.xml.transform.TransformerFactory;
14 11
import javax.xml.transform.stream.StreamResult;
15 12
import javax.xml.transform.stream.StreamSource;
16 13

  
17
import org.apache.commons.io.IOUtils;
18
import org.apache.solr.common.SolrInputDocument;
19
import org.junit.Test;
20
import org.springframework.core.io.ClassPathResource;
21

  
22 14
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
23 15
import eu.dnetlib.functionality.index.solr.feed.StreamingInputDocumentFactory;
24 16
import eu.dnetlib.miscutils.datetime.DateUtils;
25 17
import eu.dnetlib.miscutils.functional.xml.ApplyXslt;
26 18
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
19
import org.apache.commons.io.IOUtils;
20
import org.apache.solr.common.SolrInputDocument;
21
import org.junit.Test;
22
import org.springframework.core.io.ClassPathResource;
27 23

  
24
import static org.junit.Assert.assertFalse;
25
import static org.junit.Assert.assertNotNull;
26

  
28 27
public class OpenaireLayoutToRecordStylesheetTest {
29 28

  
30 29
	private static final String OPENAIRE_LAYOUT_TO_RECORD_STYLESHEET_XSL = "/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl";
......
34 33
	private static final String OAF_RECORD = "/eu/dnetlib/msro/openaireplus/workflows/index/oafRecord.xml";
35 34

  
36 35
	@Test
37
	public void test1() throws ISLookUpException, IOException, TransformerException {
36
	public void test1() throws ISLookUpException, IOException, TransformerException, XMLStreamException {
38 37
		String xsl = prepareXslt("DMF");
39 38
		assertNotNull(xsl);
40 39
		assertFalse(xsl.isEmpty());
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/index/PrepareIndexDataJobNode.java
4 4
import java.io.InputStream;
5 5
import java.io.StringReader;
6 6
import java.io.StringWriter;
7

  
8 7
import javax.annotation.Resource;
9 8
import javax.xml.transform.Transformer;
10 9
import javax.xml.transform.TransformerException;
......
12 11
import javax.xml.transform.stream.StreamResult;
13 12
import javax.xml.transform.stream.StreamSource;
14 13

  
15
import org.apache.commons.io.IOUtils;
16
import org.apache.commons.lang.StringUtils;
17
import org.apache.commons.logging.Log;
18
import org.apache.commons.logging.LogFactory;
19
import org.springframework.beans.factory.annotation.Required;
20
import org.springframework.core.io.ClassPathResource;
21

  
22 14
import com.googlecode.sarasvati.Arc;
23 15
import com.googlecode.sarasvati.NodeToken;
24

  
25 16
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
26 17
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
27 18
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
......
29 20
import eu.dnetlib.miscutils.datetime.DateUtils;
30 21
import eu.dnetlib.miscutils.functional.hash.Hashing;
31 22
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
23
import org.apache.commons.io.IOUtils;
24
import org.apache.commons.lang.StringUtils;
25
import org.apache.commons.logging.Log;
26
import org.apache.commons.logging.LogFactory;
27
import org.springframework.beans.factory.annotation.Required;
28
import org.springframework.core.io.ClassPathResource;
32 29

  
33 30
public class PrepareIndexDataJobNode extends SimpleJobNode {
34 31

  
......
83 80
		return Arc.DEFAULT_ARC;
84 81
	}
85 82

  
83
	protected String tableName(final NodeToken token) {
84
		if (token.getEnv().hasAttribute("hbaseTable")) {
85
			String table = token.getEnv().getAttribute("hbaseTable");
86
			log.debug("found override value in wfEnv for 'hbaseTable' param: " + table);
87
			return table;
88
		}
89
		return getHbaseTable();
90
	}
91

  
86 92
	public String getIndexSolrUrlZk() throws ISLookUpException {
87 93
		return getServiceConfigValue("for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='IndexServiceResourceType'] return $x//PROTOCOL[./@name='solr']/@address/string()");
88 94
	}
......
112 118
	}
113 119

  
114 120
	private String getFileName(final NodeToken token, final String fileNamePrefix) {
115
		return "/" + fileNamePrefix + "_" + getHbaseTable() + "_" + token.getEnv().getAttribute("format") + ".seq";
121
		return "/" + fileNamePrefix + "_" + tableName(token) + "_" + token.getEnv().getAttribute("format") + ".seq";
116 122
	}
117 123

  
118 124
	private String getCollectionName(final NodeToken token) {
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/db2hbase.xml
51 51
                    <PARAM managedBy="system" name="inputEprParam" required="true" type="string">dsEpr</PARAM>
52 52
                    <PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
53 53
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
54
                    <PARAM managedBy="system" name="xslt" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/datasources_2_hbase.xsl
55
                    </PARAM>
54
                    <PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
56 55
                </PARAMETERS>
57 56
                <ARCS>
58 57
                    <ARC to="queryDatasourceOrganization"/>
......
77 76
                    <PARAM managedBy="system" name="inputEprParam" required="true" type="string">dsOrgEpr</PARAM>
78 77
                    <PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
79 78
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
80
                    <PARAM managedBy="system" name="xslt" required="true" type="string">
81
                        /eu/dnetlib/msro/openaireplus/workflows/hbase/datasourceorganization_2_hbase.xsl
82
                    </PARAM>
79
                    <PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
83 80
                </PARAMETERS>
84 81
                <ARCS>
85 82
                    <ARC to="queryOrganizations"/>
......
103 100
                    <PARAM managedBy="system" name="inputEprParam" required="true" type="string">orgEpr</PARAM>
104 101
                    <PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
105 102
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
106
                    <PARAM managedBy="system" name="xslt" required="true" type="string">
107
                        /eu/dnetlib/msro/openaireplus/workflows/hbase/organizations_2_hbase.xsl
108
                    </PARAM>
103
                    <PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
109 104
                </PARAMETERS>
110 105
                <ARCS>
111 106
                    <ARC to="queryPersons"/>
......
128 123
                    <PARAM managedBy="system" name="inputEprParam" required="true" type="string">personsEpr</PARAM>
129 124
                    <PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
130 125
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
131
                    <PARAM managedBy="system" name="xslt" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/persons_2_hbase.xsl
132
                    </PARAM>
126
                    <PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
133 127
                </PARAMETERS>
134 128
                <ARCS>
135 129
                    <ARC to="queryProjects"/>
......
152 146
                    <PARAM managedBy="system" name="inputEprParam" required="true" type="string">projEpr</PARAM>
153 147
                    <PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
154 148
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
155
                    <PARAM managedBy="system" name="xslt" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/projects_2_hbase.xsl
156
                    </PARAM>
149
                    <PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
157 150
                </PARAMETERS>
158 151
                <ARCS>
159 152
                    <ARC to="queryProjectOrganization"/>
......
178 171
                    <PARAM managedBy="system" name="inputEprParam" required="true" type="string">projOrgEpr</PARAM>
179 172
                    <PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
180 173
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
181
                    <PARAM managedBy="system" name="xslt" required="true" type="string">
182
                        /eu/dnetlib/msro/openaireplus/workflows/hbase/projectorganization_2_hbase.xsl
183
                    </PARAM>
174
                    <PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
184 175
                </PARAMETERS>
185 176
                <ARCS>
186 177
                    <ARC to="queryProjectContactperson"/>
......
205 196
                    <PARAM managedBy="system" name="inputEprParam" required="true" type="string">projPerEpr</PARAM>
206 197
                    <PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
207 198
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
208
                    <PARAM managedBy="system" name="xslt" required="true" type="string">
209
                        /eu/dnetlib/msro/openaireplus/workflows/hbase/projectcontactperson_2_hbase.xsl
210
                    </PARAM>
199
                    <PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
211 200
                </PARAMETERS>
212 201
                <ARCS>
213 202
                    <ARC to="queryClaimRels"/>
......
232 221
                    <PARAM managedBy="system" name="inputEprParam" required="true" type="string">claimRelsEpr</PARAM>
233 222
                    <PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
234 223
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
235
                    <PARAM managedBy="system" name="xslt" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/claimrels_2_hbase.xsl
236
                    </PARAM>
224
                    <PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
237 225
                </PARAMETERS>
238 226
                <ARCS>
239 227
                    <ARC to="queryClaimUpdates"/>
240 228
                </ARCS>
241 229
            </NODE>
242 230
            <NODE name="queryClaimUpdates" type="QueryDb">
243
                <DESCRIPTION>query Claim Rels</DESCRIPTION>
231
                <DESCRIPTION>query Claim updates</DESCRIPTION>
244 232
                <PARAMETERS>
245 233
                    <PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM>
246 234
                    <PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/queryClaimsUpdate.sql
......
254 242
                </ARCS>
255 243
            </NODE>
256 244
            <NODE name="storeClaimUpdates" type="StoreHBase">
257
                <DESCRIPTION>Store relation Claim Project</DESCRIPTION>
245
                <DESCRIPTION>Store claim updates</DESCRIPTION>
258 246
                <PARAMETERS>
259 247
                    <PARAM managedBy="system" name="inputEprParam" required="true" type="string">claimRelsEpr</PARAM>
260 248
                    <PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
261 249
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
262
                    <PARAM managedBy="system" name="xslt" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/claimUpdates_2_hbase.xsl
263
                    </PARAM>
250
                    <PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
264 251
                </PARAMETERS>
265 252
                <ARCS>
266 253
                    <ARC to="finish"/>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/odf2hbase.xml
13 13
		<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE>
14 14
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
15 15
		<CONFIGURATION start="manual">
16
			<NODE name="setTable" type="SetHBaseTable" isStart="true">
17
				<DESCRIPTION>set hbase table</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM name="table" type="string" managedBy="user" required="true"></PARAM>
20
				</PARAMETERS>
21
				<ARCS>
22
					<ARC to="mapreduce"/>
23
				</ARCS>
24
			</NODE>
25

  
16 26
			<NODE name="prepareImport" type="PrepareMDStoreImport" isStart="true">
17 27
				<DESCRIPTION>Configure export to HDFS</DESCRIPTION>
18 28
				<PARAMETERS>
19 29
					<PARAM required="true" type="string" name="hdfsPathParam" managedBy="system">hdfsPath</PARAM>
20
					<PARAM required="true" type="string" name="hdfsPath" managedBy="system">/tmp/mdstores_odf-store-cleaned.seq</PARAM>
21
					<PARAM required="true" type="string" name="xsltParam" managedBy="system">xslt</PARAM>
22
					<PARAM required="true" type="string" name="xslt" managedBy="system">/eu/dnetlib/data/transform/datacite_2_hbase.xsl</PARAM>					
30
					<PARAM required="true" type="string" name="hdfsPath" managedBy="user">/tmp/mdstores_odf-store-cleaned.seq</PARAM>
31
					<PARAM required="true" type="string" name="mappingParam" managedBy="system">xslt</PARAM>
32
					<PARAM required="true" type="string" name="mapping" managedBy="user" function="obtainValues('odf2hbaseMappings', {})"></PARAM>
23 33
				</PARAMETERS>
24 34
				<ARCS>
25 35
					<ARC to="reuseHdfsRecords" />
......
31 41
					<PARAM required="true" type="boolean" name="reuseMdRecords" managedBy="user">false</PARAM>
32 42
				</PARAMETERS>
33 43
				<ARCS>
34
					<ARC name="true" to="mapreduce" />
44
					<ARC name="true" to="doneExport"/>
35 45
					<ARC name="false" to="exportRecords" />
36 46
				</ARCS>
37 47
			</NODE>
......
55 65
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>					
56 66
				</PARAMETERS>
57 67
				<ARCS>
68
					<ARC to="doneExport"/>
69
				</ARCS>
70
			</NODE>
71
			<NODE name="doneExport">
72
				<DESCRIPTION></DESCRIPTION>
73
				<PARAMETERS/>
74
				<ARCS>
58 75
					<ARC to="mapreduce" />
59 76
				</ARCS>
60
			</NODE>			
61
			<NODE name="mapreduce" type="SubmitHadoopJob">
77
			</NODE>
78
			<NODE name="mapreduce" type="SubmitHadoopJob" isJoin="true">
62 79
				<DESCRIPTION>Run M/R import Job</DESCRIPTION>
63 80
				<PARAMETERS>
64 81
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
65 82
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">mdStoreHdfsImportJob</PARAM>
83
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
66 84
					<PARAM required="true" type="string" name="envParams" managedBy="system">
67
						{ 
68
							'mapred.input.dir' : 'hdfsPath', 
69
							'hbase.import.xslt' : 'xslt'
85
						{
86
						'mapred.input.dir' : 'hdfsPath',
87
						'hbase.import.xslt' : 'xslt',
88
						'hbase.mapred.outputtable' : 'hbaseTable'
70 89
						}
71
					</PARAM>	
72
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
73
						{ 
74
							'hbase.mapred.outputtable' : 'hbase.mapred.datatable'
75
						}
76 90
					</PARAM>
77
					<PARAM required="true" type="boolean" name="simulation"	managedBy="user">false</PARAM>
78 91
				</PARAMETERS>
79 92
				<ARCS>
80 93
					<ARC to="checkODFResultCount" />
81 94
				</ARCS>
82 95
			</NODE>
83
			<NODE name="checkODFResultCount" type="CheckHDFSCount">
96
			<NODE name="checkODFResultCount" type="CheckHDFSCounts">
84 97
				<DESCRIPTION>Checks if the number of publications stored on HBASE is the same as those in the HDFS file</DESCRIPTION>
85 98
				<PARAMETERS>
86 99
					<PARAM required="true" type="string" name="numberToVerifyParamName" managedBy="system">mdstore.result</PARAM>
modules/dnet-openaireplus-workflows/branches/dnet-openaireplus-workflows-4.1.X/pom.xml
1 1
<?xml version="1.0" ?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
2
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"
3
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3 4
	<parent>
4 5
		<groupId>eu.dnetlib</groupId>
5 6
		<artifactId>dnet-parent</artifactId>
......
103 104
			<version>${javax.servlet.version}</version>
104 105
			<scope>provided</scope>
105 106
		</dependency>
106

  
107 107
		<dependency>
108
			<groupId>eu.dnetlib</groupId>
109
			<artifactId>dnet-openaireplus-profiles</artifactId>
110
			<version>[1.0.0,2.0.0)</version>
111
			<scope>test</scope>
112
		</dependency>
113
		<dependency>
108 114
			<groupId>junit</groupId>
109 115
			<artifactId>junit</artifactId>
110 116
			<version>${junit.version}</version>
modules/dnet-deduplication/trunk/src/main/java/eu/dnetlib/msro/workflows/dedup/MinDistSearchHadoopJobNode.java
21 21
public class MinDistSearchHadoopJobNode extends DedupConfigurationAwareJobNode {
22 22

  
23 23
	private static final Log log = LogFactory.getLog(MinDistSearchHadoopJobNode.class);
24

  
24
	private final static String StatusParam = "MinDistSearchHadoopJobNode.status";
25
	private final static String DepthParam = "mindist_recursion_depth";
26
	private final static String UpdateCounterParam = "UpdateCounter.UPDATED";
27
	private final static String DebugParam = "mindist_DEBUG";
25 28
	@Autowired
26 29
	private UniqueServiceLocator serviceLocator;
27

  
28
	enum STATUS {DATALOAD, DEPTH_N}
29

  
30 30
	private boolean debug = false;
31

  
32 31
	private String outPathParam;
33 32

  
34
	private final static String StatusParam = "MinDistSearchHadoopJobNode.status";
35

  
36
	private final static String DepthParam = "mindist_recursion_depth";
37

  
38
	private final static String UpdateCounterParam = "UpdateCounter.UPDATED";
39

  
40
	private final static String DebugParam = "mindist_DEBUG";
41

  
42 33
	@Override
43 34
	protected void prepareJob(final BlackboardJob job, final NodeToken token) throws Exception {
44 35

  
......
131 122

  
132 123
					final String counter = job.getParameters().get(UpdateCounterParam);
133 124
					if (StringUtils.isBlank(counter)) {
134
						token.getFullEnv().setAttribute(StatusParam, STATUS.DATALOAD.toString());
125
						token.getFullEnv().removeAttribute(StatusParam);
126
						token.getFullEnv().removeAttribute(DepthParam);
135 127
						log.info(String.format("done iteration %s:%s", UpdateCounterParam, 0));
136 128
						engine.complete(token, Arc.DEFAULT_ARC);
137 129
					} else {
......
161 153
		this.outPathParam = outPathParam;
162 154
	}
163 155

  
156
	enum STATUS {DATALOAD, DEPTH_N}
157

  
164 158
}
modules/dnet-deduplication/trunk/src/main/java/eu/dnetlib/msro/workflows/hadoop/hbase/StoreHBaseRecordsJobNode.java
8 8
import com.googlecode.sarasvati.env.Env;
9 9
import eu.dnetlib.data.hadoop.rmi.HadoopBlackboardActions;
10 10
import eu.dnetlib.data.hadoop.rmi.HadoopService;
11
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
12
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
13
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
11 14
import eu.dnetlib.enabling.resultset.rmi.ResultSetException;
12 15
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob;
13 16
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
17
import eu.dnetlib.msro.rmi.MSROException;
14 18
import eu.dnetlib.msro.workflows.nodes.BlackboardJobNode;
15 19
import eu.dnetlib.msro.workflows.nodes.ProgressJobNode;
16 20
import eu.dnetlib.msro.workflows.nodes.blackboard.BlackboardWorkflowJobListener;
......
18 22
import eu.dnetlib.msro.workflows.util.ProgressProvider;
19 23
import eu.dnetlib.msro.workflows.util.ResultsetProgressProvider;
20 24
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
21
import org.apache.commons.io.IOUtils;
25
import org.apache.commons.lang.StringUtils;
22 26
import org.apache.commons.logging.Log;
23 27
import org.apache.commons.logging.LogFactory;
28
import org.springframework.beans.factory.annotation.Autowired;
24 29
import org.springframework.beans.factory.annotation.Required;
25 30

  
26 31
public class StoreHBaseRecordsJobNode extends BlackboardJobNode implements ProgressJobNode {
27 32

  
28 33
	private static final Log log = LogFactory.getLog(StoreHBaseRecordsJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
29 34

  
35
	@Autowired
36
	private UniqueServiceLocator serviceLocator;
37

  
30 38
	private String inputEprParam;
31 39
	private String hbaseTableProperty;
32 40
	private String cluster;
33
	private String xslt;
41
	private String mapping;
34 42

  
35 43
	private boolean simulation = false;
36 44

  
......
49 57

  
50 58
		job.setAction(HadoopBlackboardActions.IMPORT_EPR_HBASE.toString());
51 59
		job.getParameters().put("input_epr", DnetXsltFunctions.encodeBase64(prepareEpr(token)));
52
		job.getParameters().put("xslt", DnetXsltFunctions.encodeBase64(prepareXslt()));
60
		job.getParameters().put("xslt", DnetXsltFunctions.encodeBase64(readXslt(getMapping())));
53 61
		job.getParameters().put("table", tableName(token));
54 62
		job.getParameters().put("cluster", cluster(token));
55 63
		job.getParameters().put("simulation", String.valueOf(isSimulation()));
......
95 103
		return resultsetProgressProvider.getEpr().toString();
96 104
	}
97 105

  
98
	private String prepareXslt() throws IOException {
99
		return (xslt == null) || xslt.isEmpty() ? "" : IOUtils.toString(getClass().getResourceAsStream(xslt));
106
	private String readXslt(final String profileId) throws IOException, MSROException, ISLookUpException {
107
		if (StringUtils.isBlank(profileId)) throw new MSROException("missing profile id");
108

  
109
		log.info("loading mapping from profile id: " + profileId);
110

  
111
		final String xquery =
112
				String.format("/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='%s']/BODY/CONFIGURATION/SCRIPT/CODE/*[local-name()='stylesheet']", profileId);
113
		return serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xquery);
100 114
	}
101 115

  
102 116
	public String getInputEprParam() {
......
115 129
		this.hbaseTableProperty = hbaseTableProperty;
116 130
	}
117 131

  
118
	@Required
119
	public void setProcessCountingResultSetFactory(final ProcessCountingResultSetFactory processCountingResultSetFactory) {
120
		this.processCountingResultSetFactory = processCountingResultSetFactory;
121
	}
122

  
123 132
	@Override
124 133
	public ProgressProvider getProgressProvider() {
125 134
		return progressProvider;
......
133 142
		return processCountingResultSetFactory;
134 143
	}
135 144

  
136
	public String getXslt() {
137
		return xslt;
145
	@Required
146
	public void setProcessCountingResultSetFactory(final ProcessCountingResultSetFactory processCountingResultSetFactory) {
147
		this.processCountingResultSetFactory = processCountingResultSetFactory;
138 148
	}
139 149

  
140
	public void setXslt(final String xslt) {
141
		this.xslt = xslt;
150
	public String getMapping() {
151
		return mapping;
142 152
	}
143 153

  
154
	public void setMapping(final String mapping) {
155
		this.mapping = mapping;
156
	}
157

  
144 158
	public String getCluster() {
145 159
		return cluster;
146 160
	}
modules/dnet-deduplication/trunk/src/main/java/eu/dnetlib/msro/workflows/hadoop/hbase/DeleteHBaseRecordsJobNode.java
3 3
import java.io.IOException;
4 4
import java.util.Map;
5 5

  
6
import org.apache.commons.io.IOUtils;
7
import org.apache.commons.logging.Log;
8
import org.apache.commons.logging.LogFactory;
9
import org.springframework.beans.factory.annotation.Required;
10

  
11 6
import com.googlecode.sarasvati.Engine;
12 7
import com.googlecode.sarasvati.NodeToken;
13 8
import com.googlecode.sarasvati.env.Env;
14

  
15 9
import eu.dnetlib.data.hadoop.rmi.HadoopBlackboardActions;
16 10
import eu.dnetlib.data.hadoop.rmi.HadoopService;
11
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
12
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
13
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
17 14
import eu.dnetlib.enabling.resultset.rmi.ResultSetException;
18 15
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob;
19 16
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
17
import eu.dnetlib.msro.rmi.MSROException;
20 18
import eu.dnetlib.msro.workflows.nodes.BlackboardJobNode;
21 19
import eu.dnetlib.msro.workflows.nodes.ProgressJobNode;
22 20
import eu.dnetlib.msro.workflows.nodes.blackboard.BlackboardWorkflowJobListener;
......
24 22
import eu.dnetlib.msro.workflows.util.ProgressProvider;
25 23
import eu.dnetlib.msro.workflows.util.ResultsetProgressProvider;
26 24
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
25
import org.apache.commons.lang.StringUtils;
26
import org.apache.commons.logging.Log;
27
import org.apache.commons.logging.LogFactory;
28
import org.springframework.beans.factory.annotation.Autowired;
29
import org.springframework.beans.factory.annotation.Required;
27 30

  
28 31
public class DeleteHBaseRecordsJobNode extends BlackboardJobNode implements ProgressJobNode {
29 32

  
30 33
	private static final Log log = LogFactory.getLog(DeleteHBaseRecordsJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
31 34

  
35
	@Autowired
36
	private UniqueServiceLocator serviceLocator;
37

  
32 38
	private String inputEprParam;
33 39
	private String hbaseTableProperty;
34 40
	private String cluster;
35
	private String xslt;
41
	private String mapping;
36 42

  
37 43
	private boolean simulation = false;
38 44

  
......
51 57

  
52 58
		job.setAction(HadoopBlackboardActions.DELETE_EPR_HBASE.toString());
53 59
		job.getParameters().put("input_epr", DnetXsltFunctions.encodeBase64(prepareEpr(token)));
54
		job.getParameters().put("xslt", DnetXsltFunctions.encodeBase64(prepareXslt()));
60
		job.getParameters().put("xslt", DnetXsltFunctions.encodeBase64(readXslt(getMapping())));
55 61
		job.getParameters().put("table", getPropertyFetcher().getProperty(getHbaseTableProperty()));
56 62
		job.getParameters().put("cluster", cluster);
57 63
		job.getParameters().put("simulation", String.valueOf(isSimulation()));
......
80 86
		return resultsetProgressProvider.getEpr().toString();
81 87
	}
82 88

  
83
	private String prepareXslt() throws IOException {
84
		return (xslt == null) || xslt.isEmpty() ? "" : IOUtils.toString(getClass().getResourceAsStream(xslt));
89
	private String readXslt(final String profileId) throws IOException, MSROException, ISLookUpException {
90
		if (StringUtils.isBlank(profileId)) throw new MSROException("missing profile id");
91

  
92
		log.info("loading mapping from profile id: " + profileId);
93

  
94
		final String xquery =
95
				String.format("/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='%s']/BODY/CONFIGURATION/SCRIPT/CODE/*[local-name()='stylesheet']", profileId);
96
		return serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xquery);
85 97
	}
86 98

  
87 99
	public String getInputEprParam() {
......
100 112
		this.hbaseTableProperty = hbaseTableProperty;
101 113
	}
102 114

  
103
	@Required
104
	public void setProcessCountingResultSetFactory(final ProcessCountingResultSetFactory processCountingResultSetFactory) {
105
		this.processCountingResultSetFactory = processCountingResultSetFactory;
106
	}
107

  
108 115
	@Override
109 116
	public ProgressProvider getProgressProvider() {
110 117
		return progressProvider;
......
118 125
		return processCountingResultSetFactory;
119 126
	}
120 127

  
121
	public String getXslt() {
122
		return xslt;
128
	@Required
129
	public void setProcessCountingResultSetFactory(final ProcessCountingResultSetFactory processCountingResultSetFactory) {
130
		this.processCountingResultSetFactory = processCountingResultSetFactory;
123 131
	}
124 132

  
125
	public void setXslt(final String xslt) {
126
		this.xslt = xslt;
133
	public String getMapping() {
134
		return mapping;
127 135
	}
128 136

  
137
	public void setMapping(final String mapping) {
138
		this.mapping = mapping;
139
	}
140

  
129 141
	public String getCluster() {
130 142
		return cluster;
131 143
	}
modules/dnet-deduplication/trunk/src/main/java/eu/dnetlib/msro/workflows/hadoop/PrepareMDStoreImportJobNode.java
1 1
package eu.dnetlib.msro.workflows.hadoop;
2 2

  
3 3
import java.io.IOException;
4
import java.lang.reflect.Type;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Set;
8 4

  
9
import org.apache.commons.lang.StringUtils;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.springframework.beans.factory.annotation.Autowired;
13
import org.springframework.beans.factory.annotation.Value;
14

  
15
import com.google.common.base.Function;
16
import com.google.common.base.Joiner;
17
import com.google.common.base.Predicate;
18
import com.google.common.base.Splitter;
19
import com.google.common.collect.Iterables;
20
import com.google.common.collect.Maps;
21
import com.google.common.collect.Sets;
22
import com.google.gson.Gson;
23
import com.google.gson.reflect.TypeToken;
24 5
import com.googlecode.sarasvati.Arc;
25 6
import com.googlecode.sarasvati.NodeToken;
26

  
27
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
28 7
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
29 8
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
30 9
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
31 10
import eu.dnetlib.msro.rmi.MSROException;
32 11
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
12
import org.apache.commons.lang.StringUtils;
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15
import org.springframework.beans.factory.annotation.Autowired;
33 16

  
34 17
public class PrepareMDStoreImportJobNode extends SimpleJobNode {
35 18

  
......
46 29

  
47 30
	private String mapping;
48 31

  
49
	private String dsTypeMapParam;
50

  
51
	@Value("${dnet.openaire.dataload.dstypemap.xquery}")
52
	private String dsTypeXquery;
53

  
54
	private String specialDatasourcesCSV;
55

  
56 32
	@Override
57 33
	protected String execute(final NodeToken token) throws Exception {
58 34

  
59 35
		token.getEnv().setAttribute(getHdfsPathParam(), getHdfsPath());
60 36
		token.getEnv().setAttribute(getMappingParam(), readXslt(getMapping()));
61
		token.getEnv().setAttribute(getDsTypeMapParam(), getDatasourceTypeMap());
62 37

  
63 38
		return Arc.DEFAULT_ARC;
64 39
	}
65 40

  
66
	/**
67
	 * Looks up the IS for namespacePrefix -> datasource type, and builds a map such that: for institutional repositories associate "false" to the
68
	 * namespace prefix key, "true" otherwise and for a set of special prefixes
69
	 *
70
	 * @return json serialisation of the map
71
	 * @throws ISLookUpException if the lookup on the IS fails
72
	 */
73
	private String getDatasourceTypeMap() throws ISLookUpException {
74

  
75
		final Set<String> specialSet = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().split(getSpecialDatasourcesCSV()));
76
		log.info("loaded special namespaceprefixes: " + specialSet.toString());
77

  
78
		final List<String> lookupResult = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(getDsTypeXquery());
79
		final Set<String> nsPrefixSet = Sets.newHashSet();
80

  
81
		final Iterable<String> dstypeList =
82
				Iterables.filter(lookupResult, new Predicate<String>() {
83

  
84
					@Override
85
					public boolean apply(final String s) {
86
						final String nsPrefix = asNsPrefix(s);
87
						if (!nsPrefixSet.contains(nsPrefix)) {
88
							nsPrefixSet.add(nsPrefix);
89
							return true;
90
						} else return false;
91
					}
92
				});
93
		// log.info("datasource type list size: " + dstypeList.size());
94

  
95
		final Gson gson = new Gson();
96
		final Type token = new TypeToken<Map<String, String>>() {}.getType();
97
		final Map<String, String> dsMap = gson.fromJson(asJsonMap(dstypeList), token);
98
		log.debug("datasource type map size: " + dsMap.size());
99
		final Map<String, String> dsTypeMap = Maps.transformValues(dsMap, new Function<String, String>() {
100

  
101
			@Override
102
			public String apply(final String type) {
103
				if ("pubsrepository::unknown".equals(type)) {
104
					if (specialSet.contains(type)) return "false";
105
					return "true";
106
				} else return "false";
107
			}
108
		});
109
		log.debug("transformed datasource type map size: " + dsTypeMap.size());
110

  
111
		final String out = gson.toJson(dsTypeMap);
112

  
113
		return out;
114
	}
115

  
116
	private String asJsonMap(final Iterable<String> entries) {
117
		return "{" + Joiner.on(",\n").skipNulls().join(entries) + "}";
118
	}
119

  
120
	private String readXslt(final String profileId) throws IOException, MSROException, ISLookUpDocumentNotFoundException, ISLookUpException {
41
	private String readXslt(final String profileId) throws IOException, MSROException, ISLookUpException {
121 42
		if (StringUtils.isBlank(profileId)) throw new MSROException("missing profile id");
122 43

  
123 44
		log.info("loading mapping from profile id: " + profileId);
......
127 48
		return serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xquery);
128 49
	}
129 50

  
130
	private String asNsPrefix(final String s) {
131
		return StringUtils.substringBefore(s, ":").replaceAll("\"", "").trim();
132
	}
133

  
134 51
	public String getHdfsPathParam() {
135 52
		return hdfsPathParam;
136 53
	}
......
163 80
		this.mappingParam = mappingParam;
164 81
	}
165 82

  
166
	public String getDsTypeMapParam() {
167
		return dsTypeMapParam;
168
	}
169

  
170
	public void setDsTypeMapParam(final String dsTypeMapParam) {
171
		this.dsTypeMapParam = dsTypeMapParam;
172
	}
173

  
174
	public String getDsTypeXquery() {
175
		return dsTypeXquery;
176
	}
177

  
178
	public void setDsTypeXquery(final String dsTypeXquery) {
179
		this.dsTypeXquery = dsTypeXquery;
180
	}
181

  
182
	public String getSpecialDatasourcesCSV() {
183
		return specialDatasourcesCSV;
184
	}
185

  
186
	public void setSpecialDatasourcesCSV(final String specialDatasourcesCSV) {
187
		this.specialDatasourcesCSV = specialDatasourcesCSV;
188
	}
189

  
190 83
}
modules/dnet-deduplication/trunk/src/main/java/eu/dnetlib/functionality/modular/ui/workflows/values/ListHBaseMappingTitleValues.java
2 2

  
3 3
import java.util.List;
4 4
import java.util.Map;
5

  
6 5
import javax.annotation.Resource;
7 6

  
8 7
import com.google.common.collect.Lists;
9

  
10 8
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
11 9
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
12 10
import eu.dnetlib.msro.workflows.util.ValidNodeValuesFetcher;
11
import org.springframework.beans.factory.annotation.Required;
13 12

  
14 13
public class ListHBaseMappingTitleValues extends ValidNodeValuesFetcher {
15 14

  
16 15
	@Resource
17 16
	private UniqueServiceLocator serviceLocator;
18 17

  
18
	private String sourceFormat;
19

  
19 20
	@Override
20 21
	protected List<DnetParamValue> obtainValues(final Map<String, String> params) throws Exception {
21 22

  
22 23
		final String xquery =
23 24
				"for $x in /RESOURCE_PROFILE["
24 25
						+ ".//RESOURCE_TYPE/@value='TransformationRuleDSResourceType' and "
25
						+ ".//SOURCE_METADATA_FORMAT/@name = 'oaf' and "
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff