Project

General

Profile

« Previous | Next » 

Revision 42821

tests, added openaire profiles

View differences:

modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/applicationContext-actionmanager-test.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<beans xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
	xmlns:jaxws="http://cxf.apache.org/jaxws" xmlns:sec="http://cxf.apache.org/configuration/security"
4
	xmlns:wsa="http://cxf.apache.org/ws/addressing" xmlns:p="http://www.springframework.org/schema/p"
5
	xmlns:http="http://cxf.apache.org/transports/http/configuration" xmlns:t="http://dnetlib.eu/springbeans/t"
6
	xmlns:template="http://dnetlib.eu/springbeans/template" xmlns:util="http://www.springframework.org/schema/util"
7
	xmlns="http://www.springframework.org/schema/beans"
8
	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
9
                                    http://cxf.apache.org/ws/addressing http://cxf.apache.org/schemas/ws-addr-conf.xsd
10
                                    http://cxf.apache.org/configuration/security http://cxf.apache.org/schemas/configuration/security.xsd
11
                                    http://cxf.apache.org/transports/http/configuration http://cxf.apache.org/schemas/configuration/http-conf.xsd
12
                            http://cxf.apache.org/jaxws http://cxf.apache.org/schemas/jaxws.xsd
13
                            http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd
14
                            http://dnetlib.eu/springbeans/template http://dnetlib.eu/springbeans/template.xsd">
15

  
16

  
17
	<bean id="actionFactory" class="eu.dnetlib.data.actionmanager.actions.ActionFactory">
18
		<property name="xslts">
19
			<util:map>
20
				<entry key="oaf2insertActions" value="classpath:/eu/dnetlib/data/actionmanager/actions/xslt/oaf2insertActions.xslt" />
21
				<entry key="oaf2updateActions" value="classpath:/eu/dnetlib/data/actionmanager/actions/xslt/oaf2updateActions.xslt" />
22
				<entry key="odf2insertActions" value="classpath:/eu/dnetlib/data/actionmanager/actions/xslt/odf2insertActions.xslt" />
23
				<entry key="odf2updateActions" value="classpath:/eu/dnetlib/data/actionmanager/actions/xslt/odf2updateActions.xslt" />
24
				<entry key="rels2Actions" value="classpath:/eu/dnetlib/data/actionmanager/actions/xslt/rels2actions.xslt" />
25
			</util:map>
26
		</property>
27
	</bean>
28

  
29
</beans>
modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/xslt/odf2updateActions.xslt
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3
                xmlns:dnet="eu.dnetlib.data.transform.xml.OdfToHbaseXsltFunctions"
4
                xmlns:oaf="http://namespace.openaire.eu/oaf"
5
                xmlns:dri="http://www.driver-repository.eu/namespace/dri"
6
                xmlns:date="java.lang.System"
7
                xmlns:dr="http://www.driver-repository.eu/namespace/dr"
8
                xmlns:exslt="http://exslt.org/common" version="1.0"
9
                extension-element-prefixes="exslt" exclude-result-prefixes="xsl dnet exslt oaf dr dri date">
10

  
11
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
12

  
13
	<xsl:param name="trust" select="string('0.9')"/>
14
	<xsl:param name="provenance" select="string('UNKNOWN')"/>
15
	<xsl:param name="namespaceprefix" select="string('datacite____')"/>
16

  
17
	<xsl:template match="/*">
18
		<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/>
19
		<xsl:variable name="metadata" select="exslt:node-set(/*[local-name()='record']/*[local-name()='metadata']/*[local-name()='resource'])"/>
20
		<xsl:variable name="rightNSPrefix">
21
			<xsl:choose>
22
				<xsl:when test="not($namespaceprefix)">
23
					<xsl:value-of select="//oaf:datasourceprefix"/>
24
				</xsl:when>
25
				<xsl:otherwise>
26
					<xsl:value-of select="$namespaceprefix"/>
27
				</xsl:otherwise>
28
			</xsl:choose>
29
		</xsl:variable>
30

  
31
		<xsl:choose>
32
			<xsl:when test="count($metadata) =  0">
33
				<ACTIONS/>
34
			</xsl:when>
35
			<xsl:otherwise>
36
				<xsl:variable name="originalId" select="//*[local-name() = 'identifier' and ./@identifierType='DOI']"/>
37
				<xsl:variable name="resultId" select="dnet:oafSimpleId('result', //dri:objIdentifier)"/>
38

  
39
				<xsl:if test="string-length($resultId) &gt; 0">
40
					<xsl:variable name="originalids"
41
					              select="//*[local-name() = 'resource']/*[local-name()='identifier'] | //*[local-name() = 'resource']//*[local-name()='alternateIdentifier']"/>
42

  
43
					<xsl:variable name="creators" select="//*[local-name() = 'creator']"/>
44
					<xsl:variable name="titles" select="//*[local-name() = 'title']"/>
45
					<xsl:variable name="subjects" select="//*[local-name() = 'subject']"/>
46
					<xsl:variable name="publisher" select="//*[local-name() = 'publisher']"/>
47
					<xsl:variable name="descriptions" select="//*[local-name() = 'description']"/>
48
					<xsl:variable name="dates" select="//*[local-name() = 'date']"/>
49
					<xsl:variable name="dateaccepted" select="//oaf:dateAccepted"/>
50
					<xsl:variable name="resourceType" select="//*[local-name() = 'resourceType']"/>
51
					<xsl:variable name="formats" select="//*[local-name() = 'format']"/>
52
					<xsl:variable name="sizes" select="//*[local-name() = 'size']"/>
53
					<xsl:variable name="rights" select="//oaf:accessrights"/>
54
					<xsl:variable name="version" select="//*[local-name() = 'version']"/>
55
					<xsl:variable name="instanceURI"
56
					              select="concat('http://dx.doi.org','/',//*[local-name() = 'resource']/*[local-name() = 'identifier'])"/>
57
					<xsl:variable name="hostedbyid"
58
					              select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/>
59
					<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/>
60
					<xsl:variable name="collectedfromid"
61
					              select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/>
62
					<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/>
63
					<xsl:variable name="dateOfCollection" select="//dr:dateOfCollection"/>
64
					<xsl:variable name="dateoftransformation" select="concat('', //dr:dateOfTransformation)"/>
65
					<xsl:variable name="language" select="//oaf:language"/>
66
					<xsl:variable name="cobjcategory" select="//dr:CobjCategory"/>
67
					<xsl:variable name="contributors" select="//*[local-name() = 'contributorName']"/>
68
					<xsl:variable name="pids" select="//*[local-name() = 'resource']/*[local-name()='identifier']"/>
69

  
70
					<xsl:variable name="result"
71
					              select="dnet:odfResult($resultId, $about, $metadata, $titles, $subjects, $publisher, $descriptions,
72
			                                            $dates, $dateaccepted, $resourceType, $formats, $sizes, $language, $cobjcategory, $contributors, $rights,
73
			                                            $version, $pids, $provenance, $trust, $hostedbyid, $hostedbyname, $collectedfromid,
74
			                                            $collectedfromname, $originalids, $instanceURI, $dateOfCollection, $dateoftransformation)"/>
75
					<ACTIONS>
76
						<ACTION targetKey="{$resultId}" targetColumnFamily="result" targetColumn="{concat('update_', date:nanoTime())}">
77
							<xsl:value-of select="$result"/>
78
						</ACTION>
79
					</ACTIONS>
80
				</xsl:if>
81
			</xsl:otherwise>
82
		</xsl:choose>
83
	</xsl:template>
84
</xsl:stylesheet>
modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/xslt/odf2insertActions.xslt
1
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
2
                xmlns:oaf="http://namespace.openaire.eu/oaf"
3
                xmlns:dnet="eu.dnetlib.data.transform.xml.OdfToHbaseXsltFunctions"
4
                xmlns:dri="http://www.driver-repository.eu/namespace/dri"
5
                xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:exslt="http://exslt.org/common"
6
                version="1.0" extension-element-prefixes="exslt"
7
                exclude-result-prefixes="xsl oaf dr dri dnet exslt">
8

  
9
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
10
	<xsl:template match="/*">
11
		<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/>
12
		<xsl:variable name="dateOfCollection" select="concat('', //dri:dateOfCollection)"/>
13
		<xsl:variable name="dateoftransformation" select="concat('', //dr:dateOfTransformation)"/>
14

  
15
		<xsl:variable name="trust" select="string('0.9')"/>
16
		<xsl:variable name="provenance" select="string('sysimport:crosswalk:datasetarchive')"/>
17

  
18
		<xsl:variable name="metadata"
19
		              select="exslt:node-set(//*[local-name()='metadata']/*)"/>
20
		<xsl:variable name="namespaceprefix">
21
			<xsl:choose>
22
				<!-- Prefer the record's oaf:datasourceprefix; otherwise fall back to the quoted string literal 'unknown_____' (an unquoted name would be read as a child-element path and yield an empty string). TODO check namespaceprefix length is 12 -->
23
				<xsl:when test="string-length(//oaf:datasourceprefix) &gt; 0">
24
					<xsl:value-of select="//oaf:datasourceprefix"/>
25
				</xsl:when>
26
				<xsl:otherwise>
27
					<xsl:value-of select="'unknown_____'"/>
28
				</xsl:otherwise>
29
			</xsl:choose>
30
		</xsl:variable>
31

  
32
		<xsl:choose>
33
			<xsl:when
34
					test="count($metadata) = 0 or normalize-space(//oaf:skipRecord)= 'true'">
35
				<ROWS/>
36
			</xsl:when>
37
			<xsl:otherwise>
38

  
39
				<xsl:variable name="resultId"
40
				              select="dnet:oafSimpleId('result', //dri:objIdentifier)"/>
41

  
42
				<xsl:if test="string-length($resultId) &gt; 0">
43
					<xsl:variable name="originalids"
44
					              select="//*[local-name() = 'resource']/*[local-name()='identifier'] | //*[local-name() = 'resource']//*[local-name()='alternateIdentifier']"/>
45
					<xsl:variable name="creators" select="//*[local-name() = 'creator']"/>
46
					<xsl:variable name="titles" select="//*[local-name() = 'title']"/>
47
					<xsl:variable name="subjects" select="//*[local-name() = 'subject']"/>
48
					<xsl:variable name="publisher" select="//*[local-name() = 'publisher']"/>
49
					<xsl:variable name="descriptions" select="//*[local-name() = 'description']"/>
50
					<xsl:variable name="dates" select="//*[local-name() = 'date']"/>
51
					<xsl:variable name="dateaccepted" select="//oaf:dateAccepted"/>
52
					<xsl:variable name="resourceType" select="//*[local-name() = 'resourceType']"/>
53
					<xsl:variable name="formats" select="//*[local-name() = 'format']"/>
54
					<xsl:variable name="sizes" select="//*[local-name() = 'size']"/>
55
					<xsl:variable name="rights" select="//oaf:accessrights"/>
56
					<xsl:variable name="version" select="//*[local-name() = 'version']"/>
57
					<xsl:variable name="language" select="//oaf:language"/>
58
					<xsl:variable name="cobjcategory" select="//dr:CobjCategory"/>
59
					<xsl:variable name="contributor" select="//*[local-name() = 'contributor']/*[local-name() = 'contributorName']"/>
60

  
61
					<xsl:variable name="instanceURI">
62
						<xsl:choose>
63
							<xsl:when
64
									test="string-length( //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='DOI']) &gt; 0">
65
								<xsl:value-of
66
										select="concat('http://dx.doi.org','/', //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='DOI']/text())"/>
67
							</xsl:when>
68
							<xsl:otherwise>
69
								<xsl:value-of
70
										select="concat('', //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='URL'])"/>
71
							</xsl:otherwise>
72
						</xsl:choose>
73
					</xsl:variable>
74

  
75
					<xsl:variable name="hostedbyid"
76
					              select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/>
77
					<xsl:variable name="hostedbyname" select="concat('', //oaf:hostedBy/@name)"/>
78
					<xsl:variable name="collectedfromid"
79
					              select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/>
80
					<xsl:variable name="collectedfromname"
81
					              select="concat('', //oaf:collectedFrom/@name)"/>
82

  
83
					<xsl:variable name="pids"
84
					              select="//*[local-name() = 'resource']//*[local-name()='identifier'or local-name()='alternateIdentifier']"/>
85

  
86
					<xsl:variable name="result"
87
					              select="dnet:odfResult($resultId, $about, $metadata, $titles, $subjects, $publisher, $descriptions,
88
	                                            $dates, $dateaccepted, $resourceType, $formats, $sizes, $language, $cobjcategory, $contributor, $rights,
89
	                                            $version, $pids, $provenance, $trust, $hostedbyid, $hostedbyname, $collectedfromid,
90
	                                            $collectedfromname, $originalids, $instanceURI, $dateOfCollection, $dateoftransformation)"/>
91

  
92
					<ROWS>
93
						<ROW key="{$resultId}" columnFamily="result">
94
							<QUALIFIER name="body" type="base64">
95
								<xsl:value-of select="$result"/>
96
							</QUALIFIER>
97
						</ROW>
98
						<xsl:for-each select="//*[local-name() = 'creator']">
99

  
100
							<xsl:variable name="personIdTemp">
101
								<xsl:choose>
102
									<xsl:when test="string-length(./*[local-name() = 'nameIdentifier']) &gt; 0">
103
										<xsl:value-of
104
												select="translate(normalize-space(./*[local-name() = 'nameIdentifier']),' .,','___')"/>
105
									</xsl:when>
106
									<xsl:otherwise>
107
										<xsl:value-of select="translate(normalize-space(./*[local-name() = 'creatorName']),' .,','___')"/>
108
									</xsl:otherwise>
109
								</xsl:choose>
110
							</xsl:variable>
111

  
112
							<xsl:variable name="personId" select="dnet:oafId('person', $namespaceprefix, normalize-space($personIdTemp))"/>
113

  
114
							<xsl:variable name="originalPersonId" select="./*[local-name() = 'nameIdentifier']"/>
115

  
116
							<xsl:if test="string-length($personId) &gt; 0">
117
								<xsl:variable name="position" select="position()"/>
118
								<xsl:variable name="person"
119
								              select="dnet:person($personId, $about, $provenance, $trust, $collectedfromid, $collectedfromname,
120
									                              $originalPersonId, $dateOfCollection, $dateoftransformation, normalize-space(./*[local-name() = 'creatorName']),
121
									                              ./*[local-name() = 'nameIdentifier'], ./*[local-name() = 'nameIdentifier']/@nameIdentifierScheme)"/>
122
								<xsl:variable name="personresult"
123
								              select="dnet:personResult_Authorship($personId, $resultId, $position, 'isAuthorOf', $provenance, $trust, $about)"/>
124
								<xsl:variable name="resultperson"
125
								              select="dnet:personResult_Authorship($resultId, $personId, $position, 'hasAuthor', $provenance, $trust, $about)"/>
126

  
127
								<ROW key="{$personId}" columnFamily="person">
128
									<QUALIFIER name="body" type="base64">
129
										<xsl:value-of select="$person"/>
130
									</QUALIFIER>
131
								</ROW>
132
								<ROW key="{$personId}" columnFamily="personResult_authorship_isAuthorOf">
133
									<QUALIFIER name="{$resultId}" type="base64">
134
										<xsl:value-of select="$personresult"/>
135
									</QUALIFIER>
136
								</ROW>
137
								<ROW key="{$resultId}" columnFamily="personResult_authorship_hasAuthor">
138
									<QUALIFIER name="{$personId}" type="base64">
139
										<xsl:value-of select="$resultperson"/>
140
									</QUALIFIER>
141
								</ROW>
142
							</xsl:if>
143
						</xsl:for-each>
144

  
145
						<xsl:for-each select="//*[local-name()='projectid']">
146
							<!-- Emit both directions of the result/project relation for each projectid; the relation payloads are built only after the project id is known to be non-empty, matching the other relation loops in this template. -->
147
							<xsl:variable name="projectId"
148
							              select="dnet:oafSplitId('project', normalize-space(.))"/>
149

  
150
							<xsl:if test="string-length($projectId) &gt; 0">
151
								<xsl:variable name="resultproject"
152
								              select="dnet:rel($resultId, $projectId, 'resultProject', 'outcome', 'isProducedBy', $provenance, $trust, $about)"/>
153
								<xsl:variable name="projectresult"
154
								              select="dnet:rel($projectId, $resultId, 'resultProject', 'outcome', 'produces', $provenance, $trust, $about)"/>
155

  
156
								<ROW key="{$resultId}" columnFamily="resultProject_outcome_isProducedBy">
157
									<QUALIFIER name="{$projectId}" type="base64">
158
										<xsl:value-of select="$resultproject"/>
159
									</QUALIFIER>
160
								</ROW>
161
								<ROW key="{$projectId}" columnFamily="resultProject_outcome_produces">
162
									<QUALIFIER name="{$resultId}" type="base64">
163
										<xsl:value-of select="$projectresult"/>
164
									</QUALIFIER>
165
								</ROW>
166
							</xsl:if>
167
						</xsl:for-each>
168

  
169
						<xsl:for-each
170
								select="//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='OPENAIRE' and ./@relationType='IsSupplementTo']">
171

  
172
							<!-- relatedDataset ids must be in the openaire format  -->
173
							<xsl:variable name="targetId" select="dnet:oafSimpleId('result', normalize-space(.))"/>
174

  
175
							<xsl:if test="string-length($targetId) &gt; 0">
176

  
177
								<xsl:variable name="resultDataset"
178
								              select="dnet:rel($resultId, $targetId, 'resultResult', 'supplement', 'isSupplementTo', $provenance, $trust, $about)"/>
179
								<xsl:variable name="datasetResult"
180
								              select="dnet:rel($targetId, $resultId, 'resultResult', 'supplement', 'isSupplementedBy', $provenance, $trust, $about)"/>
181

  
182
								<ROW key="{$resultId}" columnFamily="resultResult_supplement_isSupplementTo">
183
									<QUALIFIER name="{$targetId}" type="base64">
184
										<xsl:value-of select="$resultDataset"/>
185
									</QUALIFIER>
186
								</ROW>
187
								<ROW key="{$targetId}" columnFamily="resultResult_supplement_isSupplementedBy">
188
									<QUALIFIER name="{$resultId}" type="base64">
189
										<xsl:value-of select="$datasetResult"/>
190
									</QUALIFIER>
191
								</ROW>
192
							</xsl:if>
193
						</xsl:for-each>
194

  
195
						<xsl:for-each
196
								select="//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='OPENAIRE' and ./@relationType='IsPartOf']">
197

  
198
							<!-- relatedDataset ids must be in the openaire format  -->
199
							<xsl:variable name="datasetId" select="dnet:oafSimpleId('result', normalize-space(.))"/>
200

  
201
							<xsl:if test="string-length($datasetId) &gt; 0">
202

  
203
								<xsl:variable name="childParent"
204
								              select="dnet:rel($resultId, $datasetId, 'resultResult', 'part', 'isPartOf', $provenance, $trust, $about)"/>
205
								<xsl:variable name="parentChild"
206
								              select="dnet:rel($datasetId, $resultId, 'resultResult', 'part', 'hasPart', $provenance, $trust, $about)"/>
207

  
208
								<ROW key="{$resultId}" columnFamily="resultResult_part_isPartOf">
209
									<QUALIFIER name="{$datasetId}" type="base64">
210
										<xsl:value-of select="$childParent"/>
211
									</QUALIFIER>
212
								</ROW>
213
								<ROW key="{$datasetId}" columnFamily="resultResult_part_hasPart">
214
									<QUALIFIER name="{$resultId}" type="base64">
215
										<xsl:value-of select="$parentChild"/>
216
									</QUALIFIER>
217
								</ROW>
218
							</xsl:if>
219
						</xsl:for-each>
220

  
221
					</ROWS>
222
				</xsl:if>
223
			</xsl:otherwise>
224
		</xsl:choose>
225
	</xsl:template>
226
</xsl:stylesheet>
modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/xslt/rels2actions.xslt
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3
                xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dr="http://www.driver-repository.eu/namespace/dr"
4
                xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:oaa="http://namespace.openaire.eu/oaa"
5
                xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:dnet="eu.dnetlib.data.transform.xml.OafToHbaseXsltFunctions"
6
                xmlns:exslt="http://exslt.org/common"
7
                xmlns:action="http://namespace.openaire.eu/action" version="1.0"
8
                extension-element-prefixes="exslt"
9
                exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt">
10

  
11
	<xsl:output omit-xml-declaration="yes" indent="yes" />
12

  
13
	<xsl:param name="trust" select="string('0.9')" />
14
	<xsl:param name="provenance" select="string('UNKNOWN')" />
15
	<xsl:param name="namespaceprefix" select="string('unknown_____')" />
16

  
17
	<xsl:template match="/*">
18

  
19
		<xsl:choose>
20
			<xsl:when test="count(//RELATION) = 0">
21
				<ACTIONS />
22
			</xsl:when>
23
			<xsl:otherwise>
24
				<ACTIONS>
25
					<xsl:for-each select="//RELATION">
26
						<xsl:choose>
27
							<xsl:when test="./@type = 'resultProject'">
28
								<xsl:variable name="resultId" select="./@source" />
29
								<xsl:variable name="projectId">
30
									<xsl:choose>
31
										<xsl:when test="starts-with(@target, '40|')">
32
											<xsl:value-of select="./@target" />
33
										</xsl:when>
34
										<xsl:otherwise>
35
											<xsl:value-of select="dnet:oafSplitId('project', normalize-space(@target))"/>
36
										</xsl:otherwise>
37
									</xsl:choose>
38
								</xsl:variable>
39

  
40
								<ACTION targetKey="{$resultId}" targetColumnFamily="resultProject_outcome_isProducedBy" targetColumn="{$projectId}">
41
									<xsl:value-of select="dnet:rel($resultId, $projectId, 'resultProject', 'outcome', 'isProducedBy', $provenance, $trust)" />
42
								</ACTION>
43
								<ACTION targetKey="{$projectId}" targetColumnFamily="resultProject_outcome_produces" targetColumn="{$resultId}">
44
									<xsl:value-of select="dnet:rel($projectId, $resultId, 'resultProject', 'outcome', 'produces', $provenance, $trust)" />
45
								</ACTION>
46
							</xsl:when>
47

  
48
							<!-- TODO: check this block, it caused problems:
49
									Cannot convert argument/return type in call to method 'eu.dnetlib.actionmanager.actions.infopackage.DMFInfoPackageToHbaseXsltFunctions.oafPersonResultFromInfoPackage(node-set, node-set, result-tree, reference, reference)'
50
									FATAL ERROR:  'Could not compile stylesheet'
51

  
52
							<xsl:when test="./@type = 'personResult'">
53
								<xsl:variable name="personId" select="./@source" />
54
								<xsl:variable name="resultId" select="./@target" />
55

  
56
								<xsl:variable name="pos">
57
									<xsl:choose>
58
										<xsl:when test="@position"><xsl:value-of select="@position"/></xsl:when>
59
										<xsl:otherwise>1000</xsl:otherwise>
60
									</xsl:choose>
61
								</xsl:variable>
62

  
63
								<ACTION targetKey="{$personId}" targetColumnFamily="personResult" targetColumn="{$resultId}">
64
									<xsl:value-of select="dnet:oafPersonResultFromInfoPackage($personId, $resultId, $pos, $provenance, $trust)" />
65
								</ACTION>
66
								<ACTION targetKey="{$resultId}" targetColumnFamily="personResult" targetColumn="{$personId}">
67
									<xsl:value-of select="dnet:oafPersonResultFromInfoPackage($resultId, $personId, $pos, $provenance, $trust)" />
68
								</ACTION>
69
							</xsl:when>
70
							-->
71
						</xsl:choose>
72
					</xsl:for-each>
73
				</ACTIONS>
74
			</xsl:otherwise>
75
		</xsl:choose>
76
	</xsl:template>
77

  
78
</xsl:stylesheet>
modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/xslt/oaf2updateActions.xslt
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3
                xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dr="http://www.driver-repository.eu/namespace/dr"
4
                xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:oaa="http://namespace.openaire.eu/oaa"
5
                xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:dnet="eu.dnetlib.data.transform.xml.OafToHbaseXsltFunctions"
6
                xmlns:date="java.lang.System"
7
                xmlns:exslt="http://exslt.org/common"
8
                version="1.0"
9
                extension-element-prefixes="exslt"
10
                exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt date">
11

  
12
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
13

  
14
	<xsl:param name="trust" select="string('0.9')"/>
15
	<xsl:param name="provenance" select="string('UNKNOWN')"/>
16
	<xsl:param name="namespaceprefix" select="string('unknown_____')"/>
17

  
18
	<xsl:template match="/*">
19
		<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/>
20
		<xsl:variable name="dateofcollection" select="//dr:dateOfCollection"/>
21
		<xsl:variable name="dateoftransformation" select="//dr:dateOfTransformation"/>
22

  
23
		<xsl:variable name="metadata" select="exslt:node-set(//*[local-name()='metadata']/*)"/>
24
		<xsl:variable name="collectedDatasourceid"><!-- oaf:collectedDatasourceid when non-empty, otherwise the quoted string literal 'UNKNOWN' (unquoted, it would be a child-element path selecting nothing) -->
25
			<xsl:choose>
26
				<xsl:when test="string-length(//oaf:collectedDatasourceid) &gt; 0">
27
					<xsl:value-of select="//oaf:collectedDatasourceid"/>
28
				</xsl:when>
29
				<xsl:otherwise>
30
					<xsl:value-of select="'UNKNOWN'"/>
31
				</xsl:otherwise>
32
			</xsl:choose>
33
		</xsl:variable>
34

  
35
		<xsl:choose>
36
			<xsl:when test="count($metadata) = 0">
37
				<ACTIONS/>
38
			</xsl:when>
39
			<xsl:otherwise>
40

  
41
				<xsl:variable name="objidentifier" select="/record/*[local-name() = 'header']/*[local-name() = 'objIdentifier']"/>
42

  
43
				<xsl:variable name="resultId" select="dnet:oafSimpleId('result', $objidentifier)"/>
44

  
45
				<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/>
46
				<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/>
47

  
48
				<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/>
49
				<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/>
50

  
51
				<!-- 				<xsl:variable name="country" select="substring(//dr:repositoryCountry,  1, 200)" /> -->
52
				<!-- 				<xsl:variable name="accessmode" select="substring(//oaf:accessrights,   1, 200)" /> -->
53

  
54
				<xsl:variable name="originalidTest" select="/record/*[local-name() = 'header']/*[local-name() = 'recordIdentifier']"/>
55
				<xsl:variable name="originalid">
56
					<xsl:choose>
57
						<xsl:when test="contains($originalidTest, '::')">
58
							<xsl:value-of select="substring-after($originalidTest, '::')"/>
59
						</xsl:when>
60
						<xsl:otherwise>
61
							<xsl:value-of select="$originalidTest"/>
62
						</xsl:otherwise>
63
					</xsl:choose>
64
				</xsl:variable>
65

  
66
				<xsl:variable name="result"
67
				              select="dnet:oafResult($resultId, $provenance, $trust, $about, $hostedbyid, $hostedbyname,
68
	                          $collectedfromid, $collectedfromname, $originalid, $dateofcollection, $dateoftransformation, $metadata)"/>
69

  
70
				<ACTIONS>
71
					<ACTION targetKey="{$resultId}" targetColumnFamily="result" targetColumn="{concat('update_', date:nanoTime())}">
72
						<xsl:value-of select="$result"/>
73
					</ACTION>
74
				</ACTIONS>
75
			</xsl:otherwise>
76
		</xsl:choose>
77
	</xsl:template>
78

  
79
</xsl:stylesheet>
modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/xslt/oaf2insertActions.xslt
1
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
2
                xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:dc="http://purl.org/dc/elements/1.1/"
3
                xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr"
4
                xmlns:dnet="eu.dnetlib.data.transform.xml.OafToHbaseXsltFunctions"
5
                xmlns:exslt="http://exslt.org/common"
6
                version="1.0" extension-element-prefixes="exslt"
7
                exclude-result-prefixes="xsl oaf dr dri dnet exslt">
8

  
9
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
10

  
11
	<xsl:param name="writeCoAuthors" select="false()"/>
12

  
13
	<xsl:template match="/*">
14
		<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/>
15
		<xsl:variable name="dateofcollection" select="//dr:dateOfCollection"/>
16
		<xsl:variable name="dateoftransformation" select="//dr:dateOfTransformation"/>
17

  
18
		<xsl:variable name="trust" select="string('0.9')"/>
19
		<xsl:variable name="provenance" select="string('sysimport:crosswalk:repository')"/>
20

  
21
		<xsl:variable name="metadata" select="exslt:node-set(//*[local-name()='metadata']/*)"/>
22
		<xsl:variable name="namespaceprefix">
23
			<xsl:choose>
24

  
25
				<!-- Prefer the record's oaf:datasourceprefix; otherwise fall back to the quoted 12-character string literal 'unknown_____' (an unquoted name would be read as a child-element path and yield an empty string). TODO check namespaceprefix length is 12 -->
26
				<xsl:when test="string-length(//oaf:datasourceprefix) &gt; 0">
27
					<xsl:value-of select="//oaf:datasourceprefix"/>
28
				</xsl:when>
29
				<xsl:otherwise>
30
					<xsl:value-of select="'unknown_____'"/>
31
				</xsl:otherwise>
32
			</xsl:choose>
33
		</xsl:variable>
34

  
35
		<xsl:choose>
36
			<!-- 			<xsl:when test="count($metadata) = 0 or string-length($namespaceprefix) = 0"> -->
37
			<xsl:when test="count($metadata) = 0">
38
				<ROWS/>
39
			</xsl:when>
40
			<xsl:otherwise>
41

  
42
				<xsl:variable name="objIdentifier" select="//dri:objIdentifier"/>
43
				<xsl:variable name="resultId" select="dnet:oafSimpleId('result', //dri:objIdentifier)"/>
44

  
45
				<xsl:if test="string-length($resultId) &gt; 0">
46

  
47
					<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/>
48
					<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/>
49

  
50
					<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/>
51
					<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/>
52

  
53
					<xsl:variable name="originalidTest" select="/record/*[local-name() = 'header']/*[local-name() = 'recordIdentifier']"/>
54
					<xsl:variable name="originalid">
55
						<xsl:choose>
56
							<xsl:when test="contains($originalidTest, '::')">
57
								<xsl:value-of select="substring-after($originalidTest, '::')"/>
58
							</xsl:when>
59
							<xsl:otherwise>
60
								<xsl:value-of select="$originalidTest"/>
61
							</xsl:otherwise>
62
						</xsl:choose>
63
					</xsl:variable>
64

  
65
					<xsl:variable name="result"
66
					              select="dnet:oafResult($resultId, $provenance, $trust, $about, $hostedbyid, $hostedbyname, $collectedfromid,
67
										              $collectedfromname, $originalid, $dateofcollection, $dateoftransformation, $metadata)"/>
68

  
69
					<ROWS>
70
						<ROW key="{$resultId}" columnFamily="result">
71
							<QUALIFIER name="body" type="base64">
72
								<xsl:value-of select="$result"/>
73
							</QUALIFIER>
74
						</ROW>
75

  
76
						<xsl:for-each select="//*[local-name()='projectid']">
77

  
78
							<xsl:variable name="projectId" select="dnet:oafSplitId('project', normalize-space(.))"/>
79

  
80
							<xsl:if test="string-length($projectId) &gt; 0">
81

  
82
								<xsl:variable name="resultproject"
83
								              select="dnet:rel($resultId, $projectId, 'resultProject', 'outcome', 'isProducedBy', $provenance, $trust, $about)"/>
84
								<xsl:variable name="projectresult"
85
								              select="dnet:rel($projectId, $resultId, 'resultProject', 'outcome', 'produces', $provenance, $trust, $about)"/>
86

  
87
								<ROW key="{$resultId}" columnFamily="resultProject_outcome_isProducedBy">
88
									<QUALIFIER name="{$projectId}" type="base64">
89
										<xsl:value-of select="$resultproject"/>
90
									</QUALIFIER>
91
								</ROW>
92
								<ROW key="{$projectId}" columnFamily="resultProject_outcome_produces">
93
									<QUALIFIER name="{$resultId}" type="base64">
94
										<xsl:value-of select="$projectresult"/>
95
									</QUALIFIER>
96
								</ROW>
97
							</xsl:if>
98
						</xsl:for-each>
99

  
100
						<xsl:for-each select="//*[local-name()='relatedDataset']">
101

  
102
							<!-- relatedDataset ids must be in the openaire format  -->
103
							<xsl:variable name="datasetId" select="dnet:oafSimpleId('result', normalize-space(./@id))"/>
104

  
105
							<xsl:if test="string-length($datasetId) &gt; 0">
106

  
107
								<xsl:variable name="resultDataset"
108
								              select="dnet:rel($resultId, $datasetId, 'resultResult', 'publicationDataset', 'isRelatedTo', $provenance, $trust, $about)"/>
109
								<xsl:variable name="datasetResult"
110
								              select="dnet:rel($datasetId, $resultId, 'resultResult', 'publicationDataset', 'isRelatedTo', $provenance, $trust, $about)"/>
111

  
112
								<ROW key="{$resultId}" columnFamily="resultResult_publicationDataset_isRelatedTo">
113
									<QUALIFIER name="{$datasetId}" type="base64">
114
										<xsl:value-of select="$resultDataset"/>
115
									</QUALIFIER>
116
								</ROW>
117
								<ROW key="{$datasetId}" columnFamily="resultResult_publicationDataset_isRelatedTo">
118
									<QUALIFIER name="{$resultId}" type="base64">
119
										<xsl:value-of select="$datasetResult"/>
120
									</QUALIFIER>
121
								</ROW>
122
							</xsl:if>
123
						</xsl:for-each>
124

  
125
						<xsl:for-each select="//*[local-name()='creator']">
126

  
127
							<xsl:if test="string-length(normalize-space(.)) &gt; 0">
128
								<xsl:variable name="originalPersonId">
129
									<xsl:choose>
130
										<xsl:when test="string-length(@nameIdentifier) &gt; 0">
131
											<xsl:value-of select="normalize-space(@nameIdentifier)"/>
132
										</xsl:when>
133
										<xsl:otherwise>
134
											<xsl:value-of select="concat($originalid, '::', normalize-space(.))"/>
135
										</xsl:otherwise>
136
									</xsl:choose>
137
								</xsl:variable>
138

  
139
								<xsl:variable name="personId" select="dnet:oafId('person', $namespaceprefix, $originalPersonId)"/>
140
								<xsl:variable name="position" select="position()"/>
141

  
142
								<xsl:variable name="person"
143
								              select="dnet:person($personId, $about, $provenance, $trust, $collectedfromid, $collectedfromname,
144
								              $originalPersonId, $dateofcollection, $dateoftransformation, normalize-space(.), @nameIdentifier, @nameIdentifierScheme)"/>
145
								<xsl:variable name="personresult"
146
								              select="dnet:personResult_Authorship($personId, $resultId, $position, 'isAuthorOf', $provenance, $trust, $about)"/>
147
								<xsl:variable name="resultperson"
148
								              select="dnet:personResult_Authorship($resultId, $personId, $position, 'hasAuthor', $provenance, $trust, $about)"/>
149

  
150
								<xsl:if test="string-length($personId) &gt; 0">
151
									<ROW key="{$personId}" columnFamily="person">
152
										<QUALIFIER name="body" type="base64">
153
											<xsl:value-of select="$person"/>
154
										</QUALIFIER>
155
									</ROW>
156
									<ROW key="{$personId}" columnFamily="personResult_authorship_isAuthorOf">
157
										<QUALIFIER name="{$resultId}" type="base64">
158
											<xsl:value-of select="$personresult"/>
159
										</QUALIFIER>
160
									</ROW>
161
									<ROW key="{$resultId}" columnFamily="personResult_authorship_hasAuthor">
162
										<QUALIFIER name="{$personId}" type="base64">
163
											<xsl:value-of select="$resultperson"/>
164
										</QUALIFIER>
165
									</ROW>
166
								</xsl:if>
167

  
168
								<!-- COAUTHORS -->
169
								<xsl:if test="$writeCoAuthors = true()">
170
									<xsl:for-each select="../dc:creator">
171
										<xsl:if test="$position != position()">
172

  
173
											<xsl:variable name="originalCoauthorId">
174
												<xsl:choose>
175
													<xsl:when test="string-length(@nameIdentifier) &gt; 0">
176
														<xsl:value-of select="normalize-space(@nameIdentifier)"/>
177
													</xsl:when>
178
													<xsl:otherwise>
179
														<xsl:value-of select="concat($originalid, '::', normalize-space(.))"/>
180
													</xsl:otherwise>
181
												</xsl:choose>
182
											</xsl:variable>
183

  
184
											<xsl:variable name="coauthorId"
185
											              select="dnet:oafId('person', $namespaceprefix, $originalCoauthorId)"/>
186

  
187
											<xsl:variable name="personperson"
188
											              select="dnet:rel($personId, $coauthorId, 'personPerson', 'coauthorship', 'isCoAuthorOf', $provenance, $trust, $about)"/>
189

  
190
											<ROW key="{$personId}" columnFamily="personPerson_coAuthorship_isCoAuthorOf">
191
												<QUALIFIER name="{$coauthorId}" type="base64">
192
													<xsl:value-of select="$personperson"/>
193
												</QUALIFIER>
194
											</ROW>
195
										</xsl:if>
196
									</xsl:for-each>
197
								</xsl:if>
198
								<!-- / COAUTHORS -->
199
							</xsl:if>
200
						</xsl:for-each>
201

  
202
					</ROWS>
203
				</xsl:if>
204
			</xsl:otherwise>
205
		</xsl:choose>
206
	</xsl:template>
207
</xsl:stylesheet>
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupFindPersonRootsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="8ec4731e-4e91-4863-9a4b-7f0a8ca0542e_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupFindPersonRootsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that finds the root of a similarity group (person)</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />
27
				
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />		
33
			
34
				<PROPERTY key="mapred.reduce.tasks" value="100" />
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36
				
37
		<!--  	Uncomment to override the default lib path -->			
38
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
        	</STATIC_CONFIGURATION>
40
        	<JOB_INTERFACE>
41
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
42
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
43
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
44
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
45
        	</JOB_INTERFACE>
46
        	<SCAN>
47
        		<FILTERS operator="MUST_PASS_ALL">
48
        			<FILTER type="prefix" param="entityTypeId" />
49
        		</FILTERS>
50
        		<FAMILIES>
51
        			<FAMILY param="entityType" />
52
        			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
53
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
54
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
55
        		</FAMILIES>
56
        	</SCAN>
57
        </HADOOP_JOB>
58
        <STATUS>
59
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
60
            <RUNNING_INSTANCES value="0"/>
61
            <CUMULATIVE_RUN value="0" />
62
        </STATUS>
63
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
64
    </BODY>
65
</RESOURCE_PROFILE>
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupSimilarity2HdfsActionsJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="baa63c0c-0ff3-4a15-93c1-e361800e9ca8_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="dedupSimilarity2HdfsActionsJob" type="mapreduce">
11
			<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION>
12
			<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />
17

  
18
				<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupSimilarityToHdfsActionsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22

  
23
				<!-- OUTPUT -->
24
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
25
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />
26

  
27
				<!-- MISC -->
28
				<PROPERTY key="mapred.compress.map.output" value="true" />
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
33

  
34
				<PROPERTY key="mapred.output.compress" value="true"/>
35
				<PROPERTY key="mapred.output.compression.type" value="BLOCK"/>
36

  
37
				<PROPERTY key="mapred.reduce.tasks" value="10" />
38
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
39

  
40
				<!--  	Uncomment to override the default lib path -->
41
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
42
			</STATIC_CONFIGURATION>
43
			<JOB_INTERFACE>
44
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
45
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
46
				<!-- output goes through SequenceFileOutputFormat, so the target is an HDFS directory, not an HBase table -->
				<PARAM name="mapred.output.dir" required="true" description="target directory on hdfs (sequence files)" />
47
				<PARAM name="dedup.conf" required="true" description="dedup configuration" />
48
				<PARAM name="rawSetId" required="true" description="raw set identifier" />
49
				<PARAM name="similarityCF" required="true" description="similarity column family name" />
50
			</JOB_INTERFACE>
51
			<SCAN>
52
				<FILTERS operator="MUST_PASS_ALL">
53
					<FILTER type="prefix" param="entityTypeId" />
54
				</FILTERS>
55
				<FAMILIES>
56
					<FAMILY param="entityType" />
57
					<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
58
					<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
59
					<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
60
				</FAMILIES>
61
			</SCAN>
62
		</HADOOP_JOB>
63
		<STATUS>
64
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
65
			<RUNNING_INSTANCES value="0"/>
66
			<CUMULATIVE_RUN value="0" />
67
		</STATUS>
68
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
69
	</BODY>
70
</RESOURCE_PROFILE>
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/sqoopStatsUpdateJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="bf0ab07b-36bf-4164-ab73-342bfb11e51a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="StatsExportJob" type="oozie">
11
            <DESCRIPTION>Job for importing data from HBASE to the relational Stats Database</DESCRIPTION>
12
            <STATIC_CONFIGURATION>
13
			
14
				<!-- Cluster wide -->
15
                <PROPERTY key="queueName" value="default"/>
16
                <PROPERTY key="user.name" value="dnet"/> <!-- username = sqoop?? -->
17
                <PROPERTY key="workingDir" value="/user/dnet/lib/stats/working_dir"/>
18
                <PROPERTY key="numReducers" value="1"/>
19
                
20
				<PROPERTY key="oozie.wf.application.path" value="hdfs://nmis-hadoop-cluster/user/eri.katsari/stats/oozie_app"/><!-- edit this property! -->
21
				<PROPERTY key="Stats_db_Url" value="jdbc:postgresql://node1.t.openaire.research-infrastructures.eu:5432/stats"/><!-- complete the jdbc url with the actual value! -->
22
				<PROPERTY key="Stats_db_User" value="sqoop"/>
23
				<PROPERTY key="Stats_db_Pass" value="sqoop"/>
24
				<PROPERTY key="Stats_db_Driver" value="org.postgresql.Driver"/>
25
				<!-- comma separated key=value pairs; each key is listed exactly once -->
				<PROPERTY key="Stats_db_table_map" value="datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultTopic=result_topics,category=category,context=context,claim=claim,concept=concept,resultDatasource=result_datasources"/>
26
				<PROPERTY key="Stats_sqoop_RecsPerStatement" value="10000"/>
27
				<PROPERTY key="Stats_sqoop_StatementPerTrans" value="1000000"/>
28
				<PROPERTY key="Stats_sqoop_ReducersCount" value="4"/>
29
				<PROPERTY key="Stats_output_Path" value="/tmp/stats/"/>
30
				<PROPERTY key="Stats_null_String_Field" value="null"/>
31
				<PROPERTY key="Stats_null_Numeric_Field" value="null"/>
32
				<PROPERTY key="Stats_enclosing_Character" value="#"/>
33
				<PROPERTY key="Stats_delim_Character" value="!"/>
34
				<PROPERTY key="out1" value="datasource"/>
35
				<PROPERTY key="out2" value="project"/>
36
				<PROPERTY key="out3" value="organization"/>
37
				<PROPERTY key="out4" value="datasourceOrganization"/>
38
				<PROPERTY key="out5" value="datasourceTopic"/>
39
				<PROPERTY key="out6" value="datasourceLanguage"/>
40
				<PROPERTY key="out7" value="projectOrganization"/>
41
				<PROPERTY key="out8" value="resultClaim"/>
42
				<PROPERTY key="out9" value="resultClassification"/>
43
				<PROPERTY key="out10" value="resultConcept"/>
44
				<PROPERTY key="out11" value="resultLanguage"/>
45
				<PROPERTY key="out12" value="resultOrganization"/>
46
				<PROPERTY key="out13" value="resultResult"/>
47
				<PROPERTY key="out14" value="resultProject"/>
48
				<PROPERTY key="out15" value="category"/>
49
				<PROPERTY key="out16" value="resultTopic"/>
50
				<PROPERTY key="out17" value="resultDatasource"/>
51
				<PROPERTY key="out18" value="result"/>
52
				<PROPERTY key="out19" value="claim"/>
53
				<PROPERTY key="out20" value="concept"/>
54
            </STATIC_CONFIGURATION>
55
            <JOB_INTERFACE>
56
                <PARAM name="nameNode" required="true" description="hdfs name node"/>
57
                <PARAM name="jobTracker" required="true" description="job tracker name"/>
58
                <PARAM name="Stats_Hbase_Source_Table" required="true" description="Hbase Table with Protobuffs."/>
59
                <PARAM name="Stats_indexConf" required="true" description="Index Entity Links configuration."/>
60
                <PARAM name="isLookupEndpoint" required="true" description="IS lookup service endpoint"/>
61
            </JOB_INTERFACE>
62
        </HADOOP_JOB>
63
        <STATUS>
64
            <LAST_SUBMISSION_DATE value="2014-11-14T19:57:25+00:00"/>
65
            <RUNNING_INSTANCES value="0"/>
66
            <CUMULATIVE_RUN value="75"/>
67
        </STATUS>
68
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
69
    </BODY>
70
</RESOURCE_PROFILE>
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupRootsExportJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="53f2a9b4-adf3-4ceb-9308-d88b53dc44c5_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupRootsExportJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that exports the representative publications as json</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.RootExportMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22
			
23
			
24
				<!-- MISC -->
25

  
26
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
27
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
28
				<PROPERTY key="mapreduce.map.speculative" value="false" />
29
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
30

  
31
				<PROPERTY key="dfs.blocksize" value="256M" />
32
			
33
				<PROPERTY key="mapred.reduce.tasks" value="1" />
34
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
35
				
36
		<!--  	Uncomment to override the default lib path -->			
37
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
38
        	</STATIC_CONFIGURATION>
39
        	<JOB_INTERFACE>
40
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
41
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
42
        		
43
        		<!-- output goes through TextOutputFormat, so the target holds plain text files, not a sequence file -->
        		<PARAM name="mapred.output.dir" required="true" description="target directory on hdfs (text files)" />
44
        	</JOB_INTERFACE>
45
        	<SCAN>
46
        		<FILTERS operator="MUST_PASS_ALL">
47
        			<FILTER type="prefix" param="entityTypeId" />
48
        		</FILTERS>
49
        		<FAMILIES>
50
	       			<FAMILY param="entityType" />
51
        		</FAMILIES>
52
        	</SCAN>
53
        </HADOOP_JOB>
54
        <STATUS>
55
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
56
            <RUNNING_INSTANCES value="0"/>
57
            <CUMULATIVE_RUN value="0" />
58
        </STATUS>
59
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
60
    </BODY>
61
</RESOURCE_PROFILE>
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/informationSpaceImportJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="b7d51a07-6996-4841-9a4a-685a044638e3_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="informationSpaceImportJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that imports the whole information space table from a sequence file holding a json dump</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17
				
18
				<!-- MAPPER -->        	
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.ImportInformationSpaceDumpMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
22
			
23
				<!-- MISC -->
24
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
25
				<PROPERTY key="mapreduce.map.speculative" value="false" />
26
				<PROPERTY key="mapred.reduce.tasks" value="0" />
27
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
28
				
29
		<!--  	Uncomment to override the default lib path -->			
30
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
31
        	</STATIC_CONFIGURATION>
32
        	<JOB_INTERFACE>
33
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
34
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
35
        		
36
        		<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" />         		
37
        	</JOB_INTERFACE>
38
        	<SCAN>
39
        		<FILTERS />
40
        		<FAMILIES />
41
        	</SCAN>
42
        </HADOOP_JOB>
43
        <STATUS>
44
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
45
            <RUNNING_INSTANCES value="0"/>
46
            <CUMULATIVE_RUN value="0" />
47
        </STATUS>
48
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
49
    </BODY>
50
</RESOURCE_PROFILE>
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/resetDedupJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="bc4f377a-af07-403d-a019-af60aa557652_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="resetDedupJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that resets the dedup jobs</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"  />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />		
17

  
18
				<!-- MAPPER -->        	
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.reset.HBaseResetMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"  />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Mutation"  />
22

  
23
				<!-- MISC -->
24
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"  />
25
				<PROPERTY key="mapreduce.map.speculative" value="false"  />
26
				<PROPERTY key="mapred.reduce.tasks" value="0"  />
27
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
28
				
29
		<!--  	Uncomment to override the default lib path -->			
30
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
31
				
32
        	</STATIC_CONFIGURATION>
33
        	<JOB_INTERFACE>
34
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
35
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
36
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />        		
38
        	</JOB_INTERFACE>
39
        </HADOOP_JOB>
40
        <STATUS>
41
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
42
            <RUNNING_INSTANCES value="0"/>
43
            <CUMULATIVE_RUN value="0" />
44
        </STATUS>
45
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
46
    </BODY>
47
</RESOURCE_PROFILE>
modules/dnet-openaire-domain/trunk/src/test/java/eu/dnetlib/data/actionmanager/actions/ActionFactoryTest.java
2 2

  
3 3
import java.io.IOException;
4 4
import java.io.InputStream;
5
import java.io.StringReader;
5 6
import java.io.StringWriter;
6 7
import java.util.Arrays;
8
import java.util.HashMap;
7 9
import java.util.List;
8
import javax.annotation.Resource;
10
import java.util.Map;
9 11
import javax.xml.transform.TransformerException;
10 12

  
13
import com.google.common.collect.Maps;
11 14
import eu.dnetlib.data.proto.OafProtos.Oaf;
12 15
import eu.dnetlib.rmi.data.actionmanager.ActionManagerException;
13 16
import eu.dnetlib.rmi.data.actionmanager.Agent;
......
17 20
import org.apache.commons.logging.Log;
18 21
import org.apache.commons.logging.LogFactory;
19 22
import org.apache.hadoop.hbase.client.Put;
23
import org.dom4j.Document;
20 24
import org.dom4j.DocumentException;
25
import org.dom4j.io.SAXReader;
26
import org.junit.Before;
21 27
import org.junit.Test;
22 28
import org.junit.runner.RunWith;
29
import org.mockito.Mock;
30
import org.mockito.Mockito;
31
import org.mockito.runners.MockitoJUnitRunner;
32
import org.springframework.core.io.ClassPathResource;
33
import org.springframework.core.io.Resource;
23 34
import org.springframework.test.context.ContextConfiguration;
24 35
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
25 36

  
26 37
import static org.junit.Assert.*;
27 38

  
28
@RunWith(SpringJUnit4ClassRunner.class)
29
@ContextConfiguration(locations = { "/eu/dnetlib/data/actionmanager/actions/applicationContext-actionmanager-test.xml" })
39
@RunWith(MockitoJUnitRunner.class)
30 40
public class ActionFactoryTest {
31 41

  
32 42
	private static final Log log = LogFactory.getLog(ActionFactoryTest.class);
43
	private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/";
33 44
	private final static Agent agent = new Agent("agentId", "agentName", Agent.AGENT_TYPE.algo);
34 45
	private final static String trust = "0.9";
35 46
	private final InputStream recordOdfInputStream = getClass().getResourceAsStream("recordOdf.xml");
36 47
	private final InputStream recordOafInputStream = getClass().getResourceAsStream("recordOaf.xml");
37
	@Resource
38
	private eu.dnetlib.data.actionmanager.actions.ActionFactory actionFactory;
39 48

  
40
	// @Before
41
	// public void setUp() throws Exception {
42
	//
43
	// }
49
	private ActionFactory actionFactory;
50
	private Map<String, Resource> xslts = new HashMap<>();
44 51

  
52
	@Mock
53
	private Resource oaf2insertActions;
54
	@Mock
55
	private Resource oaf2updateActions;
56
	@Mock
57
	private Resource odf2insertActions;
58
	@Mock
59
	private Resource odf2updateActions;
60
	@Mock
61
	private Resource rels2actions;
62

  
63
	@Before
64
	public void setUp() throws Exception {
65
		actionFactory = new ActionFactory();
66

  
67
		xslts.put("oaf2insertActions", oaf2insertActions);
68
		xslts.put("oaf2updateActions", oaf2updateActions);
69
		xslts.put("odf2insertActions", odf2insertActions);
70
		xslts.put("odf2updateActions", odf2updateActions);
71
		xslts.put("rels2actions", rels2actions);
72

  
73
		Mockito.when(oaf2insertActions.getInputStream()).thenReturn(loadFromTransformationProfile("oaf2insertActions.xml"));
74
		Mockito.when(oaf2updateActions.getInputStream()).thenReturn(loadFromTransformationProfile("oaf2updateActions.xml"));
75
		Mockito.when(odf2insertActions.getInputStream()).thenReturn(loadFromTransformationProfile("odf2insertActions.xml"));
76
		Mockito.when(odf2updateActions.getInputStream()).thenReturn(loadFromTransformationProfile("odf2updateActions.xml"));
77
//		Mockito.when(rels2actions.getInputStream()).thenReturn(loadFromTransformationProfile("rels2actions.xml"));
78

  
79
		actionFactory.setXslts(xslts);
80
	}
81

  
45 82
	@Test
46 83
	public void testBeans() {
47 84
		assertNotNull(actionFactory);
......
93 130
		for (AtomicAction a : action.calculateAtomicActions()) {
94 131
			assertNotNull(a.getTargetValue());
95 132

  
96
			String actionJSON = a.toString();
133
			String actionJSON = AtomicActionSerialiser.toJSON(a);
97 134

  
98 135
			log.info(actionJSON);
99 136

  
......
126 163
		IOUtils.copy(i, writer);
127 164
		return writer.toString();
128 165
	}
166

  
167
	private InputStream loadFromTransformationProfile(final String profilePath) {
168
		log.info("Loading xslt from: " + basePathProfiles + profilePath);
169
		InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath);
170
		final String s;
171
		try {
172
			s = IOUtils.toString(profile);
173
			log.debug("profile: " + s);
174
		} catch (IOException e) {
175
			e.printStackTrace();
176
			throw new RuntimeException(e);
177
		}
178

  
179
		final SAXReader saxReader = new SAXReader();
180
		Document doc = null;
181

  
182
		try {
183
			doc = saxReader.read(new StringReader(s));
184
		} catch (DocumentException e) {
185
			e.printStackTrace();
186
			throw new RuntimeException(e);
187
		}
188
		String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML();
189
		return IOUtils.toInputStream(xslt);
190
	}
129 191
}
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/offlineHbaseLoadJob.xml
1
<RESOURCE_PROFILE>
    <HEADER>
        <RESOURCE_IDENTIFIER value="073e55eb-c6f4-49a9-80b3-1a927612ba5b_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <!-- Mapper-only job definition: the reducer count below is set to 0. -->
        <HADOOP_JOB name="offlineHbaseLoad" type="mapreduce">
            <DESCRIPTION>map reduce job that loads a given entity type in the offline dedup table</DESCRIPTION>
            <STATIC_CONFIGURATION>

                <!-- Input and output formats: both ends are HBase tables -->
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>

                <!-- Mapper class and the key/value types it emits -->
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.OfflineHbaseLoadMapper"/>
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>

                <!-- Miscellaneous: compression on, speculative execution off under both the mapred.* and mapreduce.* keys -->
                <PROPERTY key="mapred.compress.map.output" value="true"/>
                <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
                <PROPERTY key="mapreduce.reduce.speculative" value="false"/>

                <PROPERTY key="mapred.reduce.tasks" value="0"/>
                <!-- <PROPERTY key="user.name" value="dnet" /> -->

                <!-- Uncomment to override the default lib path -->
                <!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
            </STATIC_CONFIGURATION>
            <JOB_INTERFACE>
                <!-- The table names are requested under both the hbase.mapred.* and hbase.mapreduce.* keys -->
                <PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
                <PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
                <PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
                <PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/>
            </JOB_INTERFACE>
            <SCAN>
                <FILTERS operator="MUST_PASS_ALL">
                    <FILTER type="prefix" param="entityTypeId"/>
                </FILTERS>
                <FAMILIES>
                    <FAMILY param="entityType"/>
                </FAMILIES>
            </SCAN>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
            <RUNNING_INSTANCES value="0"/>
            <CUMULATIVE_RUN value="0"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingQuickJob.xml
1
<RESOURCE_PROFILE>
    <HEADER>
        <RESOURCE_IDENTIFIER value="13beed98-81bf-4fbd-ab4f-de071177997c_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <!-- Oozie job definition: fixed settings live in STATIC_CONFIGURATION, caller-supplied ones in JOB_INTERFACE. -->
        <HADOOP_JOB name="iisPreprocessingQuickJob" type="oozie">
            <DESCRIPTION>IIS preprocessing</DESCRIPTION>
            <STATIC_CONFIGURATION>
                <!-- Cluster wide -->
                <PROPERTY key="queueName" value="default"/>
                <PROPERTY key="user.name" value="dnet"/>

                <!-- Runtime -->
                <PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing_quick_test"/>
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
                <PROPERTY key="input_referenceextraction_project" value="/user/marek.horst/share/referenceextraction/document_projects/2014-04-11"/>
                <PROPERTY key="input_referenceextraction_dataset" value="/user/marek.horst/share/referenceextraction/document_datasets/all/2014-04-11"/>
                <PROPERTY key="export_action_hbase_table_initialize" value="false"/>
            </STATIC_CONFIGURATION>
            <JOB_INTERFACE>
                <!-- Description corrected: it previously repeated the mdstore text of the next parameter. -->
                <PARAM name="import_content_object_store_location" required="true" description="object store service location"/>
                <PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/>
                <PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records"/>
                <PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records"/>
                <PARAM name="import_database_service_location" required="true" description="database service endpoint"/>
                <PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction"/>
                <PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext"/>
                <PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/>
                <PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/>
                <PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/>
                <PARAM name="nameNode" required="true" description="hdfs name node"/>
                <PARAM name="jobTracker" required="true" description="job tracker name"/>
                <PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/>
                <PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references"/>
                <PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references"/>
                <PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities"/>
                <PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities"/>
            </JOB_INTERFACE>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
            <RUNNING_INSTANCES value="0"/>
            <CUMULATIVE_RUN value="0"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJob.xml
1
<RESOURCE_PROFILE>
	<HEADER>
		<RESOURCE_IDENTIFIER
				value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
	</HEADER>
	<BODY>
		<!-- Oozie job definition: fixed settings live in STATIC_CONFIGURATION, caller-supplied ones in JOB_INTERFACE. -->
		<HADOOP_JOB name="iisPreprocessingJob" type="oozie">
			<DESCRIPTION>IIS preprocessing</DESCRIPTION>
			<STATIC_CONFIGURATION>
				<!-- Cluster wide -->
				<PROPERTY key="queueName" value="default"/>
				<PROPERTY key="user.name" value="dnet.beta"/>

				<!-- Runtime -->
				<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing"/>
				<!-- NOTE(review): oozie.wf.application.path is also declared as a required PARAM in JOB_INTERFACE below; confirm which value takes precedence. -->
				<PROPERTY key="oozie.wf.application.path" value="/tmp/integration/apps/preprocessing"/>
				<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
				<PROPERTY key="export_action_hbase_table_initialize" value="false"/>
				<!-- <PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> -->
				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/>
				<PROPERTY key="metadataextraction_excluded_checksums"
				          value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/>
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/>
				<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/>
			</STATIC_CONFIGURATION>
			<JOB_INTERFACE>
				<!-- Description corrected: it previously repeated the mdstore text of the next parameter. -->
				<PARAM name="import_content_object_store_location" required="true" description="object store service location"/>
				<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/>
				<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records"/>
				<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records"/>
				<PARAM name="import_database_service_location" required="true" description="database service endpoint"/>
				<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction"/>
				<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext"/>
				<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/>
				<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/>
				<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/>
				<PARAM name="nameNode" required="true" description="hdfs name node"/>
				<PARAM name="jobTracker" required="true" description="job tracker name"/>
				<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/>
				<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references"/>
				<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references"/>
				<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities"/>
				<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities"/>
			</JOB_INTERFACE>
		</HADOOP_JOB>
		<STATUS>
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
			<RUNNING_INSTANCES value="0"/>
			<CUMULATIVE_RUN value="0"/>
		</STATUS>
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
	</BODY>
</RESOURCE_PROFILE>
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/connectedComponentsJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="28e8d911-87e5-4f39-9ce9-0d445126c75f_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="connectedComponentsJob" type="mapreduce">
12
			<DESCRIPTION>map reduce job joins all the vertex ids to build the connected components in the graph</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.ConnectedComponentsMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/>
23

  
24
				<!-- REDUCER -->
25
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.ConnectedComponentsReducer"/>
26
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
27
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.hbase.client.Put"/>
28

  
29
				<!-- MISC -->
30
				<PROPERTY key="mapred.compress.map.output" value="false"/>
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
33
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
34
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
35

  
36
				<PROPERTY key="mapred.reduce.tasks" value="1"/>
37
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
38

  
39
				<!--  	Uncomment to override the default lib path -->
40
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
41
			</STATIC_CONFIGURATION>
42
			<JOB_INTERFACE>
43
				<PARAM name="mapred.input.dir" required="true" description="source path on hdfs"/>
44
				<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
45
			</JOB_INTERFACE>
46
			<SCAN>
47
				<FILTERS/>
48
				<FAMILIES/>
49
			</SCAN>
50
		</HADOOP_JOB>
51
		<STATUS>
52
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
53
			<RUNNING_INSTANCES value="0"/>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff