Revision 42821
Added by Claudio Atzori over 8 years ago
modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/applicationContext-actionmanager-test.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<beans xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
3 |
xmlns:jaxws="http://cxf.apache.org/jaxws" xmlns:sec="http://cxf.apache.org/configuration/security" |
|
4 |
xmlns:wsa="http://cxf.apache.org/ws/addressing" xmlns:p="http://www.springframework.org/schema/p" |
|
5 |
xmlns:http="http://cxf.apache.org/transports/http/configuration" xmlns:t="http://dnetlib.eu/springbeans/t" |
|
6 |
xmlns:template="http://dnetlib.eu/springbeans/template" xmlns:util="http://www.springframework.org/schema/util" |
|
7 |
xmlns="http://www.springframework.org/schema/beans" |
|
8 |
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd |
|
9 |
http://cxf.apache.org/ws/addressing http://cxf.apache.org/schemas/ws-addr-conf.xsd |
|
10 |
http://cxf.apache.org/configuration/security http://cxf.apache.org/schemas/configuration/security.xsd |
|
11 |
http://cxf.apache.org/transports/http/configuration http://cxf.apache.org/schemas/configuration/http-conf.xsd |
|
12 |
http://cxf.apache.org/jaxws http://cxf.apache.org/schemas/jaxws.xsd |
|
13 |
http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd |
|
14 |
http://dnetlib.eu/springbeans/template http://dnetlib.eu/springbeans/template.xsd"> |
|
15 |
|
|
16 |
|
|
17 |
<bean id="actionFactory" class="eu.dnetlib.data.actionmanager.actions.ActionFactory"> |
|
18 |
<property name="xslts"> |
|
19 |
<util:map> |
|
20 |
<entry key="oaf2insertActions" value="classpath:/eu/dnetlib/data/actionmanager/actions/xslt/oaf2insertActions.xslt" /> |
|
21 |
<entry key="oaf2updateActions" value="classpath:/eu/dnetlib/data/actionmanager/actions/xslt/oaf2updateActions.xslt" /> |
|
22 |
<entry key="odf2insertActions" value="classpath:/eu/dnetlib/data/actionmanager/actions/xslt/odf2insertActions.xslt" /> |
|
23 |
<entry key="odf2updateActions" value="classpath:/eu/dnetlib/data/actionmanager/actions/xslt/odf2updateActions.xslt" /> |
|
24 |
<entry key="rels2Actions" value="classpath:/eu/dnetlib/data/actionmanager/actions/xslt/rels2actions.xslt" /> |
|
25 |
</util:map> |
|
26 |
</property> |
|
27 |
</bean> |
|
28 |
|
|
29 |
</beans> |
modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/xslt/odf2updateActions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
|
3 |
xmlns:dnet="eu.dnetlib.data.transform.xml.OdfToHbaseXsltFunctions" |
|
4 |
xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
5 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
6 |
xmlns:date="java.lang.System" |
|
7 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" |
|
8 |
xmlns:exslt="http://exslt.org/common" version="1.0" |
|
9 |
extension-element-prefixes="exslt" exclude-result-prefixes="xsl dnet exslt oaf dr dri date"> |
|
10 |
|
|
11 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
12 |
|
|
13 |
<xsl:param name="trust" select="string('0.9')"/> |
|
14 |
<xsl:param name="provenance" select="string('UNKNOWN')"/> |
|
15 |
<xsl:param name="namespaceprefix" select="string('datacite____')"/> |
|
16 |
|
|
17 |
<xsl:template match="/*"> |
|
18 |
<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/> |
|
19 |
<xsl:variable name="metadata" select="exslt:node-set(/*[local-name()='record']/*[local-name()='metadata']/*[local-name()='resource'])"/> |
|
20 |
<xsl:variable name="rightNSPrefix"> |
|
21 |
<xsl:choose> |
|
22 |
<xsl:when test="not($namespaceprefix)"> |
|
23 |
<xsl:value-of select="//oaf:datasourceprefix"/> |
|
24 |
</xsl:when> |
|
25 |
<xsl:otherwise> |
|
26 |
<xsl:value-of select="$namespaceprefix"/> |
|
27 |
</xsl:otherwise> |
|
28 |
</xsl:choose> |
|
29 |
</xsl:variable> |
|
30 |
|
|
31 |
<xsl:choose> |
|
32 |
<xsl:when test="count($metadata) = 0"> |
|
33 |
<ACTIONS/> |
|
34 |
</xsl:when> |
|
35 |
<xsl:otherwise> |
|
36 |
<xsl:variable name="originalId" select="//*[local-name() = 'identifier' and ./@identifierType='DOI']"/> |
|
37 |
<xsl:variable name="resultId" select="dnet:oafSimpleId('result', //dri:objIdentifier)"/> |
|
38 |
|
|
39 |
<xsl:if test="string-length($resultId) > 0"> |
|
40 |
<xsl:variable name="originalids" |
|
41 |
select="//*[local-name() = 'resource']/*[local-name()='identifier'] | //*[local-name() = 'resource']//*[local-name()='alternateIdentifier']"/> |
|
42 |
|
|
43 |
<xsl:variable name="creators" select="//*[local-name() = 'creator']"/> |
|
44 |
<xsl:variable name="titles" select="//*[local-name() = 'title']"/> |
|
45 |
<xsl:variable name="subjects" select="//*[local-name() = 'subject']"/> |
|
46 |
<xsl:variable name="publisher" select="//*[local-name() = 'publisher']"/> |
|
47 |
<xsl:variable name="descriptions" select="//*[local-name() = 'description']"/> |
|
48 |
<xsl:variable name="dates" select="//*[local-name() = 'date']"/> |
|
49 |
<xsl:variable name="dateaccepted" select="//oaf:dateAccepted"/> |
|
50 |
<xsl:variable name="resourceType" select="//*[local-name() = 'resourceType']"/> |
|
51 |
<xsl:variable name="formats" select="//*[local-name() = 'format']"/> |
|
52 |
<xsl:variable name="sizes" select="//*[local-name() = 'size']"/> |
|
53 |
<xsl:variable name="rights" select="//oaf:accessrights"/> |
|
54 |
<xsl:variable name="version" select="//*[local-name() = 'version']"/> |
|
55 |
<xsl:variable name="instanceURI" |
|
56 |
select="concat('http://dx.doi.org','/',//*[local-name() = 'resource']/*[local-name() = 'identifier'])"/> |
|
57 |
<xsl:variable name="hostedbyid" |
|
58 |
select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/> |
|
59 |
<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/> |
|
60 |
<xsl:variable name="collectedfromid" |
|
61 |
select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/> |
|
62 |
<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/> |
|
63 |
<xsl:variable name="dateOfCollection" select="//dr:dateOfCollection"/> |
|
64 |
<xsl:variable name="dateoftransformation" select="concat('', //dr:dateOfTransformation)"/> |
|
65 |
<xsl:variable name="language" select="//oaf:language"/> |
|
66 |
<xsl:variable name="cobjcategory" select="//dr:CobjCategory"/> |
|
67 |
<xsl:variable name="contributors" select="//*[local-name() = 'contributorName']"/> |
|
68 |
<xsl:variable name="pids" select="//*[local-name() = 'resource']/*[local-name()='identifier']"/> |
|
69 |
|
|
70 |
<xsl:variable name="result" |
|
71 |
select="dnet:odfResult($resultId, $about, $metadata, $titles, $subjects, $publisher, $descriptions, |
|
72 |
$dates, $dateaccepted, $resourceType, $formats, $sizes, $language, $cobjcategory, $contributors, $rights, |
|
73 |
$version, $pids, $provenance, $trust, $hostedbyid, $hostedbyname, $collectedfromid, |
|
74 |
$collectedfromname, $originalids, $instanceURI, $dateOfCollection, $dateoftransformation)"/> |
|
75 |
<ACTIONS> |
|
76 |
<ACTION targetKey="{$resultId}" targetColumnFamily="result" targetColumn="{concat('update_', date:nanoTime())}"> |
|
77 |
<xsl:value-of select="$result"/> |
|
78 |
</ACTION> |
|
79 |
</ACTIONS> |
|
80 |
</xsl:if> |
|
81 |
</xsl:otherwise> |
|
82 |
</xsl:choose> |
|
83 |
</xsl:template> |
|
84 |
</xsl:stylesheet> |
modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/xslt/odf2insertActions.xslt | ||
---|---|---|
1 |
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
|
2 |
xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
3 |
xmlns:dnet="eu.dnetlib.data.transform.xml.OdfToHbaseXsltFunctions" |
|
4 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:exslt="http://exslt.org/common" |
|
6 |
version="1.0" extension-element-prefixes="exslt" |
|
7 |
exclude-result-prefixes="xsl oaf dr dri dnet exslt"> |
|
8 |
|
|
9 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
10 |
<xsl:template match="/*"> |
|
11 |
<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/> |
|
12 |
<xsl:variable name="dateOfCollection" select="concat('', //dri:dateOfCollection)"/> |
|
13 |
<xsl:variable name="dateoftransformation" select="concat('', //dr:dateOfTransformation)"/> |
|
14 |
|
|
15 |
<xsl:variable name="trust" select="string('0.9')"/> |
|
16 |
<xsl:variable name="provenance" select="string('sysimport:crosswalk:datasetarchive')"/> |
|
17 |
|
|
18 |
<xsl:variable name="metadata" |
|
19 |
select="exslt:node-set(//*[local-name()='metadata']/*)"/> |
|
20 |
<xsl:variable name="namespaceprefix"> <!-- Prefix passed to dnet:oafId for person ids: //oaf:datasourceprefix when non-empty, otherwise a literal default. --> |

21 |
<xsl:choose> |

22 |
<!-- TODO check namespaceprefix length is 12 --> |

23 |
<xsl:when test="string-length(//oaf:datasourceprefix) > 0"> |

24 |
<xsl:value-of select="//oaf:datasourceprefix"/> |

25 |
</xsl:when> |

26 |
<xsl:otherwise> <!-- The default must be a quoted XPath string literal: a bare name is a child-element step and evaluates to an empty string here. --> |

27 |
<xsl:value-of select="'unknown_____'"/> |

28 |
</xsl:otherwise> |

29 |
</xsl:choose> |

30 |
</xsl:variable> |
|
31 |
|
|
32 |
<xsl:choose> |
|
33 |
<xsl:when |
|
34 |
test="count($metadata) = 0 or normalize-space(//oaf:skipRecord)= 'true'"> |
|
35 |
<ROWS/> |
|
36 |
</xsl:when> |
|
37 |
<xsl:otherwise> |
|
38 |
|
|
39 |
<xsl:variable name="resultId" |
|
40 |
select="dnet:oafSimpleId('result', //dri:objIdentifier)"/> |
|
41 |
|
|
42 |
<xsl:if test="string-length($resultId) > 0"> |
|
43 |
<xsl:variable name="originalids" |
|
44 |
select="//*[local-name() = 'resource']/*[local-name()='identifier'] | //*[local-name() = 'resource']//*[local-name()='alternateIdentifier']"/> |
|
45 |
<xsl:variable name="creators" select="//*[local-name() = 'creator']"/> |
|
46 |
<xsl:variable name="titles" select="//*[local-name() = 'title']"/> |
|
47 |
<xsl:variable name="subjects" select="//*[local-name() = 'subject']"/> |
|
48 |
<xsl:variable name="publisher" select="//*[local-name() = 'publisher']"/> |
|
49 |
<xsl:variable name="descriptions" select="//*[local-name() = 'description']"/> |
|
50 |
<xsl:variable name="dates" select="//*[local-name() = 'date']"/> |
|
51 |
<xsl:variable name="dateaccepted" select="//oaf:dateAccepted"/> |
|
52 |
<xsl:variable name="resourceType" select="//*[local-name() = 'resourceType']"/> |
|
53 |
<xsl:variable name="formats" select="//*[local-name() = 'format']"/> |
|
54 |
<xsl:variable name="sizes" select="//*[local-name() = 'size']"/> |
|
55 |
<xsl:variable name="rights" select="//oaf:accessrights"/> |
|
56 |
<xsl:variable name="version" select="//*[local-name() = 'version']"/> |
|
57 |
<xsl:variable name="language" select="//oaf:language"/> |
|
58 |
<xsl:variable name="cobjcategory" select="//dr:CobjCategory"/> |
|
59 |
<xsl:variable name="contributor" select="//*[local-name() = 'contributor']/*[local-name() = 'contributorName']"/> |
|
60 |
|
|
61 |
<xsl:variable name="instanceURI"> |
|
62 |
<xsl:choose> |
|
63 |
<xsl:when |
|
64 |
test="string-length( //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='DOI']) > 0"> |
|
65 |
<xsl:value-of |
|
66 |
select="concat('http://dx.doi.org','/', //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='DOI']/text())"/> |
|
67 |
</xsl:when> |
|
68 |
<xsl:otherwise> |
|
69 |
<xsl:value-of |
|
70 |
select="concat('', //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='URL'])"/> |
|
71 |
</xsl:otherwise> |
|
72 |
</xsl:choose> |
|
73 |
</xsl:variable> |
|
74 |
|
|
75 |
<xsl:variable name="hostedbyid" |
|
76 |
select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/> |
|
77 |
<xsl:variable name="hostedbyname" select="concat('', //oaf:hostedBy/@name)"/> |
|
78 |
<xsl:variable name="collectedfromid" |
|
79 |
select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/> |
|
80 |
<xsl:variable name="collectedfromname" |
|
81 |
select="concat('', //oaf:collectedFrom/@name)"/> |
|
82 |
|
|
83 |
<xsl:variable name="pids" |
|
84 |
select="//*[local-name() = 'resource']//*[local-name()='identifier'or local-name()='alternateIdentifier']"/> |
|
85 |
|
|
86 |
<xsl:variable name="result" |
|
87 |
select="dnet:odfResult($resultId, $about, $metadata, $titles, $subjects, $publisher, $descriptions, |
|
88 |
$dates, $dateaccepted, $resourceType, $formats, $sizes, $language, $cobjcategory, $contributor, $rights, |
|
89 |
$version, $pids, $provenance, $trust, $hostedbyid, $hostedbyname, $collectedfromid, |
|
90 |
$collectedfromname, $originalids, $instanceURI, $dateOfCollection, $dateoftransformation)"/> |
|
91 |
|
|
92 |
<ROWS> |
|
93 |
<ROW key="{$resultId}" columnFamily="result"> |
|
94 |
<QUALIFIER name="body" type="base64"> |
|
95 |
<xsl:value-of select="$result"/> |
|
96 |
</QUALIFIER> |
|
97 |
</ROW> |
|
98 |
<xsl:for-each select="//*[local-name() = 'creator']"> |
|
99 |
|
|
100 |
<xsl:variable name="personIdTemp"> |
|
101 |
<xsl:choose> |
|
102 |
<xsl:when test="string-length(./*[local-name() = 'nameIdentifier']) > 0"> |
|
103 |
<xsl:value-of |
|
104 |
select="translate(normalize-space(./*[local-name() = 'nameIdentifier']),' .,','___')"/> |
|
105 |
</xsl:when> |
|
106 |
<xsl:otherwise> |
|
107 |
<xsl:value-of select="translate(normalize-space(./*[local-name() = 'creatorName']),' .,','___')"/> |
|
108 |
</xsl:otherwise> |
|
109 |
</xsl:choose> |
|
110 |
</xsl:variable> |
|
111 |
|
|
112 |
<xsl:variable name="personId" select="dnet:oafId('person', $namespaceprefix, normalize-space($personIdTemp))"/> |
|
113 |
|
|
114 |
<xsl:variable name="originalPersonId" select="./*[local-name() = 'nameIdentifier']"/> |
|
115 |
|
|
116 |
<xsl:if test="string-length($personId) > 0"> |
|
117 |
<xsl:variable name="position" select="position()"/> |
|
118 |
<xsl:variable name="person" |
|
119 |
select="dnet:person($personId, $about, $provenance, $trust, $collectedfromid, $collectedfromname, |
|
120 |
$originalPersonId, $dateOfCollection, $dateoftransformation, normalize-space(./*[local-name() = 'creatorName']), |
|
121 |
./*[local-name() = 'nameIdentifier'], ./*[local-name() = 'nameIdentifier']/@nameIdentifierScheme)"/> |
|
122 |
<xsl:variable name="personresult" |
|
123 |
select="dnet:personResult_Authorship($personId, $resultId, $position, 'isAuthorOf', $provenance, $trust, $about)"/> |
|
124 |
<xsl:variable name="resultperson" |
|
125 |
select="dnet:personResult_Authorship($resultId, $personId, $position, 'hasAuthor', $provenance, $trust, $about)"/> |
|
126 |
|
|
127 |
<ROW key="{$personId}" columnFamily="person"> |
|
128 |
<QUALIFIER name="body" type="base64"> |
|
129 |
<xsl:value-of select="$person"/> |
|
130 |
</QUALIFIER> |
|
131 |
</ROW> |
|
132 |
<ROW key="{$personId}" columnFamily="personResult_authorship_isAuthorOf"> |
|
133 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
134 |
<xsl:value-of select="$personresult"/> |
|
135 |
</QUALIFIER> |
|
136 |
</ROW> |
|
137 |
<ROW key="{$resultId}" columnFamily="personResult_authorship_hasAuthor"> |
|
138 |
<QUALIFIER name="{$personId}" type="base64"> |
|
139 |
<xsl:value-of select="$resultperson"/> |
|
140 |
</QUALIFIER> |
|
141 |
</ROW> |
|
142 |
</xsl:if> |
|
143 |
</xsl:for-each> |
|
144 |
|
|
145 |
<xsl:for-each select="//*[local-name()='projectid']"> <!-- For every projectid element, emit the two directed result/project relation rows. --> |

146 |


147 |
<xsl:variable name="projectId" |

148 |
select="dnet:oafSplitId('project', normalize-space(.))"/> |

149 |


150 |
<xsl:if test="string-length($projectId) > 0"> <!-- Guard first: the relations are only built when a project id was produced, matching the other relation loops in this stylesheet. --> |

151 |
<xsl:variable name="resultproject" |

152 |
select="dnet:rel($resultId, $projectId, 'resultProject', 'outcome', 'isProducedBy', $provenance, $trust, $about)"/> |

153 |
<xsl:variable name="projectresult" |

154 |
select="dnet:rel($projectId, $resultId, 'resultProject', 'outcome', 'produces', $provenance, $trust, $about)"/> |

155 |


156 |
<ROW key="{$resultId}" columnFamily="resultProject_outcome_isProducedBy"> |

157 |
<QUALIFIER name="{$projectId}" type="base64"> |

158 |
<xsl:value-of select="$resultproject"/> |

159 |
</QUALIFIER> |

160 |
</ROW> |

161 |
<ROW key="{$projectId}" columnFamily="resultProject_outcome_produces"> |

162 |
<QUALIFIER name="{$resultId}" type="base64"> |

163 |
<xsl:value-of select="$projectresult"/> |

164 |
</QUALIFIER> |

165 |
</ROW> |

166 |
</xsl:if> |

167 |
</xsl:for-each> |
|
168 |
|
|
169 |
<xsl:for-each |
|
170 |
select="//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='OPENAIRE' and ./@relationType='IsSupplementTo']"> |
|
171 |
|
|
172 |
<!-- relatedDataset ids must be in the openaire format --> |
|
173 |
<xsl:variable name="targetId" select="dnet:oafSimpleId('result', normalize-space(.))"/> |
|
174 |
|
|
175 |
<xsl:if test="string-length($targetId) > 0"> |
|
176 |
|
|
177 |
<xsl:variable name="resultDataset" |
|
178 |
select="dnet:rel($resultId, $targetId, 'resultResult', 'supplement', 'isSupplementTo', $provenance, $trust, $about)"/> |
|
179 |
<xsl:variable name="datasetResult" |
|
180 |
select="dnet:rel($targetId, $resultId, 'resultResult', 'supplement', 'isSupplementedBy', $provenance, $trust, $about)"/> |
|
181 |
|
|
182 |
<ROW key="{$resultId}" columnFamily="resultResult_supplement_isSupplementTo"> |
|
183 |
<QUALIFIER name="{$targetId}" type="base64"> |
|
184 |
<xsl:value-of select="$resultDataset"/> |
|
185 |
</QUALIFIER> |
|
186 |
</ROW> |
|
187 |
<ROW key="{$targetId}" columnFamily="resultResult_supplement_isSupplementedBy"> |
|
188 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
189 |
<xsl:value-of select="$datasetResult"/> |
|
190 |
</QUALIFIER> |
|
191 |
</ROW> |
|
192 |
</xsl:if> |
|
193 |
</xsl:for-each> |
|
194 |
|
|
195 |
<xsl:for-each |
|
196 |
select="//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='OPENAIRE' and ./@relationType='IsPartOf']"> |
|
197 |
|
|
198 |
<!-- relatedDataset ids must be in the openaire format --> |
|
199 |
<xsl:variable name="datasetId" select="dnet:oafSimpleId('result', normalize-space(.))"/> |
|
200 |
|
|
201 |
<xsl:if test="string-length($datasetId) > 0"> |
|
202 |
|
|
203 |
<xsl:variable name="childParent" |
|
204 |
select="dnet:rel($resultId, $datasetId, 'resultResult', 'part', 'isPartOf', $provenance, $trust, $about)"/> |
|
205 |
<xsl:variable name="parentChild" |
|
206 |
select="dnet:rel($datasetId, $resultId, 'resultResult', 'part', 'hasPart', $provenance, $trust, $about)"/> |
|
207 |
|
|
208 |
<ROW key="{$resultId}" columnFamily="resultResult_part_isPartOf"> |
|
209 |
<QUALIFIER name="{$datasetId}" type="base64"> |
|
210 |
<xsl:value-of select="$childParent"/> |
|
211 |
</QUALIFIER> |
|
212 |
</ROW> |
|
213 |
<ROW key="{$datasetId}" columnFamily="resultResult_part_hasPart"> |
|
214 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
215 |
<xsl:value-of select="$parentChild"/> |
|
216 |
</QUALIFIER> |
|
217 |
</ROW> |
|
218 |
</xsl:if> |
|
219 |
</xsl:for-each> |
|
220 |
|
|
221 |
</ROWS> |
|
222 |
</xsl:if> |
|
223 |
</xsl:otherwise> |
|
224 |
</xsl:choose> |
|
225 |
</xsl:template> |
|
226 |
</xsl:stylesheet> |
modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/xslt/rels2actions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
|
3 |
xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dr="http://www.driver-repository.eu/namespace/dr" |
|
4 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:oaa="http://namespace.openaire.eu/oaa" |
|
5 |
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:dnet="eu.dnetlib.data.transform.xml.OafToHbaseXsltFunctions" |
|
6 |
xmlns:exslt="http://exslt.org/common" |
|
7 |
xmlns:action="http://namespace.openaire.eu/action" version="1.0" |
|
8 |
extension-element-prefixes="exslt" |
|
9 |
exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt"> |
|
10 |
|
|
11 |
<xsl:output omit-xml-declaration="yes" indent="yes" /> |
|
12 |
|
|
13 |
<xsl:param name="trust" select="string('0.9')" /> |
|
14 |
<xsl:param name="provenance" select="string('UNKNOWN')" /> |
|
15 |
<xsl:param name="namespaceprefix" select="string('unknown_____')" /> |
|
16 |
|
|
17 |
<xsl:template match="/*"> |
|
18 |
|
|
19 |
<xsl:choose> |
|
20 |
<xsl:when test="count(//RELATION) = 0"> |
|
21 |
<ACTIONS /> |
|
22 |
</xsl:when> |
|
23 |
<xsl:otherwise> |
|
24 |
<ACTIONS> |
|
25 |
<xsl:for-each select="//RELATION"> |
|
26 |
<xsl:choose> |
|
27 |
<xsl:when test="./@type = 'resultProject'"> |
|
28 |
<xsl:variable name="resultId" select="./@source" /> |
|
29 |
<xsl:variable name="projectId"> |
|
30 |
<xsl:choose> |
|
31 |
<xsl:when test="starts-with(@target, '40|')"> |
|
32 |
<xsl:value-of select="./@target" /> |
|
33 |
</xsl:when> |
|
34 |
<xsl:otherwise> |
|
35 |
<xsl:value-of select="dnet:oafSplitId('project', normalize-space(@target))"/> |
|
36 |
</xsl:otherwise> |
|
37 |
</xsl:choose> |
|
38 |
</xsl:variable> |
|
39 |
|
|
40 |
<ACTION targetKey="{$resultId}" targetColumnFamily="resultProject_outcome_isProducedBy" targetColumn="{$projectId}"> |
|
41 |
<xsl:value-of select="dnet:rel($resultId, $projectId, 'resultProject', 'outcome', 'isProducedBy', $provenance, $trust)" /> |
|
42 |
</ACTION> |
|
43 |
<ACTION targetKey="{$projectId}" targetColumnFamily="resultProject_outcome_produces" targetColumn="{$resultId}"> |
|
44 |
<xsl:value-of select="dnet:rel($projectId, $resultId, 'resultProject', 'outcome', 'produces', $provenance, $trust)" /> |
|
45 |
</ACTION> |
|
46 |
</xsl:when> |
|
47 |
|
|
48 |
<!-- TODO: check this block, it caused problems: |
|
49 |
Cannot convert argument/return type in call to method 'eu.dnetlib.actionmanager.actions.infopackage.DMFInfoPackageToHbaseXsltFunctions.oafPersonResultFromInfoPackage(node-set, node-set, result-tree, reference, reference)' |
|
50 |
FATAL ERROR: 'Could not compile stylesheet' |
|
51 |
|
|
52 |
<xsl:when test="./@type = 'personResult'"> |
|
53 |
<xsl:variable name="personId" select="./@source" /> |
|
54 |
<xsl:variable name="resultId" select="./@target" /> |
|
55 |
|
|
56 |
<xsl:variable name="pos"> |
|
57 |
<xsl:choose> |
|
58 |
<xsl:when test="@position"><xsl:value-of select="@position"/></xsl:when> |
|
59 |
<xsl:otherwise>1000</xsl:otherwise> |
|
60 |
</xsl:choose> |
|
61 |
</xsl:variable> |
|
62 |
|
|
63 |
<ACTION targetKey="{$personId}" targetColumnFamily="personResult" targetColumn="{$resultId}"> |
|
64 |
<xsl:value-of select="dnet:oafPersonResultFromInfoPackage($personId, $resultId, $pos, $provenance, $trust)" /> |
|
65 |
</ACTION> |
|
66 |
<ACTION targetKey="{$resultId}" targetColumnFamily="personResult" targetColumn="{$personId}"> |
|
67 |
<xsl:value-of select="dnet:oafPersonResultFromInfoPackage($resultId, $personId, $pos, $provenance, $trust)" /> |
|
68 |
</ACTION> |
|
69 |
</xsl:when> |
|
70 |
--> |
|
71 |
</xsl:choose> |
|
72 |
</xsl:for-each> |
|
73 |
</ACTIONS> |
|
74 |
</xsl:otherwise> |
|
75 |
</xsl:choose> |
|
76 |
</xsl:template> |
|
77 |
|
|
78 |
</xsl:stylesheet> |
modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/xslt/oaf2updateActions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
|
3 |
xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dr="http://www.driver-repository.eu/namespace/dr" |
|
4 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:oaa="http://namespace.openaire.eu/oaa" |
|
5 |
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:dnet="eu.dnetlib.data.transform.xml.OafToHbaseXsltFunctions" |
|
6 |
xmlns:date="java.lang.System" |
|
7 |
xmlns:exslt="http://exslt.org/common" |
|
8 |
version="1.0" |
|
9 |
extension-element-prefixes="exslt" |
|
10 |
exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt date"> |
|
11 |
|
|
12 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
13 |
|
|
14 |
<xsl:param name="trust" select="string('0.9')"/> |
|
15 |
<xsl:param name="provenance" select="string('UNKNOWN')"/> |
|
16 |
<xsl:param name="namespaceprefix" select="string('unknown_____')"/> |
|
17 |
|
|
18 |
<xsl:template match="/*"> |
|
19 |
<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/> |
|
20 |
<xsl:variable name="dateofcollection" select="//dr:dateOfCollection"/> |
|
21 |
<xsl:variable name="dateoftransformation" select="//dr:dateOfTransformation"/> |
|
22 |
|
|
23 |
<xsl:variable name="metadata" select="exslt:node-set(//*[local-name()='metadata']/*)"/> |
|
24 |
<xsl:variable name="collectedDatasourceid"> <!-- Value of //oaf:collectedDatasourceid when non-empty, otherwise the literal string UNKNOWN. --> |

25 |
<xsl:choose> |

26 |
<xsl:when test="string-length(//oaf:collectedDatasourceid) > 0"> |

27 |
<xsl:value-of select="//oaf:collectedDatasourceid"/> |

28 |
</xsl:when> |

29 |
<xsl:otherwise> <!-- The default must be a quoted XPath string literal: a bare name is a child-element step and evaluates to an empty string here. --> |

30 |
<xsl:value-of select="'UNKNOWN'"/> |

31 |
</xsl:otherwise> |

32 |
</xsl:choose> |

33 |
</xsl:variable> |
|
34 |
|
|
35 |
<xsl:choose> |
|
36 |
<xsl:when test="count($metadata) = 0"> |
|
37 |
<ACTIONS/> |
|
38 |
</xsl:when> |
|
39 |
<xsl:otherwise> |
|
40 |
|
|
41 |
<xsl:variable name="objidentifier" select="/record/*[local-name() = 'header']/*[local-name() = 'objIdentifier']"/> |
|
42 |
|
|
43 |
<xsl:variable name="resultId" select="dnet:oafSimpleId('result', $objidentifier)"/> |
|
44 |
|
|
45 |
<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/> |
|
46 |
<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/> |
|
47 |
|
|
48 |
<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/> |
|
49 |
<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/> |
|
50 |
|
|
51 |
<!-- <xsl:variable name="country" select="substring(//dr:repositoryCountry, 1, 200)" /> --> |
|
52 |
<!-- <xsl:variable name="accessmode" select="substring(//oaf:accessrights, 1, 200)" /> --> |
|
53 |
|
|
54 |
<xsl:variable name="originalidTest" select="/record/*[local-name() = 'header']/*[local-name() = 'recordIdentifier']"/> |
|
55 |
<xsl:variable name="originalid"> |
|
56 |
<xsl:choose> |
|
57 |
<xsl:when test="contains($originalidTest, '::')"> |
|
58 |
<xsl:value-of select="substring-after($originalidTest, '::')"/> |
|
59 |
</xsl:when> |
|
60 |
<xsl:otherwise> |
|
61 |
<xsl:value-of select="$originalidTest"/> |
|
62 |
</xsl:otherwise> |
|
63 |
</xsl:choose> |
|
64 |
</xsl:variable> |
|
65 |
|
|
66 |
<xsl:variable name="result" |
|
67 |
select="dnet:oafResult($resultId, $provenance, $trust, $about, $hostedbyid, $hostedbyname, |
|
68 |
$collectedfromid, $collectedfromname, $originalid, $dateofcollection, $dateoftransformation, $metadata)"/> |
|
69 |
|
|
70 |
<ACTIONS> |
|
71 |
<ACTION targetKey="{$resultId}" targetColumnFamily="result" targetColumn="{concat('update_', date:nanoTime())}"> |
|
72 |
<xsl:value-of select="$result"/> |
|
73 |
</ACTION> |
|
74 |
</ACTIONS> |
|
75 |
</xsl:otherwise> |
|
76 |
</xsl:choose> |
|
77 |
</xsl:template> |
|
78 |
|
|
79 |
</xsl:stylesheet> |
modules/dnet-openaire-domain/trunk/src/test/resources/eu/dnetlib/data/actionmanager/actions/xslt/oaf2insertActions.xslt | ||
---|---|---|
1 |
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
|
2 |
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
3 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr" |
|
4 |
xmlns:dnet="eu.dnetlib.data.transform.xml.OafToHbaseXsltFunctions" |
|
5 |
xmlns:exslt="http://exslt.org/common" |
|
6 |
version="1.0" extension-element-prefixes="exslt" |
|
7 |
exclude-result-prefixes="xsl oaf dr dri dnet exslt"> |
|
8 |
|
|
9 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
10 |
|
|
11 |
<xsl:param name="writeCoAuthors" select="false()"/> |
|
12 |
|
|
13 |
<xsl:template match="/*"> |
|
14 |
<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/> |
|
15 |
<xsl:variable name="dateofcollection" select="//dr:dateOfCollection"/> |
|
16 |
<xsl:variable name="dateoftransformation" select="//dr:dateOfTransformation"/> |
|
17 |
|
|
18 |
<xsl:variable name="trust" select="string('0.9')"/> |
|
19 |
<xsl:variable name="provenance" select="string('sysimport:crosswalk:repository')"/> |
|
20 |
|
|
21 |
<xsl:variable name="metadata" select="exslt:node-set(//*[local-name()='metadata']/*)"/> |
|
22 |
<xsl:variable name="namespaceprefix"> <!-- Prefix passed to dnet:oafId for person ids: //oaf:datasourceprefix when non-empty, otherwise a literal default. --> |

23 |
<xsl:choose> |

24 |


25 |
<!-- TODO check namespaceprefix length is 12 --> |

26 |
<xsl:when test="string-length(//oaf:datasourceprefix) > 0"> |

27 |
<xsl:value-of select="//oaf:datasourceprefix"/> |

28 |
</xsl:when> |

29 |
<xsl:otherwise> <!-- Quoted XPath string literal (a bare name would be a child-element step yielding an empty string), padded to the 12-character default used by the sibling stylesheets. --> |

30 |
<xsl:value-of select="'unknown_____'"/> |

31 |
</xsl:otherwise> |

32 |
</xsl:choose> |

33 |
</xsl:variable> |
|
34 |
|
|
35 |
<xsl:choose> |
|
36 |
<!-- <xsl:when test="count($metadata) = 0 or string-length($namespaceprefix) = 0"> --> |
|
37 |
<xsl:when test="count($metadata) = 0"> |
|
38 |
<ROWS/> |
|
39 |
</xsl:when> |
|
40 |
<xsl:otherwise> |
|
41 |
|
|
42 |
<xsl:variable name="objIdentifier" select="//dri:objIdentifier"/> |
|
43 |
<xsl:variable name="resultId" select="dnet:oafSimpleId('result', //dri:objIdentifier)"/> |
|
44 |
|
|
45 |
<xsl:if test="string-length($resultId) > 0"> |
|
46 |
|
|
47 |
<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/> |
|
48 |
<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/> |
|
49 |
|
|
50 |
<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/> |
|
51 |
<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/> |
|
52 |
|
|
53 |
<xsl:variable name="originalidTest" select="/record/*[local-name() = 'header']/*[local-name() = 'recordIdentifier']"/> |
|
54 |
<xsl:variable name="originalid"> |
|
55 |
<xsl:choose> |
|
56 |
<xsl:when test="contains($originalidTest, '::')"> |
|
57 |
<xsl:value-of select="substring-after($originalidTest, '::')"/> |
|
58 |
</xsl:when> |
|
59 |
<xsl:otherwise> |
|
60 |
<xsl:value-of select="$originalidTest"/> |
|
61 |
</xsl:otherwise> |
|
62 |
</xsl:choose> |
|
63 |
</xsl:variable> |
|
64 |
|
|
65 |
<xsl:variable name="result" |
|
66 |
select="dnet:oafResult($resultId, $provenance, $trust, $about, $hostedbyid, $hostedbyname, $collectedfromid, |
|
67 |
$collectedfromname, $originalid, $dateofcollection, $dateoftransformation, $metadata)"/> |
|
68 |
|
|
69 |
<ROWS> |
|
70 |
<ROW key="{$resultId}" columnFamily="result"> |
|
71 |
<QUALIFIER name="body" type="base64"> |
|
72 |
<xsl:value-of select="$result"/> |
|
73 |
</QUALIFIER> |
|
74 |
</ROW> |
|
75 |
|
|
76 |
<xsl:for-each select="//*[local-name()='projectid']"> |
|
77 |
|
|
78 |
<xsl:variable name="projectId" select="dnet:oafSplitId('project', normalize-space(.))"/> |
|
79 |
|
|
80 |
<xsl:if test="string-length($projectId) > 0"> |
|
81 |
|
|
82 |
<xsl:variable name="resultproject" |
|
83 |
select="dnet:rel($resultId, $projectId, 'resultProject', 'outcome', 'isProducedBy', $provenance, $trust, $about)"/> |
|
84 |
<xsl:variable name="projectresult" |
|
85 |
select="dnet:rel($projectId, $resultId, 'resultProject', 'outcome', 'produces', $provenance, $trust, $about)"/> |
|
86 |
|
|
87 |
<ROW key="{$resultId}" columnFamily="resultProject_outcome_isProducedBy"> |
|
88 |
<QUALIFIER name="{$projectId}" type="base64"> |
|
89 |
<xsl:value-of select="$resultproject"/> |
|
90 |
</QUALIFIER> |
|
91 |
</ROW> |
|
92 |
<ROW key="{$projectId}" columnFamily="resultProject_outcome_produces"> |
|
93 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
94 |
<xsl:value-of select="$projectresult"/> |
|
95 |
</QUALIFIER> |
|
96 |
</ROW> |
|
97 |
</xsl:if> |
|
98 |
</xsl:for-each> |
|
99 |
|
|
100 |
<xsl:for-each select="//*[local-name()='relatedDataset']"> |
|
101 |
|
|
102 |
<!-- relatedDataset ids must be in the openaire format --> |
|
103 |
<xsl:variable name="datasetId" select="dnet:oafSimpleId('result', normalize-space(./@id))"/> |
|
104 |
|
|
105 |
<xsl:if test="string-length($datasetId) > 0"> |
|
106 |
|
|
107 |
<xsl:variable name="resultDataset" |
|
108 |
select="dnet:rel($resultId, $datasetId, 'resultResult', 'publicationDataset', 'isRelatedTo', $provenance, $trust, $about)"/> |
|
109 |
<xsl:variable name="datasetResult" |
|
110 |
select="dnet:rel($datasetId, $resultId, 'resultResult', 'publicationDataset', 'isRelatedTo', $provenance, $trust, $about)"/> |
|
111 |
|
|
112 |
<ROW key="{$resultId}" columnFamily="resultResult_publicationDataset_isRelatedTo"> |
|
113 |
<QUALIFIER name="{$datasetId}" type="base64"> |
|
114 |
<xsl:value-of select="$resultDataset"/> |
|
115 |
</QUALIFIER> |
|
116 |
</ROW> |
|
117 |
<ROW key="{$datasetId}" columnFamily="resultResult_publicationDataset_isRelatedTo"> |
|
118 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
119 |
<xsl:value-of select="$datasetResult"/> |
|
120 |
</QUALIFIER> |
|
121 |
</ROW> |
|
122 |
</xsl:if> |
|
123 |
</xsl:for-each> |
|
124 |
|
|
125 |
<xsl:for-each select="//*[local-name()='creator']"> |
|
126 |
|
|
127 |
<xsl:if test="string-length(normalize-space(.)) > 0"> |
|
128 |
<xsl:variable name="originalPersonId"> |
|
129 |
<xsl:choose> |
|
130 |
<xsl:when test="string-length(@nameIdentifier) > 0"> |
|
131 |
<xsl:value-of select="normalize-space(@nameIdentifier)"/> |
|
132 |
</xsl:when> |
|
133 |
<xsl:otherwise> |
|
134 |
<xsl:value-of select="concat($originalid, '::', normalize-space(.))"/> |
|
135 |
</xsl:otherwise> |
|
136 |
</xsl:choose> |
|
137 |
</xsl:variable> |
|
138 |
|
|
139 |
<xsl:variable name="personId" select="dnet:oafId('person', $namespaceprefix, $originalPersonId)"/> |
|
140 |
<xsl:variable name="position" select="position()"/> |
|
141 |
|
|
142 |
<xsl:variable name="person" |
|
143 |
select="dnet:person($personId, $about, $provenance, $trust, $collectedfromid, $collectedfromname, |
|
144 |
$originalPersonId, $dateofcollection, $dateoftransformation, normalize-space(.), @nameIdentifier, @nameIdentifierScheme)"/> |
|
145 |
<xsl:variable name="personresult" |
|
146 |
select="dnet:personResult_Authorship($personId, $resultId, $position, 'isAuthorOf', $provenance, $trust, $about)"/> |
|
147 |
<xsl:variable name="resultperson" |
|
148 |
select="dnet:personResult_Authorship($resultId, $personId, $position, 'hasAuthor', $provenance, $trust, $about)"/> |
|
149 |
|
|
150 |
<xsl:if test="string-length($personId) > 0"> |
|
151 |
<ROW key="{$personId}" columnFamily="person"> |
|
152 |
<QUALIFIER name="body" type="base64"> |
|
153 |
<xsl:value-of select="$person"/> |
|
154 |
</QUALIFIER> |
|
155 |
</ROW> |
|
156 |
<ROW key="{$personId}" columnFamily="personResult_authorship_isAuthorOf"> |
|
157 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
158 |
<xsl:value-of select="$personresult"/> |
|
159 |
</QUALIFIER> |
|
160 |
</ROW> |
|
161 |
<ROW key="{$resultId}" columnFamily="personResult_authorship_hasAuthor"> |
|
162 |
<QUALIFIER name="{$personId}" type="base64"> |
|
163 |
<xsl:value-of select="$resultperson"/> |
|
164 |
</QUALIFIER> |
|
165 |
</ROW> |
|
166 |
</xsl:if> |
|
167 |
|
|
168 |
<!-- COAUTHORS --> |
|
169 |
<xsl:if test="$writeCoAuthors = true()">
	<!--
		Emits one personPerson co-authorship ROW from the current author ($personId)
		to every other dc:creator sibling.

		The current author is excluded by node identity rather than by index:
		$position is computed against //*[local-name()='creator'], whereas position()
		inside the loop below counts ../dc:creator siblings. The two indexes only agree
		when every creator is a dc:creator child of the same parent, so comparing them
		could pair an author with themselves and skip a genuine co-author.
	-->
	<xsl:variable name="currentCreatorNodeId" select="generate-id(.)"/>

	<xsl:for-each select="../dc:creator">
		<xsl:if test="generate-id(.) != $currentCreatorNodeId">

			<!-- Same id derivation as for the main author: prefer the explicit name identifier,
			     otherwise fall back to "<record original id>::<normalized name>". -->
			<xsl:variable name="originalCoauthorId">
				<xsl:choose>
					<xsl:when test="string-length(@nameIdentifier) > 0">
						<xsl:value-of select="normalize-space(@nameIdentifier)"/>
					</xsl:when>
					<xsl:otherwise>
						<xsl:value-of select="concat($originalid, '::', normalize-space(.))"/>
					</xsl:otherwise>
				</xsl:choose>
			</xsl:variable>

			<xsl:variable name="coauthorId"
			              select="dnet:oafId('person', $namespaceprefix, $originalCoauthorId)"/>

			<xsl:variable name="personperson"
			              select="dnet:rel($personId, $coauthorId, 'personPerson', 'coauthorship', 'isCoAuthorOf', $provenance, $trust, $about)"/>

			<ROW key="{$personId}" columnFamily="personPerson_coAuthorship_isCoAuthorOf">
				<QUALIFIER name="{$coauthorId}" type="base64">
					<xsl:value-of select="$personperson"/>
				</QUALIFIER>
			</ROW>
		</xsl:if>
	</xsl:for-each>
</xsl:if>
|
198 |
<!-- / COAUTHORS --> |
|
199 |
</xsl:if> |
|
200 |
</xsl:for-each> |
|
201 |
|
|
202 |
</ROWS> |
|
203 |
</xsl:if> |
|
204 |
</xsl:otherwise> |
|
205 |
</xsl:choose> |
|
206 |
</xsl:template> |
|
207 |
</xsl:stylesheet> |
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupFindPersonRootsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="8ec4731e-4e91-4863-9a4b-7f0a8ca0542e_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupFindPersonRootsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that finds the root of a similarity group (person)</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="100" /> |
|
35 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
36 |
|
|
37 |
<!-- Uncomment to override the default lib path --> |
|
38 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
39 |
</STATIC_CONFIGURATION> |
|
40 |
<JOB_INTERFACE> |
|
41 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
42 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
43 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
44 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN> |
|
47 |
<FILTERS operator="MUST_PASS_ALL"> |
|
48 |
<FILTER type="prefix" param="entityTypeId" /> |
|
49 |
</FILTERS> |
|
50 |
<FAMILIES> |
|
51 |
<FAMILY param="entityType" /> |
|
52 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
53 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
54 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
55 |
</FAMILIES> |
|
56 |
</SCAN> |
|
57 |
</HADOOP_JOB> |
|
58 |
<STATUS> |
|
59 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
60 |
<RUNNING_INSTANCES value="0"/> |
|
61 |
<CUMULATIVE_RUN value="0" /> |
|
62 |
</STATUS> |
|
63 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
64 |
</BODY> |
|
65 |
</RESOURCE_PROFILE> |
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupSimilarity2HdfsActionsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="baa63c0c-0ff3-4a15-93c1-e361800e9ca8_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupSimilarity2HdfsActionsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupSimilarityToHdfsActionsMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
<!-- OUTPUT --> |
|
24 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
25 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
26 |
|
|
27 |
<!-- MISC --> |
|
28 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.output.compress" value="true"/> |
|
35 |
<PROPERTY key="mapred.output.compression.type" value="BLOCK"/> |
|
36 |
|
|
37 |
<PROPERTY key="mapred.reduce.tasks" value="10" /> |
|
38 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
39 |
|
|
40 |
<!-- Uncomment to override the default lib path --> |
|
41 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
42 |
</STATIC_CONFIGURATION> |
|
43 |
<JOB_INTERFACE> |
|
44 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
45 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
46 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> |
|
47 |
<PARAM name="dedup.conf" required="true" description="dedup configuration" /> |
|
48 |
<PARAM name="rawSetId" required="true" description="raw set identifier" /> |
|
49 |
<PARAM name="similarityCF" required="true" description="similarity column family name" /> |
|
50 |
</JOB_INTERFACE> |
|
51 |
<SCAN> |
|
52 |
<FILTERS operator="MUST_PASS_ALL"> |
|
53 |
<FILTER type="prefix" param="entityTypeId" /> |
|
54 |
</FILTERS> |
|
55 |
<FAMILIES> |
|
56 |
<FAMILY param="entityType" /> |
|
57 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
58 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
59 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
60 |
</FAMILIES> |
|
61 |
</SCAN> |
|
62 |
</HADOOP_JOB> |
|
63 |
<STATUS> |
|
64 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
65 |
<RUNNING_INSTANCES value="0"/> |
|
66 |
<CUMULATIVE_RUN value="0" /> |
|
67 |
</STATUS> |
|
68 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
69 |
</BODY> |
|
70 |
</RESOURCE_PROFILE> |
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/sqoopStatsUpdateJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="bf0ab07b-36bf-4164-ab73-342bfb11e51a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="StatsExportJob" type="oozie"> |
|
11 |
<DESCRIPTION>Job for importing data from HBASE to the relational Stats Database</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- Cluster wide --> |
|
15 |
<PROPERTY key="queueName" value="default"/> |
|
16 |
<PROPERTY key="user.name" value="dnet"/> <!-- username = sqoop?? --> |
|
17 |
<PROPERTY key="workingDir" value="/user/dnet/lib/stats/working_dir"/> |
|
18 |
<PROPERTY key="numReducers" value="1"/> |
|
19 |
|
|
20 |
<PROPERTY key="oozie.wf.application.path" value="hdfs://nmis-hadoop-cluster/user/eri.katsari/stats/oozie_app"/><!-- edit this property! --> |
|
21 |
<PROPERTY key="Stats_db_Url" value="jdbc:postgresql://node1.t.openaire.research-infrastructures.eu:5432/stats"/><!-- complete the jdbc url with the actual value! --> |
|
22 |
<PROPERTY key="Stats_db_User" value="sqoop"/> |
|
23 |
<PROPERTY key="Stats_db_Pass" value="sqoop"/> |
|
24 |
<PROPERTY key="Stats_db_Driver" value="org.postgresql.Driver"/> |
|
25 |
<PROPERTY key="Stats_db_table_map" value="datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultTopic=result_topics,category=category,context=context,claim=claim,concept=concept,datasourceLanguage=datasource_languages,resultLanguage=result_languages,resultDatasource=result_datasources"/> |
|
26 |
<PROPERTY key="Stats_sqoop_RecsPerStatement" value="10000"/> |
|
27 |
<PROPERTY key="Stats_sqoop_StatementPerTrans" value="1000000"/> |
|
28 |
<PROPERTY key="Stats_sqoop_ReducersCount" value="4"/> |
|
29 |
<PROPERTY key="Stats_output_Path" value="/tmp/stats/"/> |
|
30 |
<PROPERTY key="Stats_null_String_Field" value="null"/> |
|
31 |
<PROPERTY key="Stats_null_Numeric_Field" value="null"/> |
|
32 |
<PROPERTY key="Stats_enclosing_Character" value="#"/> |
|
33 |
<PROPERTY key="Stats_delim_Character" value="!"/> |
|
34 |
<PROPERTY key="out1" value="datasource"/> |
|
35 |
<PROPERTY key="out2" value="project"/> |
|
36 |
<PROPERTY key="out3" value="organization"/> |
|
37 |
<PROPERTY key="out4" value="datasourceOrganization"/> |
|
38 |
<PROPERTY key="out5" value="datasourceTopic"/> |
|
39 |
<PROPERTY key="out6" value="datasourceLanguage"/> |
|
40 |
<PROPERTY key="out7" value="projectOrganization"/> |
|
41 |
<PROPERTY key="out8" value="resultClaim"/> |
|
42 |
<PROPERTY key="out9" value="resultClassification"/> |
|
43 |
<PROPERTY key="out10" value="resultConcept"/> |
|
44 |
<PROPERTY key="out11" value="resultLanguage"/> |
|
45 |
<PROPERTY key="out12" value="resultOrganization"/> |
|
46 |
<PROPERTY key="out13" value="resultResult"/> |
|
47 |
<PROPERTY key="out14" value="resultProject"/> |
|
48 |
<PROPERTY key="out15" value="category"/> |
|
49 |
<PROPERTY key="out16" value="resultTopic"/> |
|
50 |
<PROPERTY key="out17" value="resultDatasource"/> |
|
51 |
<PROPERTY key="out18" value="result"/> |
|
52 |
<PROPERTY key="out19" value="claim"/> |
|
53 |
<PROPERTY key="out20" value="concept"/> |
|
54 |
</STATIC_CONFIGURATION> |
|
55 |
<JOB_INTERFACE> |
|
56 |
<PARAM name="nameNode" required="true" description="hdfs name node"/> |
|
57 |
<PARAM name="jobTracker" required="true" description="job tracker name"/> |
|
58 |
<PARAM name="Stats_Hbase_Source_Table" required="true" description="Hbase Table with Protobuffs."/> |
|
59 |
<PARAM name="Stats_indexConf" required="true" description="Index Entity Links configuration."/> |
|
60 |
<PARAM name="isLookupEndpoint" required="true" description="IS lookup service endpoint"/> |
|
61 |
</JOB_INTERFACE> |
|
62 |
</HADOOP_JOB> |
|
63 |
<STATUS> |
|
64 |
<LAST_SUBMISSION_DATE value="2014-11-14T19:57:25+00:00"/> |
|
65 |
<RUNNING_INSTANCES value="0"/> |
|
66 |
<CUMULATIVE_RUN value="75"/> |
|
67 |
</STATUS> |
|
68 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
69 |
</BODY> |
|
70 |
</RESOURCE_PROFILE> |
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupRootsExportJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="53f2a9b4-adf3-4ceb-9308-d88b53dc44c5_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupRootsExportJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that exports the representative publications as json</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.RootExportMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
|
|
26 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
27 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
28 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
29 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
30 |
|
|
31 |
<PROPERTY key="dfs.blocksize" value="256M" /> |
|
32 |
|
|
33 |
<PROPERTY key="mapred.reduce.tasks" value="1" /> |
|
34 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
35 |
|
|
36 |
<!-- Uncomment to override the default lib path --> |
|
37 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
38 |
</STATIC_CONFIGURATION> |
|
39 |
<JOB_INTERFACE> |
|
40 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
41 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
42 |
|
|
43 |
<PARAM name="mapred.output.dir" required="true" description="target text file on hdfs" /> |
|
44 |
</JOB_INTERFACE> |
|
45 |
<SCAN> |
|
46 |
<FILTERS operator="MUST_PASS_ALL"> |
|
47 |
<FILTER type="prefix" param="entityTypeId" /> |
|
48 |
</FILTERS> |
|
49 |
<FAMILIES> |
|
50 |
<FAMILY param="entityType" /> |
|
51 |
</FAMILIES> |
|
52 |
</SCAN> |
|
53 |
</HADOOP_JOB> |
|
54 |
<STATUS> |
|
55 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
56 |
<RUNNING_INSTANCES value="0"/> |
|
57 |
<CUMULATIVE_RUN value="0" /> |
|
58 |
</STATUS> |
|
59 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
60 |
</BODY> |
|
61 |
</RESOURCE_PROFILE> |
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/informationSpaceImportJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="b7d51a07-6996-4841-9a4a-685a044638e3_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="informationSpaceImportJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that imports the whole information space table from a sequence file holding a json dump</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.ImportInformationSpaceDumpMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
26 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
27 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
28 |
|
|
29 |
<!-- Uncomment to override the default lib path --> |
|
30 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
31 |
</STATIC_CONFIGURATION> |
|
32 |
<JOB_INTERFACE> |
|
33 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
34 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
35 |
|
|
36 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" /> |
|
37 |
</JOB_INTERFACE> |
|
38 |
<SCAN> |
|
39 |
<FILTERS /> |
|
40 |
<FAMILIES /> |
|
41 |
</SCAN> |
|
42 |
</HADOOP_JOB> |
|
43 |
<STATUS> |
|
44 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
45 |
<RUNNING_INSTANCES value="0"/> |
|
46 |
<CUMULATIVE_RUN value="0" /> |
|
47 |
</STATUS> |
|
48 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
49 |
</BODY> |
|
50 |
</RESOURCE_PROFILE> |
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/resetDedupJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="bc4f377a-af07-403d-a019-af60aa557652_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="resetDedupJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that resets the dedup jobs</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.reset.HBaseResetMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Mutation" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
26 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
27 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
28 |
|
|
29 |
<!-- Uncomment to override the default lib path --> |
|
30 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
31 |
|
|
32 |
</STATIC_CONFIGURATION> |
|
33 |
<JOB_INTERFACE> |
|
34 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
35 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
36 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
37 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
38 |
</JOB_INTERFACE> |
|
39 |
</HADOOP_JOB> |
|
40 |
<STATUS> |
|
41 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
42 |
<RUNNING_INSTANCES value="0"/> |
|
43 |
<CUMULATIVE_RUN value="0" /> |
|
44 |
</STATUS> |
|
45 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
46 |
</BODY> |
|
47 |
</RESOURCE_PROFILE> |
modules/dnet-openaire-domain/trunk/src/test/java/eu/dnetlib/data/actionmanager/actions/ActionFactoryTest.java | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import java.io.IOException; |
4 | 4 |
import java.io.InputStream; |
5 |
import java.io.StringReader; |
|
5 | 6 |
import java.io.StringWriter; |
6 | 7 |
import java.util.Arrays; |
8 |
import java.util.HashMap; |
|
7 | 9 |
import java.util.List; |
8 |
import javax.annotation.Resource;
|
|
10 |
import java.util.Map;
|
|
9 | 11 |
import javax.xml.transform.TransformerException; |
10 | 12 |
|
13 |
import com.google.common.collect.Maps; |
|
11 | 14 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
12 | 15 |
import eu.dnetlib.rmi.data.actionmanager.ActionManagerException; |
13 | 16 |
import eu.dnetlib.rmi.data.actionmanager.Agent; |
... | ... | |
17 | 20 |
import org.apache.commons.logging.Log; |
18 | 21 |
import org.apache.commons.logging.LogFactory; |
19 | 22 |
import org.apache.hadoop.hbase.client.Put; |
23 |
import org.dom4j.Document; |
|
20 | 24 |
import org.dom4j.DocumentException; |
25 |
import org.dom4j.io.SAXReader; |
|
26 |
import org.junit.Before; |
|
21 | 27 |
import org.junit.Test; |
22 | 28 |
import org.junit.runner.RunWith; |
29 |
import org.mockito.Mock; |
|
30 |
import org.mockito.Mockito; |
|
31 |
import org.mockito.runners.MockitoJUnitRunner; |
|
32 |
import org.springframework.core.io.ClassPathResource; |
|
33 |
import org.springframework.core.io.Resource; |
|
23 | 34 |
import org.springframework.test.context.ContextConfiguration; |
24 | 35 |
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; |
25 | 36 |
|
26 | 37 |
import static org.junit.Assert.*; |
27 | 38 |
|
28 |
@RunWith(SpringJUnit4ClassRunner.class) |
|
29 |
@ContextConfiguration(locations = { "/eu/dnetlib/data/actionmanager/actions/applicationContext-actionmanager-test.xml" }) |
|
39 |
@RunWith(MockitoJUnitRunner.class) |
|
30 | 40 |
public class ActionFactoryTest { |
31 | 41 |
|
32 | 42 |
private static final Log log = LogFactory.getLog(ActionFactoryTest.class); |
43 |
private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/"; |
|
33 | 44 |
private final static Agent agent = new Agent("agentId", "agentName", Agent.AGENT_TYPE.algo); |
34 | 45 |
private final static String trust = "0.9"; |
35 | 46 |
private final InputStream recordOdfInputStream = getClass().getResourceAsStream("recordOdf.xml"); |
36 | 47 |
private final InputStream recordOafInputStream = getClass().getResourceAsStream("recordOaf.xml"); |
37 |
@Resource |
|
38 |
private eu.dnetlib.data.actionmanager.actions.ActionFactory actionFactory; |
|
39 | 48 |
|
40 |
// @Before |
|
41 |
// public void setUp() throws Exception { |
|
42 |
// |
|
43 |
// } |
|
49 |
private ActionFactory actionFactory; |
|
50 |
private Map<String, Resource> xslts = new HashMap<>(); |
|
44 | 51 |
|
52 |
@Mock |
|
53 |
private Resource oaf2insertActions; |
|
54 |
@Mock |
|
55 |
private Resource oaf2updateActions; |
|
56 |
@Mock |
|
57 |
private Resource odf2insertActions; |
|
58 |
@Mock |
|
59 |
private Resource odf2updateActions; |
|
60 |
@Mock |
|
61 |
private Resource rels2actions; |
|
62 |
|
|
63 |
@Before |
|
64 |
public void setUp() throws Exception { |
|
65 |
actionFactory = new ActionFactory(); |
|
66 |
|
|
67 |
xslts.put("oaf2insertActions", oaf2insertActions); |
|
68 |
xslts.put("oaf2updateActions", oaf2updateActions); |
|
69 |
xslts.put("odf2insertActions", odf2insertActions); |
|
70 |
xslts.put("odf2updateActions", odf2updateActions); |
|
71 |
xslts.put("rels2actions", rels2actions); |
|
72 |
|
|
73 |
Mockito.when(oaf2insertActions.getInputStream()).thenReturn(loadFromTransformationProfile("oaf2insertActions.xml")); |
|
74 |
Mockito.when(oaf2updateActions.getInputStream()).thenReturn(loadFromTransformationProfile("oaf2updateActions.xml")); |
|
75 |
Mockito.when(odf2insertActions.getInputStream()).thenReturn(loadFromTransformationProfile("odf2insertActions.xml")); |
|
76 |
Mockito.when(odf2updateActions.getInputStream()).thenReturn(loadFromTransformationProfile("odf2updateActions.xml")); |
|
77 |
// Mockito.when(rels2actions.getInputStream()).thenReturn(loadFromTransformationProfile("rels2actions.xml")); |
|
78 |
|
|
79 |
actionFactory.setXslts(xslts); |
|
80 |
} |
|
81 |
|
|
45 | 82 |
@Test |
46 | 83 |
public void testBeans() { |
47 | 84 |
assertNotNull(actionFactory); |
... | ... | |
93 | 130 |
for (AtomicAction a : action.calculateAtomicActions()) { |
94 | 131 |
assertNotNull(a.getTargetValue()); |
95 | 132 |
|
96 |
String actionJSON = a.toString();
|
|
133 |
String actionJSON = AtomicActionSerialiser.toJSON(a);
|
|
97 | 134 |
|
98 | 135 |
log.info(actionJSON); |
99 | 136 |
|
... | ... | |
126 | 163 |
IOUtils.copy(i, writer); |
127 | 164 |
return writer.toString(); |
128 | 165 |
} |
166 |
|
|
167 |
private InputStream loadFromTransformationProfile(final String profilePath) { |
|
168 |
log.info("Loading xslt from: " + basePathProfiles + profilePath); |
|
169 |
InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath); |
|
170 |
final String s; |
|
171 |
try { |
|
172 |
s = IOUtils.toString(profile); |
|
173 |
log.debug("profile: " + s); |
|
174 |
} catch (IOException e) { |
|
175 |
e.printStackTrace(); |
|
176 |
throw new RuntimeException(e); |
|
177 |
} |
|
178 |
|
|
179 |
final SAXReader saxReader = new SAXReader(); |
|
180 |
Document doc = null; |
|
181 |
|
|
182 |
try { |
|
183 |
doc = saxReader.read(new StringReader(s)); |
|
184 |
} catch (DocumentException e) { |
|
185 |
e.printStackTrace(); |
|
186 |
throw new RuntimeException(e); |
|
187 |
} |
|
188 |
String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML(); |
|
189 |
return IOUtils.toInputStream(xslt); |
|
190 |
} |
|
129 | 191 |
} |
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/offlineHbaseLoadJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="073e55eb-c6f4-49a9-80b3-1a927612ba5b_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="offlineHbaseLoad" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that loads a given entity type in the offline dedup table</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.OfflineHbaseLoadMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
29 |
|
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
38 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
40 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
41 |
</JOB_INTERFACE> |
|
42 |
<SCAN> |
|
43 |
<FILTERS operator="MUST_PASS_ALL"> |
|
44 |
<FILTER type="prefix" param="entityTypeId" /> |
|
45 |
</FILTERS> |
|
46 |
<FAMILIES> |
|
47 |
<FAMILY param="entityType" /> |
|
48 |
</FAMILIES> |
|
49 |
</SCAN> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0" /> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingQuickJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="13beed98-81bf-4fbd-ab4f-de071177997c_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="iisPreprocessingQuickJob" type="oozie"> |
|
11 |
<DESCRIPTION>IIS preprocessing</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
<!-- Cluster wide --> |
|
14 |
<PROPERTY key="queueName" value="default"/> |
|
15 |
<PROPERTY key="user.name" value="dnet" /> |
|
16 |
|
|
17 |
<!-- Runtime --> |
|
18 |
<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing_quick_test"/> |
|
19 |
<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/> |
|
20 |
<PROPERTY key="input_referenceextraction_project" value="/user/marek.horst/share/referenceextraction/document_projects/2014-04-11"/> |
|
21 |
<PROPERTY key="input_referenceextraction_dataset" value="/user/marek.horst/share/referenceextraction/document_datasets/all/2014-04-11"/> |
|
22 |
<PROPERTY key="export_action_hbase_table_initialize" value="false"/> |
|
23 |
</STATIC_CONFIGURATION> |
|
24 |
<JOB_INTERFACE> |
|
25 |
<PARAM name="import_content_object_store_location" required="true" description="object store service location" /> |
|
26 |
<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" /> |
|
27 |
<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records" /> |
|
28 |
<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records" /> |
|
29 |
<PARAM name="import_database_service_location" required="true" description="database service endpoint" /> |
|
30 |
<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction" /> |
|
31 |
<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext" /> |
|
32 |
<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" /> |
|
33 |
<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" /> |
|
34 |
<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" /> |
|
35 |
<PARAM name="nameNode" required="true" description="hdfs name node" /> |
|
36 |
<PARAM name="jobTracker" required="true" description="job tracker name" /> |
|
37 |
<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" /> |
|
38 |
<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references" /> |
|
39 |
<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references" /> |
|
40 |
<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities" /> |
|
41 |
<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities" /> |
|
42 |
</JOB_INTERFACE> |
|
43 |
</HADOOP_JOB> |
|
44 |
<STATUS> |
|
45 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
46 |
<RUNNING_INSTANCES value="0"/> |
|
47 |
<CUMULATIVE_RUN value="0" /> |
|
48 |
</STATUS> |
|
49 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
50 |
</BODY> |
|
51 |
</RESOURCE_PROFILE> |
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="iisPreprocessingJob" type="oozie"> |
|
12 |
<DESCRIPTION>IIS preprocessing</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
<!-- Cluster wide --> |
|
15 |
<PROPERTY key="queueName" value="default"/> |
|
16 |
<PROPERTY key="user.name" value="dnet.beta"/> |
|
17 |
|
|
18 |
<!-- Runtime --> |
|
19 |
<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing"/> |
|
20 |
<PROPERTY key="oozie.wf.application.path" value="/tmp/integration/apps/preprocessing"/> |
|
21 |
<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/> |
|
22 |
<PROPERTY key="export_action_hbase_table_initialize" value="false"/> |
|
23 |
<!-- <PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> --> |
|
24 |
<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/> |
|
25 |
<PROPERTY key="metadataextraction_excluded_checksums" |
|
26 |
value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/> |
|
27 |
<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/> |
|
28 |
<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/> |
|
29 |
<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/> |
|
30 |
</STATIC_CONFIGURATION> |
|
31 |
<JOB_INTERFACE> |
|
32 |
<PARAM name="import_content_object_store_location" required="true" description="object store service location"/> |
|
33 |
<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/> |
|
34 |
<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records"/> |
|
35 |
<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records"/> |
|
36 |
<PARAM name="import_database_service_location" required="true" description="database service endpoint"/> |
|
37 |
<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction"/> |
|
38 |
<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext"/> |
|
39 |
<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/> |
|
40 |
<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/> |
|
41 |
<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/> |
|
42 |
<PARAM name="nameNode" required="true" description="hdfs name node"/> |
|
43 |
<PARAM name="jobTracker" required="true" description="job tracker name"/> |
|
44 |
<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/> |
|
45 |
<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references"/> |
|
46 |
<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references"/> |
|
47 |
<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities"/> |
|
48 |
<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities"/> |
|
49 |
</JOB_INTERFACE> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0"/> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
modules/dnet-openaire-domain/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/connectedComponentsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="28e8d911-87e5-4f39-9ce9-0d445126c75f_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="connectedComponentsJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map reduce job that joins all the vertex ids to build the connected components in the graph</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.ConnectedComponentsMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.ConnectedComponentsReducer"/> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.hbase.client.Put"/> |
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.compress.map.output" value="false"/> |
|
31 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
32 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
33 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
34 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="1"/> |
|
37 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
38 |
|
|
39 |
<!-- Uncomment to override the default lib path --> |
|
40 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="mapred.input.dir" required="true" description="source path on hdfs"/> |
|
44 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN> |
|
47 |
<FILTERS/> |
|
48 |
<FAMILIES/> |
|
49 |
</SCAN> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
Also available in: Unified diff
tests, added openaire profiles