Revision 40063
Added by Alessia Bardi about 9 years ago
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/dmf_2_hbase.xsl | ||
---|---|---|
1 |
<xsl:stylesheet xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
2 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:exslt="http://exslt.org/common" |
|
3 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetMdStoreToHbaseXsltFunctions" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt" |
|
5 |
extension-element-prefixes="exslt" version="1.0"> |
|
6 |
<xsl:output indent="yes" omit-xml-declaration="yes"/> |
|
7 |
<xsl:param name="mergeIdForHomonyms" select="true()"/> |
|
8 |
<xsl:param name="writeCoAuthors" select="false()"/> |
|
9 |
<xsl:template match="/*"> |
|
10 |
<xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/> |
|
11 |
<xsl:variable name="dateofcollection" select="//dr:dateOfCollection"/> |
|
12 |
<xsl:variable name="metadata" select="exslt:node-set(//*[local-name()='metadata']/*)"/> |
|
13 |
<xsl:variable name="namespaceprefix"> |
|
14 |
<xsl:choose> |
|
15 |
<!-- Use the record's oaf:datasourceprefix when it is non-empty; otherwise fall back to a quoted string literal (an unquoted name would be evaluated as an XPath child-element step and yield ''). --> |
|
16 |
<!-- TODO check namespaceprefix length is 12 --> |
|
17 |
<xsl:when test="string-length(//oaf:datasourceprefix) > 0"> |
|
18 |
<xsl:value-of select="//oaf:datasourceprefix"/> |
|
19 |
</xsl:when> |
|
20 |
<xsl:otherwise> |
|
21 |
<xsl:value-of select="'unknown_____'"/> |
|
22 |
</xsl:otherwise> |
|
23 |
</xsl:choose> |
|
24 |
</xsl:variable> |
|
25 |
<xsl:choose> |
|
26 |
<!-- <xsl:when test="count($metadata) = 0 or string-length($namespaceprefix) = 0"> --> |
|
27 |
<xsl:when test="count($metadata) = 0"> |
|
28 |
<ROWS/> |
|
29 |
</xsl:when> |
|
30 |
<xsl:otherwise> |
|
31 |
<xsl:variable name="objIdentifier" select="//dri:objIdentifier"/> |
|
32 |
<xsl:variable name="resultId" select="dnet:oafSimpleId('result', //dri:objIdentifier)"/> |
|
33 |
<xsl:if test="string-length($resultId) > 0"> |
|
34 |
<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/> |
|
35 |
<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/> |
|
36 |
<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/> |
|
37 |
<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/> |
|
38 |
<xsl:variable name="originalidTest" select="/record/*[local-name() = 'header']/*[local-name() = 'recordIdentifier']"/> |
|
39 |
<xsl:variable name="originalid"> |
|
40 |
<xsl:choose> |
|
41 |
<xsl:when test="contains($originalidTest, '::')"> |
|
42 |
<xsl:value-of select="substring-after($originalidTest, '::')"/> |
|
43 |
</xsl:when> |
|
44 |
<xsl:otherwise> |
|
45 |
<xsl:value-of select="$originalidTest"/> |
|
46 |
</xsl:otherwise> |
|
47 |
</xsl:choose> |
|
48 |
</xsl:variable> |
|
49 |
<xsl:variable name="result" |
|
50 |
select="dnet:oafResultFromMDStore($resultId, $about, $hostedbyid, $hostedbyname, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, $metadata)"/> |
|
51 |
<ROWS> |
|
52 |
<ROW columnFamily="result" key="{$resultId}"> |
|
53 |
<QUALIFIER name="body" type="base64"> |
|
54 |
<xsl:value-of select="$result"/> |
|
55 |
</QUALIFIER> |
|
56 |
</ROW> |
|
57 |
<xsl:for-each select="//*[local-name()='projectid']"> |
|
58 |
<xsl:variable name="projectId" select="dnet:oafSplitId('project', normalize-space(.))"/> |
|
59 |
<xsl:if test="string-length($projectId) > 0"> |
|
60 |
<xsl:variable name="resultproject" |
|
61 |
select="dnet:oafResultProject_Outcome_FromMDStore($resultId, $projectId, 'isProducedBy', $about)"/> |
|
62 |
<xsl:variable name="projectresult" |
|
63 |
select="dnet:oafResultProject_Outcome_FromMDStore($projectId, $resultId, 'produces', $about)"/> |
|
64 |
<ROW columnFamily="resultProject_outcome_isProducedBy" key="{$resultId}"> |
|
65 |
<QUALIFIER name="{$projectId}" type="base64"> |
|
66 |
<xsl:value-of select="$resultproject"/> |
|
67 |
</QUALIFIER> |
|
68 |
</ROW> |
|
69 |
<ROW columnFamily="resultProject_outcome_produces" key="{$projectId}"> |
|
70 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
71 |
<xsl:value-of select="$projectresult"/> |
|
72 |
</QUALIFIER> |
|
73 |
</ROW> |
|
74 |
</xsl:if> |
|
75 |
</xsl:for-each> |
|
76 |
<xsl:for-each select="//*[local-name()='relatedDataset']"> |
|
77 |
|
|
78 |
<!-- relatedDataset ids must be in the openaire format --> |
|
79 |
<xsl:variable name="datasetId" select="dnet:oafSimpleId('result', normalize-space(./@id))"/> |
|
80 |
<xsl:if test="string-length($datasetId) > 0"> |
|
81 |
<xsl:variable name="resultDataset" |
|
82 |
select="dnet:oafResultResult_PublicationDataset_FromMDStore($resultId, $datasetId, 'isRelatedTo', $about)"/> |
|
83 |
<xsl:variable name="datasetResult" |
|
84 |
select="dnet:oafResultResult_PublicationDataset_FromMDStore($datasetId, $resultId, 'isRelatedTo', $about)"/> |
|
85 |
<ROW columnFamily="resultResult_publicationDataset_isRelatedTo" key="{$resultId}"> |
|
86 |
<QUALIFIER name="{$datasetId}" type="base64"> |
|
87 |
<xsl:value-of select="$resultDataset"/> |
|
88 |
</QUALIFIER> |
|
89 |
</ROW> |
|
90 |
<ROW columnFamily="resultResult_publicationDataset_isRelatedTo" key="{$datasetId}"> |
|
91 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
92 |
<xsl:value-of select="$datasetResult"/> |
|
93 |
</QUALIFIER> |
|
94 |
</ROW> |
|
95 |
</xsl:if> |
|
96 |
</xsl:for-each> |
|
97 |
<xsl:for-each select="//*[local-name()='creator']"> |
|
98 |
<xsl:if test="string-length(normalize-space(.)) > 0"> |
|
99 |
<xsl:variable name="personIdPart"> |
|
100 |
<xsl:call-template name="personIdTmpl"> |
|
101 |
<xsl:with-param name="objIdentifier" select="$objIdentifier"/> |
|
102 |
</xsl:call-template> |
|
103 |
</xsl:variable> |
|
104 |
<xsl:variable name="position" select="position()"/> |
|
105 |
<xsl:variable name="personId" select="dnet:oafId('person', $namespaceprefix, normalize-space($personIdPart))"/> |
|
106 |
<xsl:variable name="person" |
|
107 |
select="dnet:oafPersonFromMDStore($personId, $about, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, .)"/> |
|
108 |
<xsl:variable name="personresult" |
|
109 |
select="dnet:oafPersonResult_Authorship_FromMDStore($personId, $resultId, $position, 'isAuthorOf', $about)"/> |
|
110 |
<xsl:variable name="resultperson" |
|
111 |
select="dnet:oafPersonResult_Authorship_FromMDStore($resultId, $personId, $position, 'hasAuthor', $about)"/> |
|
112 |
<xsl:if test="string-length($personId) > 0"> |
|
113 |
<ROW columnFamily="person" key="{$personId}"> |
|
114 |
<QUALIFIER name="body" type="base64"> |
|
115 |
<xsl:value-of select="$person"/> |
|
116 |
</QUALIFIER> |
|
117 |
</ROW> |
|
118 |
<ROW columnFamily="personResult_authorship_isAuthorOf" key="{$personId}"> |
|
119 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
120 |
<xsl:value-of select="$personresult"/> |
|
121 |
</QUALIFIER> |
|
122 |
</ROW> |
|
123 |
<ROW columnFamily="personResult_authorship_hasAuthor" key="{$resultId}"> |
|
124 |
<QUALIFIER name="{$personId}" type="base64"> |
|
125 |
<xsl:value-of select="$resultperson"/> |
|
126 |
</QUALIFIER> |
|
127 |
</ROW> |
|
128 |
<ROW columnFamily="result" key="{$personId}"> |
|
129 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
130 |
<xsl:value-of select="$result"/> |
|
131 |
</QUALIFIER> |
|
132 |
</ROW> |
|
133 |
</xsl:if> |
|
134 |
|
|
135 |
<!-- COAUTHORS --> |
|
136 |
<xsl:if test="$writeCoAuthors = true()"> |
|
137 |
<xsl:for-each select="../dc:creator"> |
|
138 |
<xsl:if test="$position != position()"> |
|
139 |
<xsl:variable name="coauthorIdPart"> |
|
140 |
<xsl:call-template name="personIdTmpl"> |
|
141 |
<xsl:with-param name="objIdentifier" select="$objIdentifier"/> |
|
142 |
</xsl:call-template> |
|
143 |
</xsl:variable> |
|
144 |
<xsl:variable name="coauthorId" select="dnet:oafId('person', $namespaceprefix, normalize-space($coauthorIdPart))"/> |
|
145 |
<xsl:variable name="personperson" |
|
146 |
select="dnet:oafPersonPerson_CoAuthorship_FromDMF($personId, $coauthorId, 'isCoAuthorOf', $about)"/> |
|
147 |
<ROW columnFamily="personPerson_coAuthorship_isCoAuthorOf" key="{$personId}"> |
|
148 |
<QUALIFIER name="{$coauthorId}" type="base64"> |
|
149 |
<xsl:value-of select="$personperson"/> |
|
150 |
</QUALIFIER> |
|
151 |
</ROW> |
|
152 |
</xsl:if> |
|
153 |
</xsl:for-each> |
|
154 |
</xsl:if> |
|
155 |
<!-- / COAUTHORS --> |
|
156 |
</xsl:if> |
|
157 |
</xsl:for-each> |
|
158 |
</ROWS> |
|
159 |
</xsl:if> |
|
160 |
</xsl:otherwise> |
|
161 |
</xsl:choose> |
|
162 |
</xsl:template> |
|
163 |
<xsl:template name="personIdTmpl"> |
|
164 |
<xsl:param name="objIdentifier"/> |
|
165 |
<xsl:choose> |
|
166 |
<xsl:when test="$mergeIdForHomonyms = false()"> |
|
167 |
<xsl:value-of select="concat($objIdentifier, '::', normalize-space(.))"/> |
|
168 |
</xsl:when> |
|
169 |
<xsl:otherwise> |
|
170 |
<xsl:value-of select="normalize-space(.)"/> |
|
171 |
</xsl:otherwise> |
|
172 |
</xsl:choose> |
|
173 |
</xsl:template> |
|
174 |
</xsl:stylesheet> |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/odf_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetMdStoreDataCiteToHbaseXsltFunctions" |
|
7 |
xmlns:exslt="http://exslt.org/common" extension-element-prefixes="exslt" |
|
8 |
exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt"> |
|
9 |
|
|
10 |
<xsl:output omit-xml-declaration="yes" indent="yes" /> |
|
11 |
<xsl:template match="/*"> |
|
12 |
<xsl:variable name="dataInfo" select="/*[local-name() = 'record']/*[local-name() = 'about']/*[local-name() = 'datainfo']"/> |
|
13 |
<xsl:variable name="dateofcollection" select="//dri:dateOfCollection" /> |
|
14 |
<xsl:variable name="metadata" |
|
15 |
select="exslt:node-set(//*[local-name()='metadata']/*)" /> |
|
16 |
<xsl:variable name="namespaceprefix"> |
|
17 |
<xsl:choose> |
|
18 |
<!-- TODO check namespaceprefix length is 12. The fallback below must stay a quoted string literal: unquoted, it is an XPath child-element step that evaluates to ''. --> |
|
19 |
<xsl:when test="string-length(//oaf:datasourceprefix) > 0"> |
|
20 |
<xsl:value-of select="//oaf:datasourceprefix" /> |
|
21 |
</xsl:when> |
|
22 |
<xsl:otherwise> |
|
23 |
<xsl:value-of select="'unknown_____'" /> |
|
24 |
</xsl:otherwise> |
|
25 |
</xsl:choose> |
|
26 |
</xsl:variable> |
|
27 |
|
|
28 |
<xsl:choose> |
|
29 |
<xsl:when |
|
30 |
test="count($metadata) = 0 or normalize-space(//oaf:skipRecord)= 'true'"> |
|
31 |
<ROWS /> |
|
32 |
</xsl:when> |
|
33 |
<xsl:otherwise> |
|
34 |
|
|
35 |
<xsl:variable name="resultId" |
|
36 |
select="dnet:oafSimpleId('result', //dri:objIdentifier)" /> |
|
37 |
|
|
38 |
<xsl:if test="string-length($resultId) > 0"> |
|
39 |
<xsl:variable name="originalid" |
|
40 |
select="concat('', //*[local-name() = 'resource']/*[local-name()='identifier'])" /> |
|
41 |
<xsl:variable name="creators" select="//*[local-name() = 'creator']" /> |
|
42 |
<xsl:variable name="titles" select="//*[local-name() = 'title']" /> |
|
43 |
<xsl:variable name="subjects" select="//*[local-name() = 'subject']" /> |
|
44 |
<xsl:variable name="publisher" select="//*[local-name() = 'publisher']" /> |
|
45 |
<xsl:variable name="descriptions" select="//*[local-name() = 'description']" /> |
|
46 |
<xsl:variable name="dates" select="//*[local-name() = 'date']" /> |
|
47 |
<xsl:variable name="dateaccepted" select="//oaf:dateAccepted" /> |
|
48 |
<xsl:variable name="resourceType" |
|
49 |
select="//*[local-name() = 'resourceType']" /> |
|
50 |
<xsl:variable name="formats" select="//*[local-name() = 'format']" /> |
|
51 |
<xsl:variable name="sizes" select="//*[local-name() = 'size']" /> |
|
52 |
<xsl:variable name="rights" select="//oaf:accessrights" /> |
|
53 |
<xsl:variable name="version" select="//*[local-name() = 'version']" /> |
|
54 |
<xsl:variable name="language" select="//oaf:language" /> |
|
55 |
<xsl:variable name="cobjcategory" select="//dr:CobjCategory" /> |
|
56 |
|
|
57 |
<xsl:variable name="instanceURI"> |
|
58 |
<xsl:choose> |
|
59 |
<xsl:when |
|
60 |
test="string-length( //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='DOI']) > 0"> |
|
61 |
<xsl:value-of |
|
62 |
select="concat('http://dx.doi.org','/', //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='DOI']/text())" /> |
|
63 |
</xsl:when> |
|
64 |
<xsl:otherwise> |
|
65 |
<xsl:value-of |
|
66 |
select="concat('', //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='URL'])" /> |
|
67 |
</xsl:otherwise> |
|
68 |
</xsl:choose> |
|
69 |
</xsl:variable> |
|
70 |
|
|
71 |
<xsl:variable name="hostedbyid" |
|
72 |
select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)" /> |
|
73 |
<xsl:variable name="hostedbyname" select="concat('', //oaf:hostedBy/@name)" /> |
|
74 |
<xsl:variable name="collectedfromid" |
|
75 |
select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)" /> |
|
76 |
<xsl:variable name="collectedfromname" |
|
77 |
select="concat('', //oaf:collectedFrom/@name)" /> |
|
78 |
<xsl:variable name="dateOfCollection" |
|
79 |
select="concat('', //dri:dateOfCollection)" /> |
|
80 |
|
|
81 |
<xsl:variable name="result" |
|
82 |
select="dnet:oafResult_FromDatacite($resultId, $dataInfo, $metadata, $titles, |
|
83 |
$subjects, $publisher, $descriptions, $dates, $dateaccepted, $resourceType, |
|
84 |
$formats, $sizes, $language, $cobjcategory, $rights, $version, $hostedbyid, $hostedbyname, |
|
85 |
$collectedfromid, $collectedfromname, $originalid, $instanceURI, $dateOfCollection)" /> |
|
86 |
|
|
87 |
<ROWS> |
|
88 |
<ROW key="{$resultId}" columnFamily="result"> |
|
89 |
<QUALIFIER name="body" type="base64"> |
|
90 |
<xsl:value-of select="$result" /> |
|
91 |
</QUALIFIER> |
|
92 |
</ROW> |
|
93 |
<xsl:for-each select="//*[local-name() = 'creator']"> |
|
94 |
<xsl:variable name="personIdTemp"> |
|
95 |
<xsl:choose> |
|
96 |
<xsl:when |
|
97 |
test="string-length(./*[local-name() = 'nameIdentifier']) > 0"> |
|
98 |
<xsl:value-of |
|
99 |
select="translate(normalize-space(./*[local-name() = 'nameIdentifier']),' .,','___')" /> |
|
100 |
</xsl:when> |
|
101 |
<xsl:otherwise> |
|
102 |
<xsl:value-of |
|
103 |
select="translate(normalize-space(./*[local-name() = 'creatorName']),' .,','___')" /> |
|
104 |
</xsl:otherwise> |
|
105 |
</xsl:choose> |
|
106 |
</xsl:variable> |
|
107 |
<xsl:variable name="personId" |
|
108 |
select="dnet:oafId('person', $namespaceprefix, normalize-space($personIdTemp))" /> |
|
109 |
|
|
110 |
<xsl:variable name="originalPersonId" |
|
111 |
select="./*[local-name() = 'nameIdentifier']" /> |
|
112 |
<xsl:variable name="position" select="position()" /> |
|
113 |
<xsl:if test="string-length($personId) > 0"> |
|
114 |
<xsl:variable name="person" |
|
115 |
select="dnet:oafPerson_FromDatacite($personId, $dataInfo, $collectedfromid, $collectedfromname,$originalPersonId, $dateOfCollection ,normalize-space(./*[local-name() = 'creatorName']))" /> |
|
116 |
|
|
117 |
<xsl:variable name="personresult" |
|
118 |
select="dnet:oafPersonResult_Authorship_FromDatacite($personId, $resultId, $position, 'isAuthorOf', $dataInfo)" /> |
|
119 |
<xsl:variable name="resultperson" |
|
120 |
select="dnet:oafPersonResult_Authorship_FromDatacite($resultId, $personId, $position, 'hasAuthor', $dataInfo)" /> |
|
121 |
<ROW key="{$personId}" columnFamily="person"> |
|
122 |
<QUALIFIER name="body" type="base64"> |
|
123 |
<xsl:value-of select="$person" /> |
|
124 |
</QUALIFIER> |
|
125 |
</ROW> |
|
126 |
<ROW key="{$personId}" columnFamily="personResult_authorship_isAuthorOf"> |
|
127 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
128 |
<xsl:value-of select="$personresult" /> |
|
129 |
</QUALIFIER> |
|
130 |
</ROW> |
|
131 |
<ROW key="{$resultId}" columnFamily="personResult_authorship_hasAuthor"> |
|
132 |
<QUALIFIER name="{$personId}" type="base64"> |
|
133 |
<xsl:value-of select="$resultperson" /> |
|
134 |
</QUALIFIER> |
|
135 |
</ROW> |
|
136 |
</xsl:if> |
|
137 |
</xsl:for-each> |
|
138 |
|
|
139 |
<xsl:for-each select="//*[local-name()='projectid']"> |
|
140 |
|
|
141 |
<xsl:variable name="projectId" |
|
142 |
select="dnet:oafSplitId('project', normalize-space(.))" /> |
|
143 |
|
|
144 |
<xsl:variable name="resultproject" |
|
145 |
select="dnet:oafResultProject_Outcome_FromDatacite($resultId, $projectId, 'isProducedBy', $dataInfo)" /> |
|
146 |
<xsl:variable name="projectresult" |
|
147 |
select="dnet:oafResultProject_Outcome_FromDatacite($projectId, $resultId, 'produces', $dataInfo)" /> |
|
148 |
|
|
149 |
<xsl:if test="string-length($projectId) > 0"> |
|
150 |
<ROW key="{$resultId}" columnFamily="resultProject_outcome_isProducedBy"> |
|
151 |
<QUALIFIER name="{$projectId}" type="base64"> |
|
152 |
<xsl:value-of select="$resultproject" /> |
|
153 |
</QUALIFIER> |
|
154 |
</ROW> |
|
155 |
<ROW key="{$projectId}" columnFamily="resultProject_outcome_produces"> |
|
156 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
157 |
<xsl:value-of select="$projectresult" /> |
|
158 |
</QUALIFIER> |
|
159 |
</ROW> |
|
160 |
</xsl:if> |
|
161 |
</xsl:for-each> |
|
162 |
|
|
163 |
<xsl:for-each select="//*[local-name()='relatedPublication']"> |
|
164 |
|
|
165 |
<!-- relatedPublication ids must be in the openaire format --> |
|
166 |
<xsl:variable name="publicationId" select="dnet:oafSimpleId('result', normalize-space(./@id))"/> |
|
167 |
|
|
168 |
<xsl:if test="string-length($publicationId) > 0"> |
|
169 |
|
|
170 |
<xsl:variable name="resultDataset" select="dnet:oafResultResult_PublicationDataset_FromDatacite($resultId, $publicationId, 'isRelatedTo', $dataInfo)"/> |
|
171 |
<xsl:variable name="datasetResult" select="dnet:oafResultResult_PublicationDataset_FromDatacite($publicationId, $resultId, 'isRelatedTo', $dataInfo)"/> |
|
172 |
|
|
173 |
<ROW key="{$resultId}" columnFamily="resultResult_publicationDataset_isRelatedTo"> |
|
174 |
<QUALIFIER name="{$publicationId}" type="base64"><xsl:value-of select="$resultDataset"/></QUALIFIER> |
|
175 |
</ROW> |
|
176 |
<ROW key="{$publicationId}" columnFamily="resultResult_publicationDataset_isRelatedTo"> |
|
177 |
<QUALIFIER name="{$resultId}" type="base64"><xsl:value-of select="$datasetResult"/></QUALIFIER> |
|
178 |
</ROW> |
|
179 |
</xsl:if> |
|
180 |
</xsl:for-each> |
|
181 |
</ROWS> |
|
182 |
</xsl:if> |
|
183 |
</xsl:otherwise> |
|
184 |
</xsl:choose> |
|
185 |
</xsl:template> |
|
186 |
</xsl:stylesheet> |
modules/dnet-mapreduce-jobs/trunk/install.sh | ||
---|---|---|
1 | 1 |
#!/bin/bash |
2 | 2 |
|
3 |
mvn clean install -DskipTests=true;
|
|
3 |
mvn clean install; |
|
4 | 4 |
rm -rf ~/.m2/repository/eu/dnetlib/dnet-mapreduce-jobs-assembly; |
5 | 5 |
mvn assembly:assembly -DskipTests=true && mvn install:install-file -Dfile=target/dnet-mapreduce-jobs-0.0.8.4-SNAPSHOT-jar-with-dependencies.jar -DgroupId=eu.dnetlib -DartifactId=dnet-mapreduce-jobs-assembly -Dversion=0.0.8.4-SNAPSHOT -Dpackaging=jar |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/transform/XsltRowTransformerFactoryTest.java | ||
---|---|---|
21 | 21 |
import eu.dnetlib.miscutils.functional.xml.IndentXmlString; |
22 | 22 |
import org.apache.commons.io.IOUtils; |
23 | 23 |
import org.apache.commons.lang.StringUtils; |
24 |
import org.apache.commons.logging.Log; |
|
25 |
import org.apache.commons.logging.LogFactory; |
|
24 | 26 |
import org.dom4j.Document; |
25 | 27 |
import org.dom4j.DocumentException; |
26 | 28 |
import org.dom4j.io.SAXReader; |
... | ... | |
32 | 34 |
|
33 | 35 |
public class XsltRowTransformerFactoryTest { |
34 | 36 |
|
37 |
private static final Log log = LogFactory.getLog(XsltRowTransformerFactoryTest.class); |
|
35 | 38 |
private XsltRowTransformerFactory factory; |
36 | 39 |
|
37 | 40 |
private EntityConfigTable entityConfigTable; |
41 |
private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/"; |
|
38 | 42 |
|
39 | 43 |
@Before |
40 | 44 |
public void setUp() throws Exception { |
... | ... | |
55 | 59 |
|
56 | 60 |
xslParams.put("mergeIdForHomonymsMap", m); |
57 | 61 |
|
58 |
final List<Row> rows = asRows(load("dmf_2_hbase_person.xsl"), xslParams, load("recordManyAuthors.xml"));
|
|
62 |
final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordManyAuthors.xml"));
|
|
59 | 63 |
int authorCount = 0; |
60 | 64 |
for (final Row row : rows) { |
61 | 65 |
for (final Column<String, byte[]> col : row.getColumns()) { |
... | ... | |
64 | 68 |
final OafDecoder d = OafDecoder.decode(col.getValue()); |
65 | 69 |
assertNotNull(d); |
66 | 70 |
|
67 |
System.out.println(d.getEntity().getPerson().getCoauthorList().size());
|
|
71 |
log.debug(d.getEntity().getPerson().getCoauthorList().size());
|
|
68 | 72 |
} |
69 | 73 |
} |
70 |
System.out.println("---> " + authorCount);
|
|
74 |
log.info("authors' count: ---> " + authorCount);
|
|
71 | 75 |
|
72 | 76 |
} |
73 | 77 |
|
... | ... | |
84 | 88 |
|
85 | 89 |
xslParams.put("mergeIdForHomonymsMap", m); |
86 | 90 |
|
87 |
final List<Row> rows = asRows(load("dmf_2_hbase_person.xsl"), xslParams, load("recordArxiv.xml"));
|
|
91 |
final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordArxiv.xml"));
|
|
88 | 92 |
|
89 | 93 |
for (final Row row : rows) { |
90 | 94 |
for (final Column<String, byte[]> col : row.getColumns()) { |
91 | 95 |
|
92 | 96 |
final OafDecoder d = OafDecoder.decode(col.getValue()); |
93 |
System.out.println(d.getOaf());
|
|
97 |
log.debug(d.getOaf());
|
|
94 | 98 |
} |
95 | 99 |
} |
96 | 100 |
} |
... | ... | |
98 | 102 |
@Test |
99 | 103 |
public void testParseClaimUpdate() throws Exception { |
100 | 104 |
|
101 |
doTest(load("odf_2_hbase.xsl"), load("recordClaimUpdate.xml"));
|
|
105 |
doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaimUpdate.xml"));
|
|
102 | 106 |
} |
103 | 107 |
|
104 | 108 |
@Test |
105 | 109 |
public void testParseDatasetPUB() throws Exception { |
106 | 110 |
|
107 |
doTest(load("odf_2_hbase.xsl"), load("recordDatasetPUB.xml"));
|
|
111 |
doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatasetPUB.xml"));
|
|
108 | 112 |
} |
109 | 113 |
|
110 | 114 |
@Test |
111 | 115 |
public void testParseClaim() throws Exception { |
112 | 116 |
|
113 |
doTest(load("dmf_2_hbase.xsl"), load("recordClaim.xml"));
|
|
117 |
doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml"));
|
|
114 | 118 |
} |
115 | 119 |
|
116 | 120 |
@Test |
117 | 121 |
public void testParseACM() throws Exception { |
118 | 122 |
|
119 |
doTest(load("dmf_2_hbase.xsl"), load("recordACM.xml"));
|
|
123 |
doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordACM.xml"));
|
|
120 | 124 |
} |
121 | 125 |
|
122 | 126 |
@Test |
123 | 127 |
public void testParseASB() throws Exception { |
124 | 128 |
|
125 |
doTest(load("dmf_2_hbase.xsl"), load("recordASB.xml"));
|
|
129 |
doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordASB.xml"));
|
|
126 | 130 |
} |
127 | 131 |
|
128 | 132 |
@Test |
... | ... | |
140 | 144 |
@Test |
141 | 145 |
public void testParseDmf() throws Exception { |
142 | 146 |
|
143 |
doTest(load("dmf_2_hbase.xsl"), load("record.xml"));
|
|
147 |
doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml"));
|
|
144 | 148 |
} |
145 | 149 |
|
146 | 150 |
@Test |
147 | 151 |
public void testParseDatacite() throws Exception { |
148 | 152 |
|
149 |
doTest(load("datacite_2_hbase.xsl"), load("recordDatacite.xml"));
|
|
153 |
doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite.xml"));
|
|
150 | 154 |
} |
151 | 155 |
|
152 | 156 |
@Test |
153 | 157 |
public void testParseDatacite2() throws Exception { |
154 | 158 |
|
155 |
doTest(load("datacite_2_hbase.xsl"), load("recordDatacite2.xml"));
|
|
159 |
doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite2.xml"));
|
|
156 | 160 |
} |
157 | 161 |
|
158 | 162 |
@Test |
159 | 163 |
public void testLinkPangaea() throws Exception { |
160 | 164 |
|
161 | 165 |
final List<Row> rows = Lists.newArrayList(); |
162 |
rows.addAll(asRows(load("datacite_2_hbase.xsl"), load("pangaeODF.xml")));
|
|
163 |
rows.addAll(asRows(load("dmf_2_hbase.xsl"), load("pangaeOAF.xml")));
|
|
166 |
rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF.xml")));
|
|
167 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF.xml")));
|
|
164 | 168 |
rows.addAll(asRows(load("projects_2_hbase.xsl"), load("projectRecordCorda.xml"))); |
165 | 169 |
|
166 | 170 |
printAll(mapAll(buildTable(rows))); |
... | ... | |
171 | 175 |
|
172 | 176 |
final List<Row> rows = Lists.newArrayList(); |
173 | 177 |
rows.addAll(asRows(load("projects_2_hbase.xsl"), load("projectRecordCorda.xml"))); |
174 |
rows.addAll(asRows(load("dmf_2_hbase.xsl"), load("recordCorda.xml")));
|
|
178 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordCorda.xml")));
|
|
175 | 179 |
|
176 | 180 |
printAll(mapAll(buildTable(rows))); |
177 | 181 |
} |
... | ... | |
181 | 185 |
|
182 | 186 |
final List<Row> rows = Lists.newArrayList(); |
183 | 187 |
rows.addAll(asRows(load("projects_2_hbase.xsl"), load("projectRecordFCT.xml"))); |
184 |
rows.addAll(asRows(load("dmf_2_hbase.xsl"), load("recordFCT.xml")));
|
|
188 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFCT.xml")));
|
|
185 | 189 |
|
186 | 190 |
printAll(mapAll(buildTable(rows))); |
187 | 191 |
} |
... | ... | |
191 | 195 |
|
192 | 196 |
final List<Row> rows = Lists.newArrayList(); |
193 | 197 |
rows.addAll(asRows(load("projects_2_hbase.xsl"), load("projectRecordWT.xml"))); |
194 |
rows.addAll(asRows(load("dmf_2_hbase.xsl"), load("recordWT.xml")));
|
|
198 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordWT.xml")));
|
|
195 | 199 |
|
196 | 200 |
printAll(mapAll(buildTable(rows))); |
197 | 201 |
} |
... | ... | |
221 | 225 |
xslParams.put("mergeIdForHomonymsMap", m); |
222 | 226 |
|
223 | 227 |
final List<Row> rows = Lists.newArrayList(); |
224 |
rows.addAll(asRows(load("dmf_2_hbase.xsl"), xslParams, load("recordArxiv.xml")));
|
|
225 |
rows.addAll(asRows(load("dmf_2_hbase.xsl"), xslParams, load("recordArxiv2.xml")));
|
|
226 |
rows.addAll(asRows(load("dmf_2_hbase.xsl"), xslParams, load("recordPuma1.xml")));
|
|
227 |
rows.addAll(asRows(load("dmf_2_hbase.xsl"), xslParams, load("recordPuma2.xml")));
|
|
228 |
rows.addAll(asRows(load("dmf_2_hbase.xsl"), xslParams, load("recordUNIBI.xml")));
|
|
228 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordArxiv.xml")));
|
|
229 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordArxiv2.xml")));
|
|
230 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordPuma1.xml")));
|
|
231 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordPuma2.xml")));
|
|
232 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordUNIBI.xml")));
|
|
229 | 233 |
|
230 | 234 |
printPersonFullnames(mapAll(buildTable(rows))); |
231 | 235 |
} |
... | ... | |
233 | 237 |
@Test |
234 | 238 |
public void testParseDoajOAF() throws Exception { |
235 | 239 |
|
236 |
doTest(load("dmf_2_hbase.xsl"), load("doajUniversityRecord.xml"));
|
|
240 |
doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("doajUniversityRecord.xml"));
|
|
237 | 241 |
} |
238 | 242 |
|
239 | 243 |
@Test |
... | ... | |
244 | 248 |
|
245 | 249 |
// @Test |
246 | 250 |
public void testFromMongodbCompressedDump() throws Exception { |
247 |
doTestJsonGz(load("dmf_2_hbase.xsl"), load("mdstore_cleaned.json.gz"));
|
|
251 |
doTestJsonGz(loadFromTransformationProfile("oaf2hbase.xml"), load("mdstore_cleaned.json.gz"));
|
|
248 | 252 |
} |
249 | 253 |
|
250 | 254 |
private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception { |
251 | 255 |
try { |
252 | 256 |
final List<Row> rows = asRows(xsltStream, recordStream); |
253 | 257 |
|
254 |
System.out.println(rows);
|
|
258 |
log.info(rows);
|
|
255 | 259 |
|
256 | 260 |
final Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows); |
257 | 261 |
|
... | ... | |
539 | 543 |
if ((xpaths != null) && !xpaths.isEmpty() && (xpaths.get(kd.getType()) != null)) { |
540 | 544 |
final Document doc = r.read(new StringReader(val)); |
541 | 545 |
|
542 |
System.out.println("\n" + e.getKey());
|
|
546 |
log.debug("\n" + e.getKey());
|
|
543 | 547 |
for (final String xpath : xpaths.get(kd.getType())) { |
544 |
System.out.println(doc.valueOf(xpath));
|
|
548 |
log.debug(doc.valueOf(xpath));
|
|
545 | 549 |
} |
546 | 550 |
} else { |
547 | 551 |
|
548 |
System.out.println(val);
|
|
552 |
log.info(val);
|
|
549 | 553 |
} |
550 | 554 |
} |
551 | 555 |
} |
... | ... | |
554 | 558 |
private void printNoIndent(final Map<String, XmlRecordFactory> builders) { |
555 | 559 |
for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) { |
556 | 560 |
if (e.getValue().isValid()) { |
557 |
System.out.println(e.getValue().build());
|
|
561 |
log.debug(e.getValue().build());
|
|
558 | 562 |
} else { |
559 |
System.out.println("invalid builder: " + e.getKey());
|
|
563 |
log.debug("invalid builder: " + e.getKey());
|
|
560 | 564 |
} |
561 | 565 |
} |
562 | 566 |
} |
... | ... | |
576 | 580 |
return getClass().getResourceAsStream(fileName); |
577 | 581 |
} |
578 | 582 |
|
583 |
/** Reads the profile at basePathProfiles + profilePath from the classpath and returns, as a stream, the XML of the first element with local name 'stylesheet' found under SCRIPT/CODE. Throws IllegalArgumentException when the resource is missing, RuntimeException when it cannot be parsed, IllegalStateException when no stylesheet element is present. */ private InputStream loadFromTransformationProfile(final String profilePath) { |
|
584 |
log.info("Loading xslt from: " + basePathProfiles + profilePath); |
|
585 |
final InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath); |
|
586 |
if (profile == null) { throw new IllegalArgumentException("Transformation profile not found on classpath: " + basePathProfiles + profilePath); } // getResourceAsStream returns null for a missing resource |
|
587 |
final Document doc; |
|
588 |
try { |
|
589 |
doc = new SAXReader().read(profile); |
|
590 |
} catch (DocumentException e) { |
|
591 |
throw new RuntimeException("Cannot parse transformation profile: " + basePathProfiles + profilePath, e); // cause is preserved, so no separate printStackTrace |
|
592 |
} |
|
593 |
final org.dom4j.Node stylesheet = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']"); |
|
594 |
if (stylesheet == null) { throw new IllegalStateException("No stylesheet element under //SCRIPT/CODE in profile: " + basePathProfiles + profilePath); } // avoid a bare NullPointerException on asXML() |
|
595 |
return IOUtils.toInputStream(stylesheet.asXML()); |
|
596 |
} |
|
597 |
|
|
579 | 598 |
@Test |
599 |
public void testLoadFromTransformationProfile() throws IOException { |
|
600 |
InputStream in = loadFromTransformationProfile("oaf2hbase.xml"); |
|
601 |
log.info(IOUtils.toString(in)); |
|
602 |
} |
|
603 |
|
|
604 |
@Test |
|
580 | 605 |
public void test_template() throws Exception { |
581 |
final String xslt = IOUtils.toString(load("dmf_2_hbase.xsl"));
|
|
606 |
final String xslt = IOUtils.toString(loadFromTransformationProfile("oaf2hbase.xml"));
|
|
582 | 607 |
final XsltRowTransformer transformer = factory.getTransformer(xslt); |
583 | 608 |
assertNotNull(transformer); |
584 | 609 |
|
modules/dnet-mapreduce-jobs/trunk/src/test/resources/log4j.properties | ||
---|---|---|
1 |
### Root Level ### |
|
2 |
log4j.rootLogger=WARN, CONSOLE |
|
3 |
|
|
4 |
### Configuration for the CONSOLE appender ### |
|
5 |
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender |
|
6 |
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout |
|
7 |
log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c - %m%n |
|
8 |
|
|
9 |
org.apache.cxf.Logger=org.apache.cxf.common.logging.Log4jLogger |
|
10 |
|
|
11 |
### Application Level ### |
|
12 |
log4j.logger.eu.dnetlib=INFO |
|
13 |
log4j.logger.eu.dnetlib.data.transform.xml.AbstractDNetOafXsltFunctions=DEBUG |
|
14 |
log4j.logger.eu.dnetlib.data.transform.XsltRowTransformerFactoryTest=INFO |
modules/dnet-mapreduce-jobs/trunk/pom.xml | ||
---|---|---|
4 | 4 |
<parent> |
5 | 5 |
<groupId>eu.dnetlib</groupId> |
6 | 6 |
<artifactId>dnet-hadoop-parent</artifactId> |
7 |
<version>1.0.0</version> |
|
7 |
<version>1.0.0-SNAPSHOT</version>
|
|
8 | 8 |
<relativePath/> |
9 | 9 |
</parent> |
10 | 10 |
<modelVersion>4.0.0</modelVersion> |
... | ... | |
225 | 225 |
<groupId>org.mockito</groupId> |
226 | 226 |
<artifactId>mockito-all</artifactId> |
227 | 227 |
<version>1.8.5</version> |
228 |
<scope>test</scope> |
|
228 | 229 |
</dependency> |
230 |
<dependency> |
|
231 |
<groupId>eu.dnetlib</groupId> |
|
232 |
<artifactId>dnet-openaireplus-profiles</artifactId> |
|
233 |
<version>[1.0.9-SNAPSHOT]</version> |
|
234 |
<scope>test</scope> |
|
235 |
</dependency> |
|
229 | 236 |
|
230 | 237 |
</dependencies> |
231 | 238 |
</project> |
Also available in: Unified diff
Tests load the XSLT from the TDSRule profiles in dnet-openaireplus-profiles