Project

General

Profile

« Previous | Next » 

Revision 45459

refactoring and simplifying the HBase mapping mechanism

View differences:

dmfdli2hbase.xml
14 14
            <SCRIPT>
15 15
	            <TITLE>xslt_mapping_dmf_wds2hbase</TITLE>
16 16
                <CODE>
17
                    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
18
                                    xmlns:dnet="eu.dnetlib.data.transform.xml.DmfToHbaseXsltFunctions"
19
                                    xmlns:exslt="http://exslt.org/common"
20
                                    xmlns:oaf="http://namespace.dnet.eu/oaf"
21
                                    xmlns:dc="http://purl.org/dc/elements/1.1/"
22
                                    xmlns:dri="http://www.driver-repository.eu/namespace/dri"
23
                                    xmlns:dr="http://www.driver-repository.eu/namespace/dr"
24
                                    version="1.0" extension-element-prefixes="exslt"
25
                                    exclude-result-prefixes="xsl oaf dr dri dnet exslt">
17
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
18
                xmlns:dnet="eu.dnetlib.data.transform.xml.DmfToHbaseXsltFunctions"
19
                xmlns:exslt="http://exslt.org/common"
20
                xmlns:oaf="http://namespace.dnet.eu/oaf"
21
                xmlns:dc="http://purl.org/dc/elements/1.1/"
22
                xmlns:dri="http://www.driver-repository.eu/namespace/dri"
23
                xmlns:dr="http://www.driver-repository.eu/namespace/dr"
24
                version="1.0" extension-element-prefixes="exslt"
25
                exclude-result-prefixes="xsl oaf dr dri dnet exslt">
26 26

  
27
                        <xsl:output omit-xml-declaration="yes" indent="yes"/>
28
                        <xsl:template match="/*">
29
                            <xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/>
30
                            <xsl:variable name="dateOfCollection" select="concat('', //dri:dateOfCollection)"/>
31
                            <xsl:variable name="dateoftransformation" select="concat('', //dr:dateOfTransformation)"/>
27
    <xsl:output omit-xml-declaration="yes" indent="yes"/>
28
    <xsl:template match="/*">
29
        <xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/>
30
        <xsl:variable name="dateOfCollection" select="concat('', //dri:dateOfCollection)"/>
31
        <xsl:variable name="dateoftransformation" select="concat('', //dr:dateOfTransformation)"/>
32 32

  
33
                            <xsl:variable name="trust" select="string('0.9')"/>
34
                            <xsl:variable name="provenance" select="string('sysimport:crosswalk:datasetarchive')"/>
33
        <xsl:variable name="trust" select="string('0.9')"/>
34
        <xsl:variable name="provenance" select="string('sysimport:crosswalk:datasetarchive')"/>
35 35

  
36
                            <xsl:variable name="metadata"
37
                                          select="exslt:node-set(//*[local-name()='metadata']/*)"/>
38
                            <xsl:variable name="namespaceprefix">
39
                                <xsl:choose>
40
                                    <!-- TODO check namespaceprefix length is 12 -->
41
                                    <xsl:when test="string-length(//oaf:datasourceprefix) &gt; 0">
42
                                        <xsl:value-of select="//oaf:datasourceprefix"/>
43
                                    </xsl:when>
44
                                    <xsl:otherwise>
45
                                        <xsl:value-of select="unknown_____"/>
46
                                    </xsl:otherwise>
47
                                </xsl:choose>
48
                            </xsl:variable>
36
        <xsl:variable name="metadata"
37
                      select="exslt:node-set(//*[local-name()='metadata']/*)"/>
38
        <xsl:variable name="namespaceprefix">
39
            <xsl:choose>
40
                <!-- Resolves the namespace prefix: the record's oaf:datasourceprefix when non-empty, otherwise a 12-character placeholder. The placeholder is a quoted XPath string literal; unquoted it would be read as a child-element name test and yield an empty string. TODO check namespaceprefix length is 12 -->
41
                <xsl:when test="string-length(//oaf:datasourceprefix) &gt; 0">
42
                    <xsl:value-of select="//oaf:datasourceprefix"/>
43
                </xsl:when>
44
                <xsl:otherwise>
45
                    <xsl:value-of select="'unknown_____'"/>
46
                </xsl:otherwise>
47
            </xsl:choose>
48
        </xsl:variable>
49 49

  
50
                            <xsl:choose>
51
                                <xsl:when test="count($metadata) = 0 or normalize-space(//oaf:skipRecord)= 'true'">
52
                                    <ROWS/>
53
                                </xsl:when>
54
                                <xsl:otherwise>
50
        <xsl:choose>
51
            <xsl:when test="count($metadata) = 0 or normalize-space(//oaf:skipRecord)= 'true'">
52
                <ROWS/>
53
            </xsl:when>
54
            <xsl:otherwise>
55 55

  
56
                                    <xsl:variable name="datasetId"
57
                                                  select="dnet:oafSimpleId('dataset', //dri:objIdentifier)"/>
56
                <xsl:variable name="datasetId"
57
                              select="dnet:oafSimpleId('dataset', //dri:objIdentifier)"/>
58 58

  
59
                                    <xsl:if test="string-length($datasetId) &gt; 0">
60
                                        <xsl:variable name="originalids"
61
                                                      select="//*[local-name() = 'resource']/*[local-name()='identifier'] | //*[local-name() = 'resource']//*[local-name()='alternateIdentifier']"/>
62
                                        <xsl:variable name="creators" select="//*[local-name() = 'creator']"/>
63
                                        <xsl:variable name="titles" select="//*[local-name() = 'title']"/>
64
                                        <xsl:variable name="subjects" select="//*[local-name() = 'subject']"/>
65
                                        <xsl:variable name="publisher" select="//*[local-name() = 'publisher']"/>
66
                                        <xsl:variable name="descriptions" select="//*[local-name() = 'description']"/>
67
                                        <xsl:variable name="dates" select="//*[local-name() = 'date']"/>
68
                                        <xsl:variable name="dateaccepted" select="//oaf:dateAccepted"/>
69
                                        <xsl:variable name="resourceType" select="//*[local-name() = 'resourceType']"/>
70
                                        <xsl:variable name="formats" select="//*[local-name() = 'format']"/>
71
                                        <xsl:variable name="sizes" select="//*[local-name() = 'size']"/>
72
                                        <xsl:variable name="rights" select="//oaf:accessrights"/>
73
                                        <xsl:variable name="version" select="//*[local-name() = 'version']"/>
74
                                        <xsl:variable name="language" select="//oaf:language"/>
75
                                        <xsl:variable name="cobjcategory" select="//dr:CobjCategory"/>
76
                                        <xsl:variable name="contributor"
77
                                                      select="//*[local-name() = 'creator']/*[local-name() = 'creatorName']"/>
78
                                        <xsl:variable name="namespaceprefix"
79
                                                      select="//*[local-name() = 'datasourceprefix']"/>
59
                <xsl:if test="string-length($datasetId) &gt; 0">
60
                    <xsl:variable name="originalids"
61
                                  select="//*[local-name() = 'resource']/*[local-name()='identifier'] | //*[local-name() = 'resource']//*[local-name()='alternateIdentifier']"/>
62
                    <xsl:variable name="creators" select="//*[local-name() = 'creator']"/>
63
                    <xsl:variable name="titles" select="//*[local-name() = 'title']"/>
64
                    <xsl:variable name="subjects" select="//*[local-name() = 'subject']"/>
65
                    <xsl:variable name="publisher" select="//*[local-name() = 'publisher']"/>
66
                    <xsl:variable name="descriptions" select="//*[local-name() = 'description']"/>
67
                    <xsl:variable name="dates" select="//*[local-name() = 'date']"/>
68
                    <xsl:variable name="dateaccepted" select="//oaf:dateAccepted"/>
69
                    <xsl:variable name="resourceType" select="//*[local-name() = 'resourceType']"/>
70
                    <xsl:variable name="formats" select="//*[local-name() = 'format']"/>
71
                    <xsl:variable name="sizes" select="//*[local-name() = 'size']"/>
72
                    <xsl:variable name="rights" select="//oaf:accessrights"/>
73
                    <xsl:variable name="version" select="//*[local-name() = 'version']"/>
74
                    <xsl:variable name="language" select="//oaf:language"/>
75
                    <xsl:variable name="cobjcategory" select="//dr:CobjCategory"/>
76
                    <xsl:variable name="contributor" select="//*[local-name() = 'creator']/*[local-name() = 'creatorName']"/>
77
                    <!-- NOTE(review): this binds "namespaceprefix" a second time; the name is already bound earlier in this template with a fallback branch for records lacking oaf:datasourceprefix. XSLT 1.0 defines shadowing of a template-local variable as an error, and where a processor tolerates it this binding (a plain node-set, no fallback) is the one seen by the code below. Confirm which binding is intended. -->
                    <xsl:variable name="namespaceprefix" select="//*[local-name() = 'datasourceprefix']"/>
78
                    <xsl:variable name="pid" select="//*[local-name()='resource']/*[local-name()='identifier']/text()"/>
79
                    <xsl:variable name="pidType" select="//*[local-name()='resource']/*[local-name()='identifier']/@identifierType"/>
80
                    <xsl:variable name="aboutNode" select="//oaf:about"/>
81
                    <xsl:variable name="pids" select="//*[local-name() = 'resource']//*[local-name()='identifier'or local-name()='alternateIdentifier']"/>
80 82

  
81
                                        <xsl:variable name="pid"
82
                                                      select="//*[local-name()='resource']/*[local-name()='identifier']/text()"/>
83
                                        <xsl:variable name="pidType"
84
                                                      select="//*[local-name()='resource']/*[local-name()='identifier']/@identifierType"/>
83
                    <ROWS>
84
                        <ROW>
85
                            <xsl:attribute name="value">
86
                                <xsl:value-of select="dnet:dliDataset($datasetId, $about, $titles, $subjects, $publisher, $descriptions,
87
                                $dates, $dateaccepted, $resourceType, $formats, $sizes, $language, $cobjcategory, $contributor, $rights,
88
                                $pids, $provenance, $aboutNode, $originalids, $dateOfCollection, $dateoftransformation, $trust)"/>
89
                            </xsl:attribute>
90
                        </ROW>
85 91

  
92
                        <!--PUBLICATION RELATIONS-->
93
                        <xsl:for-each select="//*[local-name()='relatedIdentifier' and ./@entityType='publication']">
86 94

  
87
                                        <xsl:variable name="aboutNode"
88
                                                      select="//oaf:about"/>
95
                            <xsl:variable name="relationSemantic" select="./@relationType"/>
96
                            <xsl:variable name="inverseRelationSemantic" select="./@inverseRelationType"/>
89 97

  
90
                                        <xsl:variable name="pids"
91
                                                      select="//*[local-name() = 'resource']//*[local-name()='identifier'or local-name()='alternateIdentifier']"/>
98
                            <!-- related publication ids must be in the openaire format -->
99
                            <xsl:variable name="publicationId" select="dnet:oafSimpleId('publication', normalize-space(.))"/>
100
                            <xsl:if test="string-length($datasetId) &gt; 0">
101
                                <ROW>
102
                                    <xsl:attribute name="value">
103
                                        <xsl:value-of select="dnet:rel($publicationId, $datasetId, $relationSemantic, 'publication_dataset', $provenance, $trust, $about)"/>
104
                                    </xsl:attribute>
105
                                </ROW>
106
                                <ROW>
107
                                    <xsl:attribute name="value">
108
                                        <xsl:value-of select="dnet:rel($datasetId, $publicationId, $inverseRelationSemantic, 'publication_dataset', $provenance, $trust, $about)"/>
109
                                    </xsl:attribute>
110
                                </ROW>
111
                            </xsl:if>
112
                        </xsl:for-each>
92 113

  
93
                                        <ROWS>
94
                                            <ROW key="{$datasetId}" columnFamily="metadata">
95
                                                <QUALIFIER name="dataset" type="base64">
96
                                                    <xsl:value-of select="dnet:dliDataset($datasetId, $about, $titles, $subjects, $publisher, $descriptions,
97
        $dates, $dateaccepted, $resourceType, $formats, $sizes, $language, $cobjcategory, $contributor, $rights,
98
        $pids,  $provenance,  $aboutNode, $originalids,  $dateOfCollection, $dateoftransformation, $trust)"/>
99
                                                </QUALIFIER>
100
                                            </ROW>
114
                        <!--DATASET RELATIONS-->
115
                        <xsl:for-each select="//*[local-name()='relatedIdentifier' and ./@entityType='dataset']">
116
                            <xsl:variable name="relationSemantic" select="./@relationType"/>
117
                            <xsl:variable name="inverseRelationSemantic" select="./@inverseRelationType"/>
101 118

  
119
                            <!-- relatedDataset ids must be in the openaire format  -->
120
                            <xsl:variable name="relDatasetId" select="dnet:oafSimpleId('dataset', normalize-space(.))"/>
102 121

  
103
                                            <!--PUBLICATION RELATIONS-->
104
                                            <xsl:for-each
105
                                                    select="//*[local-name()='relatedIdentifier' and ./@entityType='publication']">
122
                            <xsl:if test="string-length($datasetId) &gt; 0">
123
                                <ROW>
124
                                    <xsl:attribute name="value">
125
                                        <xsl:value-of select="dnet:rel($relDatasetId, $datasetId, $relationSemantic, 'dataset_dataset', $provenance, $trust, $about)"/>
126
                                    </xsl:attribute>
127
                                </ROW>
128
                                <ROW>
129
                                    <xsl:attribute name="value">
130
                                        <xsl:value-of select="dnet:rel($datasetId, $relDatasetId, $inverseRelationSemantic, 'dataset_dataset', $provenance, $trust, $about)"/>
131
                                    </xsl:attribute>
132
                                </ROW>
133
                            </xsl:if>
134
                        </xsl:for-each>
106 135

  
107
                                                <xsl:variable name="relationSemantic" select="./@relationType"/>
108
                                                <xsl:variable name="inverseRelationSemantic"
109
                                                              select="dnet:inverseRelation($relationSemantic)"/>
136
                        <!--UNKNOWN RELATIONS-->
137
                        <xsl:for-each select="//*[local-name()='relatedIdentifier' and ./@entityType='unknown']">
110 138

  
139
                            <xsl:variable name="relationSemantic" select="./@relationType"/>
140
                            <xsl:variable name="inverseRelationSemantic" select="./@inverseRelationType"/>
111 141

  
112
                                                <!-- relatedDataset ids must be in the openaire format  -->
113
                                                <xsl:variable name="publicationId"
114
                                                              select="dnet:oafSimpleId('publication', normalize-space(.))"/>
142
                            <xsl:variable name="enityId" select="dnet:createEntityId(./@relatedIdentifierType, ., $namespaceprefix)"/>
115 143

  
116

  
117
                                                <xsl:variable name="columnFamily"
118
                                                              select="dnet:createColumnFamily($relationSemantic)"/>
119

  
120
                                                <xsl:if test="string-length($datasetId) &gt; 0">
121
                                                    <ROW key="{$publicationId}">
122
                                                        <xsl:attribute name="columnFamily">
123
                                                            <xsl:value-of
124
                                                                    select="concat('publicationDataset_',$columnFamily,'_',$relationSemantic)"/>
125
                                                        </xsl:attribute>
126

  
127
                                                        <QUALIFIER name="{$datasetId}" type="base64">
128
                                                            <xsl:value-of
129
                                                                    select="dnet:createRel($publicationId, $datasetId, 'publicationDataset', $columnFamily, $relationSemantic, $provenance, $trust, $about)"/>
130
                                                        </QUALIFIER>
131
                                                    </ROW>
132
                                                    <ROW key="{$datasetId}">
133
                                                        <xsl:attribute name="columnFamily">
134
                                                            <xsl:value-of
135
                                                                    select="concat('publicationDataset_',$columnFamily,'_',$inverseRelationSemantic)"/>
136
                                                        </xsl:attribute>
137
                                                        <QUALIFIER name="{$publicationId}" type="base64">
138
                                                            <xsl:value-of
139
                                                                    select="dnet:createRel( $datasetId, $publicationId, 'publicationDataset', $columnFamily, $relationSemantic, $provenance, $trust, $about)"/>
140
                                                        </QUALIFIER>
141
                                                    </ROW>
142

  
143
                                                </xsl:if>
144
                                            </xsl:for-each>
145
                                            <!--DATASET RELATIONS-->
146
                                            <xsl:for-each
147
                                                    select="//*[local-name()='relatedIdentifier' and ./@entityType='dataset']">
148

  
149
                                                <xsl:variable name="relationSemantic" select="./@relationType"/>
150
                                                <xsl:variable name="inverseRelationSemantic"
151
                                                              select="dnet:inverseRelation($relationSemantic)"/>
152

  
153

  
154
                                                <!-- relatedDataset ids must be in the openaire format  -->
155
                                                <xsl:variable name="relDatasetId"
156
                                                              select="dnet:oafSimpleId('dataset', normalize-space(.))"/>
157

  
158

  
159
                                                <xsl:variable name="columnFamily"
160
                                                              select="dnet:createColumnFamily($relationSemantic)"/>
161

  
162
                                                <xsl:if test="string-length($datasetId) &gt; 0">
163
                                                    <ROW key="{$relDatasetId}">
164
                                                        <xsl:attribute name="columnFamily">
165
                                                            <xsl:value-of
166
                                                                    select="concat('datasetDataset_',$columnFamily,'_',$relationSemantic)"/>
167
                                                        </xsl:attribute>
168

  
169
                                                        <QUALIFIER name="{$datasetId}" type="base64">
170
                                                            <xsl:value-of
171
                                                                    select="dnet:createRel($relDatasetId, $datasetId, 'datasetDataset', $columnFamily, $relationSemantic, $provenance, $trust, $about)"/>
172
                                                        </QUALIFIER>
173
                                                    </ROW>
174
                                                    <ROW key="{$datasetId}">
175
                                                        <xsl:attribute name="columnFamily">
176
                                                            <xsl:value-of
177
                                                                    select="concat('datasetDataset_',$columnFamily,'_',$inverseRelationSemantic)"/>
178
                                                        </xsl:attribute>
179
                                                        <QUALIFIER name="{$relDatasetId}" type="base64">
180
                                                            <xsl:value-of
181
                                                                    select="dnet:createRel( $datasetId, $relDatasetId, 'datasetDataset', $columnFamily, $relationSemantic, $provenance, $trust, $about)"/>
182
                                                        </QUALIFIER>
183
                                                    </ROW>
184

  
185
                                                </xsl:if>
186
                                            </xsl:for-each>
187
                                            <!--UNKOWN RELATIONS-->
188
                                            <xsl:for-each
189
                                                    select="//*[local-name()='relatedIdentifier' and ./@entityType='unknown']">
190

  
191
                                                <xsl:variable name="relationSemantic" select="./@relationType"/>
192
                                                <xsl:variable name="inverseRelationSemantic"
193
                                                              select="dnet:inverseRelation($relationSemantic)"/>
194

  
195
                                                <xsl:variable name="columnFamily"
196
                                                              select="dnet:createColumnFamily($relationSemantic)"/>
197

  
198
                                                <xsl:variable name="enityId">
199
                                                    <xsl:value-of
200
                                                            select="dnet:createEntityId(./@relatedIdentifierType, ., $namespaceprefix)"/>
201
                                                </xsl:variable>
202

  
203
                                                <ROW key="{$enityId}" columnFamily="metadata">
204
                                                    <QUALIFIER name="body" type="base64">
205
                                                        <xsl:value-of
206
                                            select="dnet:createEntity(./@relatedIdentifierType, ., $namespaceprefix, $about)"/>
207
                                                    </QUALIFIER>
208
                                                </ROW>
209

  
210

  
211
                                                <ROW key="{$datasetId}">
212

  
213
                                                    <xsl:attribute name="columnFamily">
214
                                                        <xsl:value-of
215
                                                                select="concat('datasetUnknown_',$columnFamily,'_',$relationSemantic)"/>
216
                                                    </xsl:attribute>
217
                                                    <QUALIFIER name="{$enityId}" type="base64">
218
                                                        <xsl:value-of
219
                                            select="dnet:createRel($datasetId, $enityId, 'datasetUnknown', $columnFamily,$relationSemantic, $provenance, $trust, $about)"/>
220
                                                    </QUALIFIER>
221
                                                </ROW>
222
                                                <ROW key="{$enityId}" columnFamily="rel">
223
                                                    <QUALIFIER name="{$datasetId}" type="base64">
224
                                                        <xsl:value-of
225
                                            select="dnet:createRel($enityId, $datasetId, 'datasetUnknown', $columnFamily,$relationSemantic, $provenance, $trust, $about)"/>
226
                                                    </QUALIFIER>
227
                                                </ROW>
228
                                            </xsl:for-each>
229

  
230
                                        </ROWS>
231
                                    </xsl:if>
232
                                </xsl:otherwise>
233
                            </xsl:choose>
234
                        </xsl:template>
144
                            <ROW>
145
                                <xsl:attribute name="value">
146
                                    <xsl:value-of select="dnet:createEntity(./@relatedIdentifierType, ., $namespaceprefix, $about)"/>
147
                                </xsl:attribute>
148
                            </ROW>
149
                            <ROW>
150
                                <xsl:attribute name="value">
151
                                    <xsl:value-of select="dnet:rel($datasetId, $enityId, $relationSemantic, 'dataset_unknown', $provenance, $trust, $about)"/>
152
                                </xsl:attribute>
153
                            </ROW>
154
                            <ROW>
155
                                <xsl:attribute name="value">
156
                                    <xsl:value-of select="dnet:rel($enityId, $datasetId, $inverseRelationSemantic, 'dataset_unknown', $provenance, $trust, $about)"/>
157
                                </xsl:attribute>
158
                            </ROW>
159
                        </xsl:for-each>
160
                    </ROWS>
161
                </xsl:if>
162
            </xsl:otherwise>
163
        </xsl:choose>
164
    </xsl:template>
235 165
</xsl:stylesheet>
236 166
	</CODE>
237 167
            </SCRIPT>

Also available in: Unified diff