Revision 49882
Added by Sandro La Bruzzo over 6 years ago
modules/dnet-wds-domain/trunk/src/test/resources/inputDMF.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<oai:record xmlns:oai="http://www.openarchives.org/OAI/2.0/" |
|
3 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
4 |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
5 |
<oai:header> |
|
6 |
<dri:objIdentifier>r3d100010134::00002f60593fd1f758fb838fafb46795</dri:objIdentifier> |
|
7 |
<dri:recordIdentifier>oai:pangaea.de:doi:10.1594/PANGAEA.432865</dri:recordIdentifier> |
|
8 |
<dri:dateOfCollection>2017-10-13T15:05:18.483+02:00</dri:dateOfCollection> |
|
9 |
<dri:repositoryId>d35a517d-341a-4ba2-85c5-31f43eb5e46b_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId> |
|
10 |
<dri:datasourceprefix>r3d100010134</dri:datasourceprefix> |
|
11 |
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">oai:pangaea.de:doi:10.1594/PANGAEA.432865</identifier> |
|
12 |
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2017-08-04T21:17:31Z</datestamp> |
|
13 |
</oai:header> |
|
14 |
<metadata xmlns="http://www.openarchives.org/OAI/2.0/"> |
|
15 |
<resource xmlns="http://datacite.org/schema/kernel-3" |
|
16 |
xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd"> |
|
17 |
<identifier xmlns="" identifierType="DOI">10.1594/PANGAEA.432865</identifier> |
|
18 |
<creators xmlns=""> |
|
19 |
<creator> |
|
20 |
<creatorName>WOCE Sea Level, WSL</creatorName> |
|
21 |
</creator> |
|
22 |
</creators> |
|
23 |
<titles xmlns=""> |
|
24 |
<title>Daily sea level from coastal tide gauge station Woods_Hole in 1978 (Research quality database)</title> |
|
25 |
</titles> |
|
26 |
<publisher xmlns="">PANGAEA - Data Publisher for Earth &amp; Environmental Science</publisher> |
|
27 |
<publicationYear xmlns="">2006</publicationYear> |
|
28 |
<subjects xmlns=""> |
|
29 |
<subject subjectScheme="Parameter">DATE/TIME</subject> |
|
30 |
<subject subjectScheme="Parameter">Sea level</subject> |
|
31 |
<subject subjectScheme="Campaign">SeaLevel</subject> |
|
32 |
<subject subjectScheme="Project">World Ocean Circulation Experiment (WOCE)</subject> |
|
33 |
</subjects> |
|
34 |
<contributors xmlns=""> |
|
35 |
<contributor contributorType="HostingInstitution"> |
|
36 |
<contributorName>Sea Level Center, University of Hawaii</contributorName> |
|
37 |
</contributor> |
|
38 |
</contributors> |
|
39 |
<dates xmlns=""> |
|
40 |
<date dateType="Collected">1978-01-01T12:00:00/1978-12-31T12:00:00</date> |
|
41 |
</dates> |
|
42 |
<language xmlns="">eng</language> |
|
43 |
<resourceType xmlns="" resourceTypeGeneral="Dataset">Dataset</resourceType> |
|
44 |
<relatedIdentifiers xmlns=""> |
|
45 |
<relatedIdentifier xmlns:oaf="http://namespace.dnet.eu/oaf" inverseRelationType="documents" |
|
46 |
relatedIdentifierType="URL" |
|
47 |
relationType="isDocumentedBy">http://store.pangaea.de/Projects/WOCE/SeaLevel_rqds/Woods_Hole.txt</relatedIdentifier> |
|
48 |
</relatedIdentifiers> |
|
49 |
<sizes xmlns=""> |
|
50 |
<size>365 data points</size> |
|
51 |
</sizes> |
|
52 |
<formats xmlns=""> |
|
53 |
<format>text/tab-separated-values</format> |
|
54 |
</formats> |
|
55 |
<rightsList xmlns=""> |
|
56 |
<rights rightsURI="http://creativecommons.org/licenses/by/3.0/">Creative Commons Attribution 3.0 Unported (CC-BY)</rights> |
|
57 |
</rightsList> |
|
58 |
<geoLocations xmlns=""> |
|
59 |
<geoLocation> |
|
60 |
<geoLocationPoint>41.5233 -70.6717</geoLocationPoint> |
|
61 |
</geoLocation> |
|
62 |
</geoLocations> |
|
63 |
</resource> |
|
64 |
</metadata> |
|
65 |
<oai:about> |
|
66 |
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" |
|
67 |
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd"> |
|
68 |
<originDescription xmlns="" altered="true" harvestDate="2017-10-13T15:05:18.483+02:00"> |
|
69 |
<baseURL>http%3A%2F%2Fws.pangaea.de%2Foai%2Fprovider</baseURL> |
|
70 |
<identifier>oai:pangaea.de:doi:10.1594/PANGAEA.432865</identifier> |
|
71 |
<datestamp>2017-08-04T21:17:31Z</datestamp> |
|
72 |
<metadataNamespace/> |
|
73 |
</originDescription> |
|
74 |
</provenance> |
|
75 |
</oai:about> |
|
76 |
</oai:record> |
modules/dnet-wds-domain/trunk/src/main/java/eu/dnetlib/data/transform/xml/RelatedIdentifier.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import org.apache.commons.lang3.StringUtils; |
|
4 |
|
|
5 |
import static eu.dnetlib.data.proto.WdsDatasetProtos.*; |
|
6 |
|
|
7 |
|
|
8 |
public class RelatedIdentifier { |
|
9 |
|
|
10 |
private String entityType ; |
|
11 |
private String targetId ; |
|
12 |
private String relationSemantic; |
|
13 |
private String targetIdType; |
|
14 |
|
|
15 |
public RelatedIdentifier() { |
|
16 |
} |
|
17 |
|
|
18 |
public RelatedIdentifier(String entityType, String targetId, String relationSemantic, String targetIdType) { |
|
19 |
this.entityType = entityType; |
|
20 |
this.targetId = targetId; |
|
21 |
this.relationSemantic = relationSemantic; |
|
22 |
this.targetIdType = targetIdType; |
|
23 |
} |
|
24 |
|
|
25 |
public String getEntityType() { |
|
26 |
return entityType; |
|
27 |
} |
|
28 |
|
|
29 |
public RelatedIdentifier setEntityType(String entityType) { |
|
30 |
this.entityType = entityType; |
|
31 |
return this; |
|
32 |
} |
|
33 |
|
|
34 |
public String getTargetId() { |
|
35 |
return targetId; |
|
36 |
} |
|
37 |
|
|
38 |
public RelatedIdentifier setTargetId(String targetId) { |
|
39 |
this.targetId = targetId; |
|
40 |
return this; |
|
41 |
} |
|
42 |
|
|
43 |
public String getRelationSemantic() { |
|
44 |
return relationSemantic; |
|
45 |
} |
|
46 |
|
|
47 |
public RelatedIdentifier setRelationSemantic(String relationSemantic) { |
|
48 |
this.relationSemantic = relationSemantic; |
|
49 |
return this; |
|
50 |
} |
|
51 |
|
|
52 |
public String getTargetIdType() { |
|
53 |
return targetIdType; |
|
54 |
} |
|
55 |
|
|
56 |
public RelatedIdentifier setTargetIdType(String targetIdType) { |
|
57 |
this.targetIdType = targetIdType; |
|
58 |
return this; |
|
59 |
} |
|
60 |
|
|
61 |
|
|
62 |
public otherRelation toOtherRelation () { |
|
63 |
final otherRelation.Builder builder = otherRelation.newBuilder(); |
|
64 |
builder.setRelationSemantic(relationSemantic); |
|
65 |
builder.setTarget(otherRelationIdentifier.newBuilder().setId(targetId).setType(targetIdType).build()); |
|
66 |
if (StringUtils.isNotEmpty(entityType)) { |
|
67 |
builder.setTargetType(entityType); |
|
68 |
} |
|
69 |
return builder.build(); |
|
70 |
} |
|
71 |
} |
modules/dnet-wds-domain/trunk/src/main/java/eu/dnetlib/data/transform/xml/WdsToHbaseXsltFunctions.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.transform.xml; |
2 | 2 |
|
3 |
import java.util.ArrayList; |
|
3 | 4 |
import java.util.List; |
4 | 5 |
|
5 | 6 |
import com.google.common.collect.Lists; |
7 |
import eu.dnetlib.data.proto.DNGFProtos; |
|
6 | 8 |
import eu.dnetlib.data.proto.DNGFProtos.DNGF; |
7 | 9 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity; |
8 | 10 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity.Builder; |
11 |
import eu.dnetlib.data.proto.FieldTypeProtos; |
|
9 | 12 |
import eu.dnetlib.data.proto.WdsDatasetProtos.WdsDataset; |
10 | 13 |
import eu.dnetlib.data.proto.WdsDatasetProtos.WdsDataset.GeoLocation; |
11 | 14 |
import org.apache.commons.lang3.StringUtils; |
15 |
import org.w3c.dom.NamedNodeMap; |
|
12 | 16 |
import org.w3c.dom.Node; |
13 | 17 |
import org.w3c.dom.NodeList; |
14 | 18 |
|
... | ... | |
45 | 49 |
final NodeList originalIds, |
46 | 50 |
final String instanceUri, |
47 | 51 |
final String dateOfCollection, |
48 |
final String dateOfTransformation) { |
|
52 |
final String dateOfTransformation, |
|
53 |
final NodeList relatedIdentifiers |
|
54 |
) { |
|
49 | 55 |
|
50 | 56 |
try { |
51 | 57 |
final DNGFEntity.Builder entity = odfDatasetProto( |
... | ... | |
54 | 60 |
pidList, hostedbyId, hostedbyName, collectedfromId, collectedfromName, |
55 | 61 |
originalIds, instanceUri, dateOfCollection, dateOfTransformation); |
56 | 62 |
|
63 |
|
|
64 |
List<RelatedIdentifier> parsedRelatedIdentifiers = parseRelatedIdentifiers(relatedIdentifiers); |
|
65 |
parsedRelatedIdentifiers.stream().filter(it -> !it.getTargetIdType().equals("dnet")).forEach( |
|
66 |
r -> entity.getDatasetBuilder().getMetadataBuilder().addExtension(WdsDataset.otherRels, r.toOtherRelation())); |
|
67 |
|
|
57 | 68 |
if(geoLocations != null) { |
58 | 69 |
parseGeoLocations(entity, geoLocations); |
59 | 70 |
} |
60 | 71 |
|
61 | 72 |
final DNGF dngf = getOaf(entity, getDataInfo(about, provenance, trust, false, false)); |
73 |
|
|
74 |
|
|
75 |
|
|
76 |
|
|
62 | 77 |
return base64(dngf.toByteArray()); |
63 | 78 |
} catch (Throwable e) { |
64 | 79 |
e.printStackTrace(System.err); |
... | ... | |
108 | 123 |
geos.forEach(g -> entity.getDatasetBuilder().getMetadataBuilder().addExtension(WdsDataset.geolocation, g)); |
109 | 124 |
} |
110 | 125 |
|
126 |
public static String createRel( |
|
127 |
final String source, |
|
128 |
final String target, |
|
129 |
final String relationSemantic, |
|
130 |
final String relType, |
|
131 |
final String provenanceAction, |
|
132 |
final String trust, |
|
133 |
final NodeList about) { |
|
134 |
|
|
135 |
try { |
|
136 |
DNGFProtos.DNGFRel.Builder rel = relProto(source.trim(), target.trim(), relationSemantic, relType); |
|
137 |
final DNGFProtos.DNGF pmf = getOaf(rel, getDataInfo(about, provenanceAction, trust, false, false)); |
|
138 |
return base64(pmf.toByteArray()); |
|
139 |
} catch (Throwable e) { |
|
140 |
e.printStackTrace(System.err); |
|
141 |
throw new RuntimeException(e); |
|
142 |
} |
|
143 |
|
|
144 |
} |
|
145 |
|
|
146 |
|
|
147 |
private static List<RelatedIdentifier> parseRelatedIdentifiers(final NodeList relatedIdentifier) { |
|
148 |
List<RelatedIdentifier> result = new ArrayList<>(); |
|
149 |
for(int i =0; i < relatedIdentifier.getLength(); i++ ) { |
|
150 |
final Node currentNode = relatedIdentifier.item(i); |
|
151 |
final String targetId =currentNode.getTextContent(); |
|
152 |
final NamedNodeMap attributes = currentNode.getAttributes(); |
|
153 |
final String entityType = getAttributeValue(attributes, "entityType"); |
|
154 |
final String relatedIdentifierType = getAttributeValue(attributes, "relatedIdentifierType"); |
|
155 |
final String relationType = getAttributeValue(attributes, "relationType"); |
|
156 |
result.add(new RelatedIdentifier(entityType, targetId, relationType, relatedIdentifierType)); |
|
157 |
} |
|
158 |
return result; |
|
159 |
|
|
160 |
} |
|
161 |
|
|
162 |
|
|
163 |
|
|
164 |
|
|
165 |
|
|
166 |
|
|
111 | 167 |
} |
modules/dnet-wds-domain/trunk/src/main/resources/eu/dnetlib/data/proto/wds/WdsDataset.proto | ||
---|---|---|
5 | 5 |
option java_package = "eu.dnetlib.data.proto"; |
6 | 6 |
option java_outer_classname = "WdsDatasetProtos"; |
7 | 7 |
|
8 |
|
|
9 |
|
|
10 |
// Identifier of a relation target: a mandatory id plus an optional type qualifying that id. |
message otherRelationIdentifier { |
|
11 |
required string id = 1; |
|
12 |
optional string type = 2; |
|
13 |
} |
|
14 |
|
|
15 |
// A relation towards a target identifier: the target and the relation semantic are mandatory, |
// the type of the target entity is optional. |
message otherRelation { |
|
16 |
required otherRelationIdentifier target = 1; |
|
17 |
required string relationSemantic = 2; |
|
18 |
optional string targetType = 3; |
|
19 |
} |
|
20 |
|
|
8 | 21 |
message WdsDataset { |
9 | 22 |
|
10 | 23 |
extend Dataset.Metadata { |
11 | 24 |
repeated GeoLocation geolocation = 100; |
25 |
repeated otherRelation otherRels = 101; |
|
12 | 26 |
} |
13 | 27 |
|
14 | 28 |
message GeoLocation { |
modules/dnet-wds-domain/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dmfwds2hbase.xml | ||
---|---|---|
72 | 72 |
<xsl:variable name="language" select="//oaf:language"/> |
73 | 73 |
<xsl:variable name="cobjcategory" select="//dr:CobjCategory"/> |
74 | 74 |
<xsl:variable name="contributor" select="//*[local-name() = 'contributor']/*[local-name() = 'contributorName']"/> |
75 |
<xsl:variable name="relatedIdentifiers" select="//*[local-name() = 'relatedIdentifier']"/> |
|
75 | 76 |
|
76 | 77 |
<xsl:variable name="relatedIdentifiers" |
77 | 78 |
select="//*[local-name() = 'relatedIdentifier']"/> |
... | ... | |
111 | 112 |
select="dnet:wdsDataset($datasetId, $about, $titles, $subjects, $publisher, $descriptions, |
112 | 113 |
$dates, $dateaccepted, $resourceType, $formats, $sizes, $language, $cobjcategory, $contributor, $rights, |
113 | 114 |
$version, $pids, $geoLocations, $provenance, $trust, $hostedbyid, $hostedbyname, $collectedfromid, |
114 |
$collectedfromname, $originalids, $instanceURI, $dateOfCollection, $dateoftransformation)"/> |
|
115 |
$collectedfromname, $originalids, $instanceURI, $dateOfCollection, $dateoftransformation, $relatedIdentifiers)"/>
|
|
115 | 116 |
|
116 | 117 |
<ROWS> |
117 |
<ROW key="{$datasetId}" columnFamily="dataset"> |
|
118 |
<QUALIFIER name="body" type="base64"> |
|
119 |
<xsl:value-of select="$dataset"/> |
|
120 |
</QUALIFIER> |
|
121 |
</ROW> |
|
122 |
<xsl:for-each select="//*[local-name() = 'creator']"> |
|
118 |
<ROW> |
|
119 |
<xsl:attribute name="value"> |
|
120 |
<xsl:value-of select="$dataset"/> |
|
121 |
</xsl:attribute> |
|
122 |
</ROW> |
|
123 | 123 |
|
124 |
<xsl:variable name="personIdTemp"> |
|
125 |
<xsl:choose> |
|
126 |
<xsl:when test="string-length(./*[local-name() = 'nameIdentifier']) > 0"> |
|
127 |
<xsl:value-of |
|
128 |
select="translate(normalize-space(./*[local-name() = 'nameIdentifier']),' .,','___')"/> |
|
129 |
</xsl:when> |
|
130 |
<xsl:otherwise> |
|
131 |
<xsl:value-of select="translate(normalize-space(./*[local-name() = 'creatorName']),' .,','___')"/> |
|
132 |
</xsl:otherwise> |
|
133 |
</xsl:choose> |
|
134 |
</xsl:variable> |
|
124 |
<!--PUBLICATION RELATIONS--> |
|
125 |
<xsl:for-each |
|
126 |
select="//*[normalize-space(local-name())='relatedIdentifier' and normalize-space(./@entityType)='publication']"> |
|
135 | 127 |
|
136 |
<xsl:variable name="personId" select="dnet:oafId('person', $namespaceprefix, normalize-space($personIdTemp))"/> |
|
128 |
<xsl:variable name="relationSemantic" select="./@relationType"/> |
|
129 |
<xsl:variable name="inverseRelationSemantic" |
|
130 |
select="./@inverseRelationType"/> |
|
137 | 131 |
|
138 |
<xsl:variable name="originalPersonId" select="./*[local-name() = 'nameIdentifier']"/> |
|
132 |
<!-- relatedDataset ids must be in the openaire format --> |
|
133 |
<xsl:variable name="publicationId"> |
|
134 |
<xsl:choose> |
|
135 |
<xsl:when test="./@relatedIdentifierType = 'dnet'"> |
|
136 |
<xsl:value-of select="concat('50|' , . )"/> |
|
137 |
</xsl:when> |
|
138 |
<xsl:otherwise> |
|
139 |
<xsl:value-of |
|
140 |
select="dnet:oafSimpleId('publication', normalize-space(.))"/> |
|
141 |
</xsl:otherwise> |
|
142 |
</xsl:choose> |
|
143 |
</xsl:variable> |
|
144 |
<xsl:if test="string-length($datasetId) > 0"> |
|
145 |
<ROW> |
|
146 |
<xsl:attribute name="value"> |
|
147 |
<xsl:value-of |
|
148 |
select="dnet:createRel($publicationId, $datasetId, $inverseRelationSemantic, 'publication_dataset', $provenance, $trust, $about)"/> |
|
149 |
</xsl:attribute> |
|
150 |
</ROW> |
|
151 |
<ROW> |
|
152 |
<xsl:attribute name="value"> |
|
153 |
<xsl:value-of |
|
154 |
select="dnet:createRel($datasetId, $publicationId, $relationSemantic, 'publication_dataset', $provenance, $trust, $about)"/> |
|
155 |
</xsl:attribute> |
|
156 |
</ROW> |
|
157 |
</xsl:if> |
|
158 |
</xsl:for-each> |
|
139 | 159 |
|
140 |
<xsl:if test="string-length($personId) > 0"> |
|
141 |
<xsl:variable name="position" select="position()"/> |
|
142 |
<xsl:variable name="person" |
|
143 |
select="dnet:person($personId, $about, $provenance, $trust, $collectedfromid, $collectedfromname, |
|
144 |
$originalPersonId, $dateOfCollection, $dateoftransformation, normalize-space(./*[local-name() = 'creatorName']), |
|
145 |
./*[local-name() = 'nameIdentifier'], ./*[local-name() = 'nameIdentifier']/@nameIdentifierScheme)"/> |
|
146 |
<xsl:variable name="persondataset" |
|
147 |
select="dnet:personDataset_Authorship($personId, $datasetId, $position, 'isAuthorOf', $provenance, $trust, $about)"/> |
|
148 |
<xsl:variable name="datasetperson" |
|
149 |
select="dnet:personDataset_Authorship($datasetId, $personId, $position, 'hasAuthor', $provenance, $trust, $about)"/> |
|
160 |
<!--DATASET RELATIONS--> |
|
161 |
<xsl:for-each |
|
162 |
select="//*[normalize-space(local-name())='relatedIdentifier' and normalize-space(./@entityType)='dataset']"> |
|
163 |
<xsl:variable name="relationSemantic" select="./@relationType"/> |
|
164 |
<xsl:variable name="inverseRelationSemantic" |
|
165 |
select="./@inverseRelationType"/> |
|
150 | 166 |
|
151 |
<ROW key="{$personId}" columnFamily="person"> |
|
152 |
<QUALIFIER name="body" type="base64"> |
|
153 |
<xsl:value-of select="$person"/> |
|
154 |
</QUALIFIER> |
|
155 |
</ROW> |
|
156 |
<ROW key="{$personId}" columnFamily="personDataset_authorship_isAuthorOf"> |
|
157 |
<QUALIFIER name="{$datasetId}" type="base64"> |
|
158 |
<xsl:value-of select="$persondataset"/> |
|
159 |
</QUALIFIER> |
|
160 |
</ROW> |
|
161 |
<ROW key="{$datasetId}" columnFamily="personDataset_authorship_hasAuthor"> |
|
162 |
<QUALIFIER name="{$personId}" type="base64"> |
|
163 |
<xsl:value-of select="$datasetperson"/> |
|
164 |
</QUALIFIER> |
|
165 |
</ROW> |
|
166 |
</xsl:if> |
|
167 |
</xsl:for-each> |
|
167 |
<!-- relatedDataset ids must be in the openaire format --> |
|
168 |
<xsl:variable name="relDatasetId"> |
|
169 |
<xsl:choose> |
|
170 |
<xsl:when test="./@relatedIdentifierType = 'dnet'"> |
|
171 |
<xsl:value-of select="concat('60|' , . )"/> |
|
172 |
</xsl:when> |
|
173 |
<xsl:otherwise> |
|
174 |
<xsl:value-of |
|
175 |
select="dnet:oafSimpleId('dataset', normalize-space(.))"/> |
|
176 |
</xsl:otherwise> |
|
177 |
</xsl:choose> |
|
178 |
</xsl:variable> |
|
168 | 179 |
|
169 |
<xsl:for-each select="//*[local-name()='projectid']"> |
|
170 | 180 |
|
171 |
<xsl:variable name="projectId" |
|
172 |
select="dnet:oafSplitId('project', normalize-space(.))"/> |
|
181 |
<xsl:if test="string-length($datasetId) > 0"> |
|
182 |
<ROW> |
|
183 |
<xsl:attribute name="value"> |
|
184 |
<xsl:value-of |
|
185 |
select="dnet:createRel($relDatasetId, $datasetId, $inverseRelationSemantic, 'dataset_dataset', $provenance, $trust, $about)"/> |
|
186 |
</xsl:attribute> |
|
187 |
</ROW> |
|
188 |
<ROW> |
|
189 |
<xsl:attribute name="value"> |
|
190 |
<xsl:value-of |
|
191 |
select="dnet:createRel($datasetId, $relDatasetId, $relationSemantic, 'dataset_dataset', $provenance, $trust, $about)"/> |
|
192 |
</xsl:attribute> |
|
193 |
</ROW> |
|
173 | 194 |
|
174 |
<xsl:variable name="datasetproject" |
|
175 |
select="dnet:rel($datasetId, $projectId, 'datasetProject', 'outcome', 'isProducedBy', $provenance, $trust, $about)"/> |
|
176 |
<xsl:variable name="projectdataset" |
|
177 |
select="dnet:rel($projectId, $datasetId, 'datasetProject', 'outcome', 'produces', $provenance, $trust, $about)"/> |
|
195 |
</xsl:if> |
|
196 |
</xsl:for-each> |
|
178 | 197 |
|
179 |
<xsl:if test="string-length($projectId) > 0"> |
|
180 |
<ROW key="{$datasetId}" columnFamily="datasetProject_outcome_isProducedBy"> |
|
181 |
<QUALIFIER name="{$projectId}" type="base64"> |
|
182 |
<xsl:value-of select="$datasetproject"/> |
|
183 |
</QUALIFIER> |
|
184 |
</ROW> |
|
185 |
<ROW key="{$projectId}" columnFamily="datasetProject_outcome_produces"> |
|
186 |
<QUALIFIER name="{$datasetId}" type="base64"> |
|
187 |
<xsl:value-of select="$projectdataset"/> |
|
188 |
</QUALIFIER> |
|
189 |
</ROW> |
|
190 |
</xsl:if> |
|
191 |
</xsl:for-each> |
|
192 | 198 |
|
193 |
<xsl:for-each |
|
194 |
select="//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='OPENAIRE' and ./@relationType='IsPartOf']"> |
|
195 | 199 |
|
196 |
<!-- relatedDataset ids must be in the openaire format --> |
|
197 |
<xsl:variable name="targetId" select="dnet:oafSimpleId('dataset', normalize-space(.))"/> |
|
198 | 200 |
|
199 |
<xsl:if test="string-length($targetId) > 0"> |
|
200 | 201 |
|
201 |
<xsl:variable name="childParent" |
|
202 |
select="dnet:rel($datasetId, $targetId, 'datasetDataset', 'part', 'isPartOf', $provenance, $trust, $about)"/> |
|
203 |
<xsl:variable name="parentChild" |
|
204 |
select="dnet:rel($targetId, $datasetId, 'datasetDataset', 'part', 'hasPart', $provenance, $trust, $about)"/> |
|
205 | 202 |
|
206 |
<ROW key="{$datasetId}" columnFamily="datasetDataset_part_isPartOf"> |
|
207 |
<QUALIFIER name="{$targetId}" type="base64"> |
|
208 |
<xsl:value-of select="$childParent"/> |
|
209 |
</QUALIFIER> |
|
210 |
</ROW> |
|
211 |
<ROW key="{$targetId}" columnFamily="datasetDataset_part_hasPart"> |
|
212 |
<QUALIFIER name="{$datasetId}" type="base64"> |
|
213 |
<xsl:value-of select="$parentChild"/> |
|
214 |
</QUALIFIER> |
|
215 |
</ROW> |
|
216 |
</xsl:if> |
|
217 |
</xsl:for-each> |
|
218 | 203 |
|
219 | 204 |
</ROWS> |
220 | 205 |
</xsl:if> |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/transform/XsltRowTransformerFactoryTest.java | ||
---|---|---|
31 | 31 |
import eu.dnetlib.data.proto.DliProtos; |
32 | 32 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
33 | 33 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
34 |
import eu.dnetlib.data.proto.WdsDatasetProtos; |
|
34 | 35 |
import eu.dnetlib.miscutils.functional.xml.XMLIndenter; |
35 | 36 |
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner; |
36 | 37 |
import eu.dnetlib.pace.config.DedupConfig; |
... | ... | |
187 | 188 |
} |
188 | 189 |
|
189 | 190 |
@Test |
190 |
public void testParseWDSDaaaatacite() throws Exception {
|
|
191 |
public void testParseWDSDatacite() throws Exception { |
|
191 | 192 |
|
192 |
//doTest(loadFromTransformationProfile("dmfwds2hbase.xml"), load("recordWds.xml")); |
|
193 |
final List<Row> rows = Lists.newArrayList(); |
|
194 |
rows.addAll(asRows(loadFromTransformationProfile("dmfwds2hbase.xml"), load("inputWDSDMF.xml"))); |
|
195 |
|
|
196 |
rows.forEach(row -> |
|
197 |
row.getColumns().forEach( |
|
198 |
result -> { |
|
199 |
if (result != null) { |
|
200 |
|
|
201 |
|
|
202 |
final DNGFDecoder decoder = |
|
203 |
DNGFDecoder.decode(result.getValue(), WdsDatasetProtos.WdsDataset.geolocation, WdsDatasetProtos.WdsDataset.otherRels); |
|
204 |
System.out.println(row.getKey()); |
|
205 |
Put put = HBaseTableDAO.asPutByCollectedFrom(decoder.getDNGF()); |
|
206 |
System.out.println("put.getTimeStamp() = " + put.getTimeStamp()); |
|
207 |
|
|
208 |
System.out.println("decoder.getDNGF().toString() = " + decoder.getDNGF().toString()); |
|
209 |
} |
|
210 |
} |
|
211 |
)); |
|
193 | 212 |
} |
194 | 213 |
|
195 | 214 |
|
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/inputWDSDMF.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<oai:record xmlns:oai="http://www.openarchives.org/OAI/2.0/" |
|
3 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
4 |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
5 |
<oai:header> |
|
6 |
<dri:objIdentifier>r3d100010134::00002f60593fd1f758fb838fafb46795</dri:objIdentifier> |
|
7 |
<dri:recordIdentifier>oai:pangaea.de:doi:10.1594/PANGAEA.432865</dri:recordIdentifier> |
|
8 |
<dri:dateOfCollection>2017-10-13T15:05:18.483+02:00</dri:dateOfCollection> |
|
9 |
<dri:repositoryId>d35a517d-341a-4ba2-85c5-31f43eb5e46b_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId> |
|
10 |
<dri:datasourceprefix>r3d100010134</dri:datasourceprefix> |
|
11 |
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">oai:pangaea.de:doi:10.1594/PANGAEA.432865</identifier> |
|
12 |
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2017-08-04T21:17:31Z</datestamp> |
|
13 |
</oai:header> |
|
14 |
<metadata xmlns="http://www.openarchives.org/OAI/2.0/"> |
|
15 |
<resource xmlns="http://datacite.org/schema/kernel-3" |
|
16 |
xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd"> |
|
17 |
<identifier xmlns="" identifierType="DOI">10.1594/PANGAEA.432865</identifier> |
|
18 |
<creators xmlns=""> |
|
19 |
<creator> |
|
20 |
<creatorName>WOCE Sea Level, WSL</creatorName> |
|
21 |
</creator> |
|
22 |
</creators> |
|
23 |
<titles xmlns=""> |
|
24 |
<title>Daily sea level from coastal tide gauge station Woods_Hole in 1978 (Research quality database)</title> |
|
25 |
</titles> |
|
26 |
<publisher xmlns="">PANGAEA - Data Publisher for Earth &amp; Environmental Science</publisher> |
|
27 |
<publicationYear xmlns="">2006</publicationYear> |
|
28 |
<subjects xmlns=""> |
|
29 |
<subject subjectScheme="Parameter">DATE/TIME</subject> |
|
30 |
<subject subjectScheme="Parameter">Sea level</subject> |
|
31 |
<subject subjectScheme="Campaign">SeaLevel</subject> |
|
32 |
<subject subjectScheme="Project">World Ocean Circulation Experiment (WOCE)</subject> |
|
33 |
</subjects> |
|
34 |
<contributors xmlns=""> |
|
35 |
<contributor contributorType="HostingInstitution"> |
|
36 |
<contributorName>Sea Level Center, University of Hawaii</contributorName> |
|
37 |
</contributor> |
|
38 |
</contributors> |
|
39 |
<dates xmlns=""> |
|
40 |
<date dateType="Collected">1978-01-01T12:00:00/1978-12-31T12:00:00</date> |
|
41 |
</dates> |
|
42 |
<language xmlns="">eng</language> |
|
43 |
<resourceType xmlns="" resourceTypeGeneral="Dataset">Dataset</resourceType> |
|
44 |
<relatedIdentifiers xmlns=""> |
|
45 |
<relatedIdentifier xmlns:oaf="http://namespace.dnet.eu/oaf" inverseRelationType="documents" |
|
46 |
relatedIdentifierType="URL" |
|
47 |
relationType="isDocumentedBy">http://store.pangaea.de/Projects/WOCE/SeaLevel_rqds/Woods_Hole.txt</relatedIdentifier> |
|
48 |
|
|
49 |
<relatedIdentifier entityType="publication" inverseRelationType="hasPart" relatedIdentifierType="dnet" |
|
50 |
relationType="isPartOf">wds_resolver::b0da752c99392f9bcf177afdc9c18ded</relatedIdentifier> |
|
51 |
|
|
52 |
</relatedIdentifiers> |
|
53 |
<sizes xmlns=""> |
|
54 |
<size>365 data points</size> |
|
55 |
</sizes> |
|
56 |
<formats xmlns=""> |
|
57 |
<format>text/tab-separated-values</format> |
|
58 |
</formats> |
|
59 |
<rightsList xmlns=""> |
|
60 |
<rights rightsURI="http://creativecommons.org/licenses/by/3.0/">Creative Commons Attribution 3.0 Unported (CC-BY)</rights> |
|
61 |
</rightsList> |
|
62 |
<geoLocations xmlns=""> |
|
63 |
<geoLocation> |
|
64 |
<geoLocationPoint>41.5233 -70.6717</geoLocationPoint> |
|
65 |
</geoLocation> |
|
66 |
</geoLocations> |
|
67 |
</resource> |
|
68 |
</metadata> |
|
69 |
<oai:about> |
|
70 |
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" |
|
71 |
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd"> |
|
72 |
<originDescription xmlns="" altered="true" harvestDate="2017-10-13T15:05:18.483+02:00"> |
|
73 |
<baseURL>http%3A%2F%2Fws.pangaea.de%2Foai%2Fprovider</baseURL> |
|
74 |
<identifier>oai:pangaea.de:doi:10.1594/PANGAEA.432865</identifier> |
|
75 |
<datestamp>2017-08-04T21:17:31Z</datestamp> |
|
76 |
<metadataNamespace/> |
|
77 |
</originDescription> |
|
78 |
</provenance> |
|
79 |
</oai:about> |
|
80 |
</oai:record> |
Also available in: Unified diff
Implemented mapping from DMF to proto for WDS