Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.List;
4
import java.util.Map;
5

    
6
import org.w3c.dom.NamedNodeMap;
7
import org.w3c.dom.Node;
8
import org.w3c.dom.NodeList;
9

    
10
import com.google.common.base.Joiner;
11
import com.google.common.collect.Lists;
12
import com.google.common.collect.Maps;
13

    
14
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
15
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
16
import eu.dnetlib.data.proto.OafProtos.Oaf;
17
import eu.dnetlib.data.proto.OafProtos.OafEntity;
18
import eu.dnetlib.data.proto.OafProtos.OafRel;
19
import eu.dnetlib.data.proto.PersonProtos.Person;
20
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult;
21
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship;
22
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
23
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
24
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
25
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject;
26
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
27
import eu.dnetlib.data.proto.ResultProtos.Result;
28
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
29
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
30
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset;
31
import eu.dnetlib.data.proto.TypeProtos.Type;
32

    
33
public class DataciteToHbaseXsltFunctions extends AbstractDNetOafXsltFunctions {
34

    
35
	/**
	 * Lookup table translating the access-rights value found in a record into the code stored in
	 * the {@code licence} qualifier of a result instance ({@code OPEN}, {@code CLOSED},
	 * {@code RESTRICTED} or {@code EMBARGO}). Values missing from this table are reported as
	 * {@code UNKNOWN} by {@code oafResult_FromDatacite}.
	 *
	 * <p>The reference is final; the table is filled once by the static initializer below.
	 */
	private static final Map<String, String> mappingAccess = Maps.newHashMap();

	static {

		// info:eu-repo access-rights URIs.
		mappingAccess.put("info:eu-repo/semantics/openAccess", "OPEN");
		mappingAccess.put("info:eu-repo/semantics/closedAccess", "CLOSED");
		mappingAccess.put("info:eu-repo/semantics/restrictedAccess", "RESTRICTED");
		mappingAccess.put("info:eu-repo/semantics/embargoedAccess", "EMBARGO");

		// Transformator now maps the access rights into proper values, not sure if it does for all datasets.
		// Identity entries let already-translated values pass through unchanged.
		mappingAccess.put("OPEN", "OPEN");
		mappingAccess.put("CLOSED", "CLOSED");
		mappingAccess.put("RESTRICTED", "RESTRICTED");
		mappingAccess.put("EMBARGO", "EMBARGO");

	}
51

    
52
	public static String oafResult_FromDatacite(final String resultId,
53
			final NodeList dataInfo,
54
			final NodeList metadata,
55
			final NodeList titles,
56
			final NodeList subjects,
57
			final NodeList publisher,
58
			final NodeList descriptions,
59
			final NodeList dates,
60
			final NodeList dateaccepted,
61
			final NodeList resourceTypes,
62
			final NodeList formats,
63
			final NodeList sizes,
64
			final NodeList languages,
65
			final NodeList cobjcategory,
66
			final NodeList rights,
67
			final NodeList version,
68
			final String provenance,
69
			final String trust,
70
			final String hostedbyId,
71
			final String hostedbyName,
72
			final String collectedfromId,
73
			final String collectedfromName,
74
			final String originalId,
75
			final String instanceUri,
76
			final String dateOfCollection) {
77

    
78
		try {
79
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
80

    
81
			final Result.Builder result = Result.newBuilder();
82
			Result.Metadata.Builder metadataProto = Result.Metadata.newBuilder();
83

    
84
			// subject
85
			for (int i = 0; i < subjects.getLength(); i++) {
86
				Node currentNode = subjects.item(i);
87
				NodeList childNodes = currentNode.getChildNodes();
88
				if (childNodes.getLength() > 0) {
89
					String subjectValue = childNodes.item(0).getNodeValue();
90
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("subject"),
91
							getStructuredProperty(subjectValue, "keyword", "keyword", "dnet:result_subject", "dnet:result_subject"));
92
				}
93
			}
94

    
95
			// title
96
			for (int i = 0; i < titles.getLength(); i++) {
97
				Node currentNode = titles.item(i);
98
				NodeList childNodes = currentNode.getChildNodes();
99
				if (childNodes.getLength() > 0) {
100
					String titleValue = childNodes.item(0).getNodeValue();
101
					String classname = "main title";
102
					String classid = "main title";
103
					if (currentNode.hasAttributes()) {
104
						NamedNodeMap attributes = currentNode.getAttributes();
105
						Node titleType = attributes.getNamedItem("titleType");
106

    
107
						if (titleType != null && titleType.getNodeValue().equals("AlternativeTitle")) {
108
							classname = "alternative title";
109
							classid = "alternative title";
110
						}
111
						if (titleType != null && titleType.getNodeValue().equals("Subtitle")) {
112
							classname = "subtitle";
113
							classid = "subtitle";
114
						}
115
						if (titleType != null && titleType.getNodeValue().equals("TranslatedTitle")) {
116
							classname = "translated title";
117
							classid = "translated title";
118
						}
119
					}
120
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("title"),
121
							getStructuredProperty(titleValue, classname, classid, "dnet:dataCite_title", "dnet:dataCite_title"));
122
				}
123
			}
124

    
125
			// description
126
			for (int i = 0; i < descriptions.getLength(); i++) {
127
				Node currentNode = descriptions.item(i);
128
				if (currentNode != null && currentNode.hasChildNodes()) {
129
					String descriptionValue = currentNode.getChildNodes().item(0).getNodeValue();
130
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("description"), descriptionValue);
131
				}
132
			}
133

    
134
			// publisher
135
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("publisher"), getFirstItem(publisher));
136

    
137
			// dates
138
			for (int i = 0; i < dates.getLength(); i++) {
139
				Node currentNode = dates.item(i);
140
				if (currentNode != null && currentNode.hasAttributes() && currentNode.hasChildNodes()) {
141
					String dateAttribute = currentNode.getAttributes().getNamedItem("dateType").getNodeValue();
142
					String dateValue = currentNode.getChildNodes().item(0).getNodeValue();
143
					String protoAttribute = "relevantdate";
144
					if ("Accepted".equals(dateAttribute)) {
145
						protoAttribute = "dateofacceptance";
146
					} else if ("Issued".equals(dateAttribute)) {
147
						protoAttribute = "storagedate";
148
					} else if ("Updated".equals(dateAttribute)) {
149
						protoAttribute = "lastmetadataupdate";
150
					} else if ("Available".equals(dateAttribute)) {
151
						protoAttribute = "embargoenddate";
152
					}
153
					if (protoAttribute.equals("relevantdate") == false) {
154
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute), dateValue);
155
					} else {
156
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute),
157
								getStructuredProperty(dateValue, "UNKNOWN", "UNKNOWN", "dnet:dataCite_date", "dnet:dataCite_date"));
158
					}
159
				}
160
			}
161

    
162
			// dateofacceptance
163
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted));
164

    
165
			// size
166
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("size"), getFirstItem(sizes));
167

    
168
			// format
169
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("format"), getFirstItem(formats));
170

    
171
			// version
172
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("version"), getFirstItem(version));
173

    
174
			// language
175
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("language"),
176
					setQualifier(getDefaultQualifier("dnet:languages"), Lists.newArrayList(getFirstItem(languages))));
177

    
178
			// resultType
179
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resulttype"), getSimpleQualifier("dataset", "dnet:result_typologies"));
180

    
181
			String tmpID;
182
			String TmpName;
183

    
184
			if (hostedbyId == null && hostedbyName == null) {
185
				tmpID = collectedfromId;
186
				TmpName = collectedfromName;
187
			} else {
188
				tmpID = hostedbyId;
189
				TmpName = hostedbyName;
190
			}
191

    
192
			final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(tmpID, TmpName));
193

    
194
			String tmpRigths = "UNKNOWN";
195
			final String firstRight = getFirstItem(rights);
196
			if (mappingAccess.containsKey(firstRight)) {
197
				tmpRigths = mappingAccess.get(firstRight);
198
			}
199

    
200
			addField(instance, Instance.getDescriptor().findFieldByName("licence"),
201
					setQualifier(getDefaultQualifier("dnet:access_modes"), Lists.newArrayList(tmpRigths)));
202

    
203
			addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
204
					setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(getFirstItem(cobjcategory))));
205

    
206
			addField(instance, Instance.getDescriptor().findFieldByName("url"), instanceUri);
207

    
208
			result.addInstance(instance);
209

    
210
			List<StructuredProperty> pids = Lists.newArrayList();
211
			OafEntity.Builder entity =
212
					getEntity(Type.result, entityId, getKV(collectedfromId, collectedfromName), originalId, dateOfCollection, pids).setResult(
213
							result.setMetadata(metadataProto));
214
			Oaf oaf = getOaf(entity, getDataInfo(dataInfo, provenance, trust, false, false));
215
			return base64(oaf.toByteArray());
216
		} catch (Exception e) {
217
			e.printStackTrace(System.err);
218
			throw new RuntimeException(e);
219
		}
220

    
221
	}
222

    
223
	// dnet:dnet:oafResultResultFromMDStore($relatedId, $resultId)
224
	public static String oafResultResult_PublicationDataset_FromDatacite(final String source,
225
			final String target,
226
			final String relClass,
227
			final String provenanceAction,
228
			final String trust,
229
			final NodeList dataInfo) {
230
		try {
231
			final String eSource = OafRowKeyDecoder.decode(source).getKey();
232
			final String eTarget = OafRowKeyDecoder.decode(target).getKey();
233

    
234
			final RelMetadata.Builder metadata = RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, "dnet:result_result_relations"));
235
			final PublicationDataset.Builder builder = PublicationDataset.newBuilder().setRelMetadata(metadata);
236

    
237
			OafRel.Builder rel =
238
					getRel(eSource, eTarget, RelType.resultResult, SubRelType.publicationDataset, relClass, false).setResultResult(
239
							ResultResult.newBuilder().setPublicationDataset(builder));
240

    
241
			return base64(getOaf(rel, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray());
242
		} catch (Exception e) {
243
			e.printStackTrace(System.err);
244
			throw new RuntimeException(e);
245
		}
246
	}
247

    
248
	public static String getFirstItem(final NodeList list) {
249
		String out = "";
250
		if (list != null) {
251

    
252
			if (list.getLength() > 0 && list.item(0).getChildNodes() != null && list.item(0).getChildNodes().getLength() > 0) {
253
				out = list.item(0).getChildNodes().item(0).getNodeValue();
254
			}
255
		}
256
		return out;
257
	}
258

    
259
	// dnet:oafResultProjectFromDMF($resultId, $projectId, "sysimport:crosswalk:repository", "0.9")
260
	public static String oafResultProject_Outcome_FromDatacite(final String source,
261
			final String target,
262
			final String relClass,
263
			final String provenanceAction,
264
			final String trust,
265
			final NodeList dataInfo) {
266
		try {
267
			final String eSource = OafRowKeyDecoder.decode(source).getKey();
268
			final String eTarget = OafRowKeyDecoder.decode(target).getKey();
269

    
270
			final RelMetadata.Builder metadata = RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, "dnet:result_project_relations"));
271
			final Outcome.Builder outcome = Outcome.newBuilder().setRelMetadata(metadata);
272

    
273
			OafRel.Builder rel =
274
					getRel(eSource, eTarget, RelType.resultProject, SubRelType.outcome, relClass, false).setResultProject(
275
							ResultProject.newBuilder().setOutcome(outcome));
276

    
277
			return base64(getOaf(rel, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray());
278
		} catch (Exception e) {
279
			e.printStackTrace(System.err);
280
			throw new RuntimeException(e);
281
		}
282
	}
283

    
284
	public static String oafPerson_FromDatacite(final String personId,
285
			final NodeList dataInfo,
286
			final String fullname,
287
			final String provenanceAction,
288
			final String trust,
289
			final String collectedfromId,
290
			final String collectedfromName,
291
			final String originalId,
292
			final String dateOfCollection) {
293

    
294
		final String entityId = OafRowKeyDecoder.decode(personId).getKey();
295
		try {
296
			final Person.Builder person = Person.newBuilder();
297
			final Person.Metadata.Builder metadata = Person.Metadata.newBuilder();
298

    
299
			metadata.setFullname(sf(fullname));
300

    
301
			eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
302
			if (p.isAccurate()) {
303
				metadata.setFirstname(sf(Joiner.on(" ").join(p.getName())));
304
				metadata.clearSecondnames();
305
				for (String s : p.getSurname()) {
306
					metadata.addSecondnames(sf(s));
307
				}
308
				metadata.setFullname(sf(p.getNormalisedFullname()));
309
			}
310

    
311
			// metadata.setNationality(getSimpleQualifier("UNKNOWN", "dnet:countries"));
312
			List<StructuredProperty> pids = Lists.newArrayList();
313
			OafEntity.Builder entity =
314
					getEntity(Type.person, entityId, getKV(collectedfromId, collectedfromName), originalId, dateOfCollection, pids).setPerson(
315
							person.setMetadata(metadata));
316

    
317
			return base64(getOaf(entity, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray());
318
		} catch (Exception e) {
319
			e.printStackTrace(System.err);
320
			throw new RuntimeException(e);
321
		}
322
	}
323

    
324
	public static String oafPersonResult_Authorship_FromDatacite(final String source,
325
			final String target,
326
			final int rank,
327
			final String relClass,
328
			final String provenanceAction,
329
			final String trust,
330
			final NodeList dataInfo) {
331
		try {
332
			final String eSource = OafRowKeyDecoder.decode(source).getKey();
333
			final String eTarget = OafRowKeyDecoder.decode(target).getKey();
334

    
335
			final RelMetadata.Builder metadata = RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, "dnet:personroles"));
336
			final Authorship.Builder auth = Authorship.newBuilder().setRanking("" + rank).setRelMetadata(metadata);
337

    
338
			OafRel.Builder rel =
339
					getRel(eSource, eTarget, RelType.personResult, SubRelType.authorship, relClass, false).setPersonResult(
340
							PersonResult.newBuilder().setAuthorship(auth));
341

    
342
			return base64(getOaf(rel, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray());
343
		} catch (Exception e) {
344
			e.printStackTrace(System.err);
345
			throw new RuntimeException(e);
346
		}
347
	}
348

    
349
}
(5-5/9)