Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.List;
4
import java.util.Map;
5
import java.util.Set;
6

    
7
import com.google.common.collect.Lists;
8
import com.google.common.collect.Maps;
9
import com.google.common.collect.Sets;
10
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
11
import eu.dnetlib.data.proto.FieldTypeProtos.Author;
12
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
13
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
14
import eu.dnetlib.data.proto.OafProtos.Oaf;
15
import eu.dnetlib.data.proto.OafProtos.OafEntity;
16
import eu.dnetlib.data.proto.ResultProtos.Result;
17
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
18
import eu.dnetlib.data.proto.TypeProtos.Type;
19
import org.w3c.dom.NamedNodeMap;
20
import org.w3c.dom.Node;
21
import org.w3c.dom.NodeList;
22

    
23
public class OdfToHbaseXsltFunctions extends CommonDNetXsltFunctions {
24

    
25
	/**
	 * Lookup table from the access-right value found in a record to the access-mode code
	 * ("OPEN", "CLOSED", "RESTRICTED", "EMBARGO") stored on the result instance.
	 * Populated once by the static initializer below and only read afterwards.
	 */
	private static final Map<String, String> mappingAccess = Maps.newHashMap();

	static {

		// info:eu-repo access-right URIs
		mappingAccess.put("info:eu-repo/semantics/openAccess", "OPEN");
		mappingAccess.put("info:eu-repo/semantics/closedAccess", "CLOSED");
		mappingAccess.put("info:eu-repo/semantics/restrictedAccess", "RESTRICTED");
		mappingAccess.put("info:eu-repo/semantics/embargoedAccess", "EMBARGO");

		// The transformator may already have mapped the access rights to the final codes
		// (not confirmed for all datasets), so the codes are also mapped to themselves.
		mappingAccess.put("OPEN", "OPEN");
		mappingAccess.put("CLOSED", "CLOSED");
		mappingAccess.put("RESTRICTED", "RESTRICTED");
		mappingAccess.put("EMBARGO", "EMBARGO");

	}
41

    
42
	public static String odfResult(
43
			final String resultId,
44
			final boolean invisible,
45
			final NodeList about,
46
			final NodeList metadata,
47
			final NodeList titles,
48
			final NodeList creators,
49
			final NodeList subjects,
50
			final NodeList publisher,
51
			final NodeList descriptions,
52
			final NodeList dates,
53
			final NodeList dateaccepted,
54
			final NodeList resourceTypes,
55
			final NodeList formats,
56
			final NodeList sizes,
57
			final NodeList languages,
58
			final NodeList cobjcategory,
59
			final NodeList contributors,
60
			final NodeList rights,
61
			final NodeList license,
62
			final NodeList version,
63
			final NodeList pidList,
64
			final String provenance,
65
			final String trust,
66
			final String hostedbyId,
67
			final String hostedbyName,
68
			final String collectedfromId,
69
			final String collectedfromName,
70
			final NodeList originalIds,
71
			final String instanceUri,
72
			final String dateOfCollection,
73
			final String dateOfTransformation) {
74

    
75
		try {
76
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
77

    
78
			final Result.Builder result = Result.newBuilder();
79
			Result.Metadata.Builder metadataProto = Result.Metadata.newBuilder();
80

    
81
			// subject
82
			for (int i = 0; i < subjects.getLength(); i++) {
83
				Node currentNode = subjects.item(i);
84
				NodeList childNodes = currentNode.getChildNodes();
85
				if (childNodes.getLength() > 0) {
86
					String subjectValue = childNodes.item(0).getNodeValue();
87
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("subject"),
88
							getStructuredProperty(subjectValue, "keyword", "keyword", "dnet:subject_classification_typologies", "dnet:subject_classification_typologies"));
89
				}
90
			}
91

    
92
			// title
93
			for (int i = 0; i < titles.getLength(); i++) {
94
				Node currentNode = titles.item(i);
95
				NodeList childNodes = currentNode.getChildNodes();
96
				if (childNodes.getLength() > 0) {
97
					String titleValue = childNodes.item(0).getNodeValue();
98
					String classname = "main title";
99
					String classid = "main title";
100
					if (currentNode.hasAttributes()) {
101
						NamedNodeMap attributes = currentNode.getAttributes();
102
						Node titleType = attributes.getNamedItem("titleType");
103

    
104
						if (titleType != null && titleType.getNodeValue().equals("AlternativeTitle")) {
105
							classname = "alternative title";
106
							classid = "alternative title";
107
						}
108
						if (titleType != null && titleType.getNodeValue().equals("Subtitle")) {
109
							classname = "subtitle";
110
							classid = "subtitle";
111
						}
112
						if (titleType != null && titleType.getNodeValue().equals("TranslatedTitle")) {
113
							classname = "translated title";
114
							classid = "translated title";
115
						}
116
					}
117
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("title"),
118
							getStructuredProperty(titleValue, classname, classid, "dnet:dataCite_title", "dnet:dataCite_title"));
119
				}
120
			}
121

    
122
			// creators
123
			for (int i = 0; i < creators.getLength(); i++) {
124
				Node currentNode = creators.item(i);
125
				if (currentNode != null && currentNode.hasChildNodes()) {
126
					String fullname = currentNode.getChildNodes().item(0).getNodeValue();
127

    
128
					final Author.Builder author = Author.newBuilder();
129

    
130
					author.setFullname(fullname);
131
					author.setRank(i+1);
132

    
133
					final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
134
					if (p.isAccurate()) {
135
						author.setName(p.getNormalisedFirstName());
136
						author.setSurname(p.getNormalisedSurname());
137
					}
138
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("author"), author);
139
				}
140

    
141
			}
142

    
143
			// description
144
			for (int i = 0; i < descriptions.getLength(); i++) {
145
				Node currentNode = descriptions.item(i);
146
				if (currentNode != null && currentNode.hasChildNodes()) {
147
					String descriptionValue = currentNode.getChildNodes().item(0).getNodeValue();
148
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("description"), descriptionValue);
149
				}
150
			}
151

    
152
			// contributors
153
			for (int i = 0; i < contributors.getLength(); i++) {
154
				Node currentNode = contributors.item(i);
155
				if (currentNode != null && currentNode.hasChildNodes()) {
156
					String contributorValue = currentNode.getChildNodes().item(0).getNodeValue();
157
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("contributor"), contributorValue);
158
				}
159
			}
160

    
161
			// publisher
162
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("publisher"), getFirstItem(publisher));
163

    
164
			// dates
165
			for (int i = 0; i < dates.getLength(); i++) {
166
				Node currentNode = dates.item(i);
167
				if (currentNode != null && currentNode.hasAttributes() && currentNode.hasChildNodes()) {
168
					String dateAttribute = currentNode.getAttributes().getNamedItem("dateType").getNodeValue();
169
					String dateValue = currentNode.getChildNodes().item(0).getNodeValue();
170
					String protoAttribute = "relevantdate";
171
					if ("Accepted".equals(dateAttribute)) {
172
						protoAttribute = "dateofacceptance";
173
					} else if ("Issued".equals(dateAttribute)) {
174
						protoAttribute = "storagedate";
175
					} else if ("Updated".equals(dateAttribute)) {
176
						protoAttribute = "lastmetadataupdate";
177
					} else if ("Available".equals(dateAttribute)) {
178
						protoAttribute = "embargoenddate";
179
					}
180
					if (protoAttribute.equals("relevantdate") == false) {
181
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute), dateValue);
182
					} else {
183
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute),
184
								getStructuredProperty(dateValue, "UNKNOWN", "UNKNOWN", "dnet:dataCite_date", "dnet:dataCite_date"));
185
					}
186
				}
187
			}
188

    
189
			// dateofacceptance
190
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted));
191

    
192
			// size
193
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("size"), getFirstItem(sizes));
194

    
195
			// format
196
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("format"), getFirstItem(formats));
197

    
198
			// version
199
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("version"), getFirstItem(version));
200

    
201
			// language
202
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("language"),
203
					setQualifier(getDefaultQualifier("dnet:languages"), Lists.newArrayList(getFirstItem(languages))));
204

    
205
			//resource type
206
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resourcetype"),
207
					setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(getFirstItem(resourceTypes))));
208

    
209
			// resultType
210
			final String cobjcategoryCode = getFirstItem(cobjcategory);
211

    
212
			String resulttype = "";
213
			switch (cobjcategoryCode) {
214
			case "0029":
215
				resulttype = "software";
216
				break;
217
			default:
218
				resulttype = "dataset";
219
			}
220

    
221
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies"));
222

    
223
			String tmpID;
224
			String TmpName;
225

    
226
			if (hostedbyId == null && hostedbyName == null) {
227
				tmpID = collectedfromId;
228
				TmpName = collectedfromName;
229
			} else {
230
				tmpID = hostedbyId;
231
				TmpName = hostedbyName;
232
			}
233

    
234
			final KeyValue collectedFrom = getKV(collectedfromId, collectedfromName);
235

    
236
			final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(tmpID, TmpName));
237

    
238
			String tmpRigths = "UNKNOWN";
239
			final String firstRight = getFirstItem(rights);
240
			if (mappingAccess.containsKey(firstRight)) {
241
				tmpRigths = mappingAccess.get(firstRight);
242
			}
243

    
244
			addField(instance, Instance.getDescriptor().findFieldByName("license"), getFirstItem(license));
245

    
246
			addField(instance, Instance.getDescriptor().findFieldByName("accessright"),
247
					setQualifier(getDefaultQualifier("dnet:access_modes"), Lists.newArrayList(tmpRigths)));
248

    
249
			addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
250
					setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(cobjcategoryCode)));
251

    
252
			addField(instance, Instance.getDescriptor().findFieldByName("url"), instanceUri);
253

    
254
				addField(instance, Instance.getDescriptor().findFieldByName("collectedfrom"), collectedFrom);
255
			addField(instance, Instance.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted));
256

    
257
			result.addInstance(instance);
258

    
259
			List<StructuredProperty> pids = parsePids(pidList);
260

    
261
			// original ids
262
			final Set<String> originalIdList = Sets.newHashSet();
263
			for (int i = 0; i < originalIds.getLength(); i++) {
264
				Node currentNode = originalIds.item(i);
265
				if (currentNode != null && currentNode.hasChildNodes()) {
266
					originalIdList.add(currentNode.getChildNodes().item(0).getNodeValue());
267
				}
268
			}
269

    
270
			OafEntity.Builder entity =
271
					getEntity(Type.result, entityId, collectedFrom, originalIdList, dateOfCollection, dateOfTransformation, pids).setResult(
272
							result.setMetadata(metadataProto));
273

    
274
			entity.setOaiprovenance(getOAIProvenance(about));
275

    
276
			Oaf oaf = getOaf(entity, getDataInfo(invisible, about, provenance, trust, false, false));
277
			return base64(oaf.toByteArray());
278
		} catch (Exception e) {
279
			e.printStackTrace(System.err);
280
			throw new RuntimeException(e);
281
		}
282

    
283
	}
284

    
285
	public static String getFirstItem(final NodeList list) {
286
		String out = "";
287
		if (list != null) {
288

    
289
			if (list.getLength() > 0 && list.item(0).getChildNodes() != null && list.item(0).getChildNodes().getLength() > 0) {
290
				out = list.item(0).getChildNodes().item(0).getNodeValue();
291
			}
292
		}
293
		return out;
294
	}
295

    
296
}
(8-8/10)