Project

General

Profile

1 26600 sandro.lab
package eu.dnetlib.data.transform.xml;
2
3
import java.util.List;
4
import java.util.Map;
5
6
import com.google.common.collect.Lists;
7
import com.google.common.collect.Maps;
8
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
9 28092 claudio.at
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
10 26600 sandro.lab
import eu.dnetlib.data.proto.OafProtos.Oaf;
11
import eu.dnetlib.data.proto.OafProtos.OafEntity;
12
import eu.dnetlib.data.proto.ResultProtos.Result;
13
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
14
import eu.dnetlib.data.proto.TypeProtos.Type;
15 40014 claudio.at
import org.w3c.dom.NamedNodeMap;
16
import org.w3c.dom.Node;
17
import org.w3c.dom.NodeList;
18 26600 sandro.lab
19 40198 claudio.at
public class OdfToHbaseXsltFunctions extends CommonDNetXsltFunctions {
20 26600 sandro.lab
21
	private static Map<String, String> mappingAccess = Maps.newHashMap();
22
23
	static {
24
25
		mappingAccess.put("info:eu-repo/semantics/openAccess", "OPEN");
26
		mappingAccess.put("info:eu-repo/semantics/closedAccess", "CLOSED");
27
		mappingAccess.put("info:eu-repo/semantics/restrictedAccess", "RESTRICTED");
28
		mappingAccess.put("info:eu-repo/semantics/embargoedAccess", "EMBARGO");
29
30 33325 claudio.at
		// Transformator now maps the access rights into proper values, not sure if it does for all datasets.
31
		mappingAccess.put("OPEN", "OPEN");
32
		mappingAccess.put("CLOSED", "CLOSED");
33
		mappingAccess.put("RESTRICTED", "RESTRICTED");
34
		mappingAccess.put("EMBARGO", "EMBARGO");
35
36 26600 sandro.lab
	}
37
38 40198 claudio.at
	public static String odfResult(
39
			final String resultId,
40
			final NodeList about,
41 26600 sandro.lab
			final NodeList metadata,
42
			final NodeList titles,
43
			final NodeList subjects,
44
			final NodeList publisher,
45
			final NodeList descriptions,
46
			final NodeList dates,
47 30969 claudio.at
			final NodeList dateaccepted,
48 26600 sandro.lab
			final NodeList resourceTypes,
49
			final NodeList formats,
50
			final NodeList sizes,
51
			final NodeList languages,
52 31200 claudio.at
			final NodeList cobjcategory,
53 41526 claudio.at
			final NodeList contributors,
54 26600 sandro.lab
			final NodeList rights,
55
			final NodeList version,
56 40014 claudio.at
			final NodeList pidList,
57 26600 sandro.lab
			final String provenance,
58
			final String trust,
59
			final String hostedbyId,
60
			final String hostedbyName,
61
			final String collectedfromId,
62
			final String collectedfromName,
63 40014 claudio.at
			final NodeList originalIds,
64 26600 sandro.lab
			final String instanceUri,
65 40313 claudio.at
			final String dateOfCollection,
66
			final String dateOfTransformation) {
67 26600 sandro.lab
68
		try {
69
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
70
71
			final Result.Builder result = Result.newBuilder();
72
			Result.Metadata.Builder metadataProto = Result.Metadata.newBuilder();
73
74
			// subject
75
			for (int i = 0; i < subjects.getLength(); i++) {
76
				Node currentNode = subjects.item(i);
77
				NodeList childNodes = currentNode.getChildNodes();
78
				if (childNodes.getLength() > 0) {
79
					String subjectValue = childNodes.item(0).getNodeValue();
80
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("subject"),
81 44552 claudio.at
							getStructuredProperty(subjectValue, "keyword", "keyword", "dnet:subject_classification_typologies", "dnet:subject_classification_typologies"));
82 26600 sandro.lab
				}
83
			}
84
85
			// title
86
			for (int i = 0; i < titles.getLength(); i++) {
87
				Node currentNode = titles.item(i);
88
				NodeList childNodes = currentNode.getChildNodes();
89
				if (childNodes.getLength() > 0) {
90
					String titleValue = childNodes.item(0).getNodeValue();
91
					String classname = "main title";
92
					String classid = "main title";
93
					if (currentNode.hasAttributes()) {
94
						NamedNodeMap attributes = currentNode.getAttributes();
95
						Node titleType = attributes.getNamedItem("titleType");
96 28092 claudio.at
97 33325 claudio.at
						if (titleType != null && titleType.getNodeValue().equals("AlternativeTitle")) {
98 26600 sandro.lab
							classname = "alternative title";
99
							classid = "alternative title";
100
						}
101 33325 claudio.at
						if (titleType != null && titleType.getNodeValue().equals("Subtitle")) {
102 26600 sandro.lab
							classname = "subtitle";
103
							classid = "subtitle";
104
						}
105 33325 claudio.at
						if (titleType != null && titleType.getNodeValue().equals("TranslatedTitle")) {
106 26600 sandro.lab
							classname = "translated title";
107
							classid = "translated title";
108
						}
109
					}
110
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("title"),
111
							getStructuredProperty(titleValue, classname, classid, "dnet:dataCite_title", "dnet:dataCite_title"));
112
				}
113
			}
114
115
			// description
116
			for (int i = 0; i < descriptions.getLength(); i++) {
117
				Node currentNode = descriptions.item(i);
118 33325 claudio.at
				if (currentNode != null && currentNode.hasChildNodes()) {
119 26600 sandro.lab
					String descriptionValue = currentNode.getChildNodes().item(0).getNodeValue();
120
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("description"), descriptionValue);
121
				}
122
			}
123
124 41526 claudio.at
			// contributors
125
			for (int i = 0; i < contributors.getLength(); i++) {
126
				Node currentNode = contributors.item(i);
127
				if (currentNode != null && currentNode.hasChildNodes()) {
128
					String contributorValue = currentNode.getChildNodes().item(0).getNodeValue();
129
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("contributor"), contributorValue);
130
				}
131
			}
132
133 26600 sandro.lab
			// publisher
134
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("publisher"), getFirstItem(publisher));
135
136
			// dates
137
			for (int i = 0; i < dates.getLength(); i++) {
138
				Node currentNode = dates.item(i);
139 33325 claudio.at
				if (currentNode != null && currentNode.hasAttributes() && currentNode.hasChildNodes()) {
140 26600 sandro.lab
					String dateAttribute = currentNode.getAttributes().getNamedItem("dateType").getNodeValue();
141
					String dateValue = currentNode.getChildNodes().item(0).getNodeValue();
142
					String protoAttribute = "relevantdate";
143
					if ("Accepted".equals(dateAttribute)) {
144
						protoAttribute = "dateofacceptance";
145
					} else if ("Issued".equals(dateAttribute)) {
146
						protoAttribute = "storagedate";
147
					} else if ("Updated".equals(dateAttribute)) {
148
						protoAttribute = "lastmetadataupdate";
149
					} else if ("Available".equals(dateAttribute)) {
150
						protoAttribute = "embargoenddate";
151
					}
152
					if (protoAttribute.equals("relevantdate") == false) {
153
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute), dateValue);
154
					} else {
155
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute),
156
								getStructuredProperty(dateValue, "UNKNOWN", "UNKNOWN", "dnet:dataCite_date", "dnet:dataCite_date"));
157
					}
158
				}
159
			}
160
161 30969 claudio.at
			// dateofacceptance
162
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted));
163
164 26600 sandro.lab
			// size
165
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("size"), getFirstItem(sizes));
166
167
			// format
168
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("format"), getFirstItem(formats));
169
170
			// version
171
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("version"), getFirstItem(version));
172
173
			// language
174
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("language"),
175
					setQualifier(getDefaultQualifier("dnet:languages"), Lists.newArrayList(getFirstItem(languages))));
176
177 42500 alessia.ba
			//resource type
178
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resourcetype"),
179
					setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(getFirstItem(resourceTypes))));
180
181 26600 sandro.lab
			// resultType
182 47480 claudio.at
			final String cobjcategoryCode = getFirstItem(cobjcategory);
183
			String resulttype = "";
184
			switch (cobjcategoryCode) {
185
			// add here the code to be excluded from the default mapping as 'dataset'
186 26600 sandro.lab
187 47480 claudio.at
			/*
188
			case "0029":
189
				resulttype = "software";
190
				break;
191
			*/
192
			default:
193
				resulttype = "dataset";
194
			}
195
196
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies"));
197
198 26600 sandro.lab
			String tmpID;
199
			String TmpName;
200
201 33325 claudio.at
			if (hostedbyId == null && hostedbyName == null) {
202 26600 sandro.lab
				tmpID = collectedfromId;
203
				TmpName = collectedfromName;
204
			} else {
205
				tmpID = hostedbyId;
206
				TmpName = hostedbyName;
207
			}
208
209
			final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(tmpID, TmpName));
210
211
			String tmpRigths = "UNKNOWN";
212
			final String firstRight = getFirstItem(rights);
213
			if (mappingAccess.containsKey(firstRight)) {
214
				tmpRigths = mappingAccess.get(firstRight);
215
			}
216
217
			addField(instance, Instance.getDescriptor().findFieldByName("licence"),
218
					setQualifier(getDefaultQualifier("dnet:access_modes"), Lists.newArrayList(tmpRigths)));
219
220
			addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
221 47480 claudio.at
					setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(cobjcategoryCode)));
222 31200 claudio.at
223 26600 sandro.lab
			addField(instance, Instance.getDescriptor().findFieldByName("url"), instanceUri);
224
225
			result.addInstance(instance);
226
227 40014 claudio.at
			List<StructuredProperty> pids = parsePids(pidList);
228
229
			// original ids
230
			final List<String> originalIdList = Lists.newArrayList();
231
			for (int i = 0; i < originalIds.getLength(); i++) {
232
				Node currentNode = originalIds.item(i);
233
				if (currentNode != null && currentNode.hasChildNodes()) {
234
					originalIdList.add(currentNode.getChildNodes().item(0).getNodeValue());
235
				}
236
			}
237
238 30969 claudio.at
			OafEntity.Builder entity =
239 40313 claudio.at
					getEntity(Type.result, entityId, getKV(collectedfromId, collectedfromName), originalIdList, dateOfCollection, dateOfTransformation, pids).setResult(
240 30969 claudio.at
							result.setMetadata(metadataProto));
241 40198 claudio.at
242
			entity.setOaiprovenance(getOAIProvenance(about));
243
244
			Oaf oaf = getOaf(entity, getDataInfo(about, provenance, trust, false, false));
245 26600 sandro.lab
			return base64(oaf.toByteArray());
246
		} catch (Exception e) {
247
			e.printStackTrace(System.err);
248
			throw new RuntimeException(e);
249
		}
250
251
	}
252
253
	public static String getFirstItem(final NodeList list) {
254
		String out = "";
255
		if (list != null) {
256
257 33325 claudio.at
			if (list.getLength() > 0 && list.item(0).getChildNodes() != null && list.item(0).getChildNodes().getLength() > 0) {
258 26600 sandro.lab
				out = list.item(0).getChildNodes().item(0).getNodeValue();
259
			}
260
		}
261
		return out;
262
	}
263
264
}