Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.List;
4
import java.util.Map;
5

    
6
import com.google.common.collect.Lists;
7
import com.google.common.collect.Maps;
8
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
9
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
10
import eu.dnetlib.data.proto.OafProtos.Oaf;
11
import eu.dnetlib.data.proto.OafProtos.OafEntity;
12
import eu.dnetlib.data.proto.ResultProtos.Result;
13
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
14
import eu.dnetlib.data.proto.TypeProtos.Type;
15
import org.w3c.dom.NamedNodeMap;
16
import org.w3c.dom.Node;
17
import org.w3c.dom.NodeList;
18

    
19
public class OdfToHbaseXsltFunctions extends CommonDNetXsltFunctions {
20

    
21
	private static Map<String, String> mappingAccess = Maps.newHashMap();
22

    
23
	static {
24

    
25
		mappingAccess.put("info:eu-repo/semantics/openAccess", "OPEN");
26
		mappingAccess.put("info:eu-repo/semantics/closedAccess", "CLOSED");
27
		mappingAccess.put("info:eu-repo/semantics/restrictedAccess", "RESTRICTED");
28
		mappingAccess.put("info:eu-repo/semantics/embargoedAccess", "EMBARGO");
29

    
30
		// Transformator now maps the access rights into proper values, not sure if it does for all datasets.
31
		mappingAccess.put("OPEN", "OPEN");
32
		mappingAccess.put("CLOSED", "CLOSED");
33
		mappingAccess.put("RESTRICTED", "RESTRICTED");
34
		mappingAccess.put("EMBARGO", "EMBARGO");
35

    
36
	}
37

    
38
	public static String odfResult(
39
			final String resultId,
40
			final NodeList about,
41
			final NodeList metadata,
42
			final NodeList titles,
43
			final NodeList subjects,
44
			final NodeList publisher,
45
			final NodeList descriptions,
46
			final NodeList dates,
47
			final NodeList dateaccepted,
48
			final NodeList resourceTypes,
49
			final NodeList formats,
50
			final NodeList sizes,
51
			final NodeList languages,
52
			final NodeList cobjcategory,
53
			final NodeList contributors,
54
			final NodeList rights,
55
			final NodeList version,
56
			final NodeList pidList,
57
			final String provenance,
58
			final String trust,
59
			final String hostedbyId,
60
			final String hostedbyName,
61
			final String collectedfromId,
62
			final String collectedfromName,
63
			final NodeList originalIds,
64
			final String instanceUri,
65
			final String dateOfCollection,
66
			final String dateOfTransformation) {
67

    
68
		try {
69
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
70

    
71
			final Result.Builder result = Result.newBuilder();
72
			Result.Metadata.Builder metadataProto = Result.Metadata.newBuilder();
73

    
74
			// subject
75
			for (int i = 0; i < subjects.getLength(); i++) {
76
				Node currentNode = subjects.item(i);
77
				NodeList childNodes = currentNode.getChildNodes();
78
				if (childNodes.getLength() > 0) {
79
					String subjectValue = childNodes.item(0).getNodeValue();
80
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("subject"),
81
							getStructuredProperty(subjectValue, "keyword", "keyword", "dnet:result_subject", "dnet:result_subject"));
82
				}
83
			}
84

    
85
			// title
86
			for (int i = 0; i < titles.getLength(); i++) {
87
				Node currentNode = titles.item(i);
88
				NodeList childNodes = currentNode.getChildNodes();
89
				if (childNodes.getLength() > 0) {
90
					String titleValue = childNodes.item(0).getNodeValue();
91
					String classname = "main title";
92
					String classid = "main title";
93
					if (currentNode.hasAttributes()) {
94
						NamedNodeMap attributes = currentNode.getAttributes();
95
						Node titleType = attributes.getNamedItem("titleType");
96

    
97
						if (titleType != null && titleType.getNodeValue().equals("AlternativeTitle")) {
98
							classname = "alternative title";
99
							classid = "alternative title";
100
						}
101
						if (titleType != null && titleType.getNodeValue().equals("Subtitle")) {
102
							classname = "subtitle";
103
							classid = "subtitle";
104
						}
105
						if (titleType != null && titleType.getNodeValue().equals("TranslatedTitle")) {
106
							classname = "translated title";
107
							classid = "translated title";
108
						}
109
					}
110
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("title"),
111
							getStructuredProperty(titleValue, classname, classid, "dnet:dataCite_title", "dnet:dataCite_title"));
112
				}
113
			}
114

    
115
			// description
116
			for (int i = 0; i < descriptions.getLength(); i++) {
117
				Node currentNode = descriptions.item(i);
118
				if (currentNode != null && currentNode.hasChildNodes()) {
119
					String descriptionValue = currentNode.getChildNodes().item(0).getNodeValue();
120
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("description"), descriptionValue);
121
				}
122
			}
123

    
124
			// contributors
125
			for (int i = 0; i < contributors.getLength(); i++) {
126
				Node currentNode = contributors.item(i);
127
				if (currentNode != null && currentNode.hasChildNodes()) {
128
					String contributorValue = currentNode.getChildNodes().item(0).getNodeValue();
129
					addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("contributor"), contributorValue);
130
				}
131
			}
132

    
133
			// publisher
134
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("publisher"), getFirstItem(publisher));
135

    
136
			// dates
137
			for (int i = 0; i < dates.getLength(); i++) {
138
				Node currentNode = dates.item(i);
139
				if (currentNode != null && currentNode.hasAttributes() && currentNode.hasChildNodes()) {
140
					String dateAttribute = currentNode.getAttributes().getNamedItem("dateType").getNodeValue();
141
					String dateValue = currentNode.getChildNodes().item(0).getNodeValue();
142
					String protoAttribute = "relevantdate";
143
					if ("Accepted".equals(dateAttribute)) {
144
						protoAttribute = "dateofacceptance";
145
					} else if ("Issued".equals(dateAttribute)) {
146
						protoAttribute = "storagedate";
147
					} else if ("Updated".equals(dateAttribute)) {
148
						protoAttribute = "lastmetadataupdate";
149
					} else if ("Available".equals(dateAttribute)) {
150
						protoAttribute = "embargoenddate";
151
					}
152
					if (protoAttribute.equals("relevantdate") == false) {
153
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute), dateValue);
154
					} else {
155
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute),
156
								getStructuredProperty(dateValue, "UNKNOWN", "UNKNOWN", "dnet:dataCite_date", "dnet:dataCite_date"));
157
					}
158
				}
159
			}
160

    
161
			// dateofacceptance
162
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted));
163

    
164
			// size
165
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("size"), getFirstItem(sizes));
166

    
167
			// format
168
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("format"), getFirstItem(formats));
169

    
170
			// version
171
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("version"), getFirstItem(version));
172

    
173
			// language
174
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("language"),
175
					setQualifier(getDefaultQualifier("dnet:languages"), Lists.newArrayList(getFirstItem(languages))));
176

    
177
			//resource type
178
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resourcetype"),
179
					setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(getFirstItem(resourceTypes))));
180

    
181
			// resultType
182
			addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resulttype"), getSimpleQualifier("dataset", "dnet:result_typologies"));
183

    
184
			String tmpID;
185
			String TmpName;
186

    
187
			if (hostedbyId == null && hostedbyName == null) {
188
				tmpID = collectedfromId;
189
				TmpName = collectedfromName;
190
			} else {
191
				tmpID = hostedbyId;
192
				TmpName = hostedbyName;
193
			}
194

    
195
			final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(tmpID, TmpName));
196

    
197
			String tmpRigths = "UNKNOWN";
198
			final String firstRight = getFirstItem(rights);
199
			if (mappingAccess.containsKey(firstRight)) {
200
				tmpRigths = mappingAccess.get(firstRight);
201
			}
202

    
203
			addField(instance, Instance.getDescriptor().findFieldByName("licence"),
204
					setQualifier(getDefaultQualifier("dnet:access_modes"), Lists.newArrayList(tmpRigths)));
205

    
206
			addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
207
					setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(getFirstItem(cobjcategory))));
208

    
209
			addField(instance, Instance.getDescriptor().findFieldByName("url"), instanceUri);
210

    
211
			result.addInstance(instance);
212

    
213
			List<StructuredProperty> pids = parsePids(pidList);
214

    
215
			// original ids
216
			final List<String> originalIdList = Lists.newArrayList();
217
			for (int i = 0; i < originalIds.getLength(); i++) {
218
				Node currentNode = originalIds.item(i);
219
				if (currentNode != null && currentNode.hasChildNodes()) {
220
					originalIdList.add(currentNode.getChildNodes().item(0).getNodeValue());
221
				}
222
			}
223

    
224
			OafEntity.Builder entity =
225
					getEntity(Type.result, entityId, getKV(collectedfromId, collectedfromName), originalIdList, dateOfCollection, dateOfTransformation, pids).setResult(
226
							result.setMetadata(metadataProto));
227

    
228
			entity.setOaiprovenance(getOAIProvenance(about));
229

    
230
			Oaf oaf = getOaf(entity, getDataInfo(about, provenance, trust, false, false));
231
			return base64(oaf.toByteArray());
232
		} catch (Exception e) {
233
			e.printStackTrace(System.err);
234
			throw new RuntimeException(e);
235
		}
236

    
237
	}
238

    
239
	public static String getFirstItem(final NodeList list) {
240
		String out = "";
241
		if (list != null) {
242

    
243
			if (list.getLength() > 0 && list.item(0).getChildNodes() != null && list.item(0).getChildNodes().getLength() > 0) {
244
				out = list.item(0).getChildNodes().item(0).getNodeValue();
245
			}
246
		}
247
		return out;
248
	}
249

    
250
}
(7-7/8)