Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.HashMap;
4
import java.util.List;
5
import java.util.Map;
6

    
7
import com.google.common.collect.Iterables;
8
import com.google.common.collect.Lists;
9
import com.google.protobuf.Descriptors.Descriptor;
10
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
11
import eu.dnetlib.data.proto.FieldTypeProtos.Author;
12
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
13
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
14
import eu.dnetlib.data.proto.OafProtos.Oaf;
15
import eu.dnetlib.data.proto.OafProtos.OafEntity;
16
import eu.dnetlib.data.proto.ResultProtos.Result;
17
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
18
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
19
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
20
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
21
import eu.dnetlib.data.proto.TypeProtos.Type;
22
import org.apache.commons.lang3.StringUtils;
23
import org.w3c.dom.NodeList;
24

    
25
public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions {
26

    
27
	public static String oafResult(
28
			final String resultId,
29
			final boolean invisible,
30
			final String provenance,
31
			final String trust,
32
			final NodeList about,
33
			final String originalId,
34
			final String dateOfCollection,
35
			final String dateOfTransformation,
36
			final NodeList metadata) {
37

    
38
		ValueMap values = null;
39
		try {
40
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
41
			values = ValueMap.parseNodeList(metadata);
42
			final Descriptor mDesc = Result.Metadata.getDescriptor();
43

    
44
			final List<KeyValue> collectedFrom = getKeyValues(values, "collectedfrom", Type.datasource);
45
			final List<KeyValue> hostedBy = getKeyValues(values, "hostedby", Type.datasource);
46

    
47
			final Result.Metadata.Builder metadataBuilder = buildMetadata(values, mDesc);
48
			final Result.Builder result = buildResult(metadataBuilder, values, collectedFrom, hostedBy);
49
			final OafEntity.Builder entity = buildOafEntity(result, entityId, metadata, collectedFrom, originalId);
50
			entity.setDateofcollection(dateOfCollection)
51
					.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about));
52

    
53
			final Oaf oaf = getOaf(entity, getDataInfo(invisible, about, provenance, trust, false, false));
54
			return base64(oaf.toByteArray());
55
		} catch (final Throwable e) {
56
			handleException(e, resultId, values);
57
		}
58
		return null;
59
	}
60

    
61
	public static String oafResultUpdate(final String resultId,
62
			final String provenance,
63
			final String trust,
64
			final NodeList nodelist) {
65
		ValueMap values = null;
66
		try {
67
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
68
			values = ValueMap.parseNodeList(nodelist);
69
			final List<KeyValue> hostedBy = getKeyValues(values, "hostedby", Type.datasource);
70

    
71
			final Descriptor mDesc = Result.Metadata.getDescriptor();
72

    
73
			final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
74
			final Result.Builder result = buildResult(metadata, values, null, hostedBy);
75

    
76
			final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, null, null);
77
			final Oaf oaf = getOaf(entity, null);
78
			return base64(oaf.toByteArray());
79
		} catch (final Throwable e) {
80
			handleException(e, resultId, values);
81
		}
82
		return null;
83
	}
84

    
85
	private static OafEntity.Builder buildOafEntity(
86
			final Result.Builder result,
87
			final String entityId,
88
			final NodeList nodelist,
89
			final List<KeyValue> collectedFrom,
90
			final String originalId) {
91

    
92
		final List<StructuredProperty> pids = Lists.newArrayList();
93
		pids.addAll(parsePids(nodelist));
94

    
95
		final OafEntity.Builder entity =
96
				getEntity(Type.result, entityId, collectedFrom, StringUtils.isBlank(originalId) ? null : Lists.newArrayList(originalId), null, null, pids)
97
						.setResult(result);
98
		return entity;
99
	}
100

    
101
	private static Result.Metadata.Builder buildMetadata(final ValueMap values, final Descriptor mDesc) {
102
		final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
103

    
104
		if (values.get("creator") != null) {
105
			for (final Element e : values.get("creator")) {
106

    
107
				final Author.Builder author = Author.newBuilder();
108

    
109
				final String fullname = e.getText();
110
				author.setFullname(fullname);
111
				author.setRank(Integer.valueOf(e.getAttributeValue(ValueMap.IDX_ATTRIBUTE)));
112

    
113
				final String nameIdentifier = e.getAttributeValue("nameIdentifier");
114
				final String nameIdentifierScheme = e.getAttributeValue("nameIdentifierScheme");
115

    
116
				if (StringUtils.isNotBlank(nameIdentifier) && StringUtils.isNotBlank(nameIdentifierScheme)) {
117
					author.addPid(getKV(nameIdentifierScheme, nameIdentifier));
118
				}
119

    
120
				final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
121
				if (p.isAccurate()) {
122
					author.setName(p.getNormalisedFirstName());
123
					author.setSurname(p.getNormalisedSurname());
124
				}
125
				metadata.addAuthor(author);
126
			}
127
		}
128

    
129
		addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject"), "keyword", "dnet:subject_classification_typologies");
130
		addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title"), "main title", "dnet:dataCite_title");
131
		for (final String fieldname : Lists.newArrayList("description", "source", "contributor")) {
132
			if (values.get(fieldname) != null) {
133
				for (final String s : values.get(fieldname).listValues()) {
134
					addField(metadata, mDesc.findFieldByName(fieldname), s);
135
				}
136
			}
137
		}
138
		addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues()));
139
		addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
140
		addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues());
141
		addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues());
142
		addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues());
143

    
144
		String resulttype = getResultType(values);
145
		addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies"));
146

    
147
		addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues());
148
		addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues());
149
		if (values.get("concept") != null) {
150
			for (final Element e : values.get("concept")) {
151
				final String id = e.getAttributes().get("id");
152
				if (StringUtils.isNotBlank(id)) {
153
					metadata.addContext(Context.newBuilder().setId(id));
154
				}
155
			}
156
		}
157
		if (values.get("journal") != null) {
158
			for (final Element e : values.get("journal")) {
159

    
160
				final Journal.Builder journal = Journal.newBuilder();
161
				if (e.getText() != null) {
162
					journal.setName(e.getText());
163
				}
164

    
165
				final Map<String, String> attr = e.getAttributes();
166
				if (attr != null) {
167
					if (attr.get("issn") != null) {
168
						journal.setIssnPrinted(attr.get("issn"));
169
					}
170
					if (attr.get("eissn") != null) {
171
						journal.setIssnOnline(attr.get("eissn"));
172
					}
173
					if (attr.get("lissn") != null) {
174
						journal.setIssnLinking(attr.get("lissn"));
175
					}
176

    
177
					if (attr.get("ep") != null) {
178
						journal.setEp(attr.get("ep"));
179
					}
180
					if (attr.get("iss") != null) {
181
						journal.setIss(attr.get("iss"));
182
					}
183
					if (attr.get("sp") != null) {
184
						journal.setSp(attr.get("sp"));
185
					}
186
					if (attr.get("vol") != null) {
187
						journal.setVol(attr.get("vol"));
188
					}
189
				}
190
				metadata.setJournal(journal.build());
191
			}
192
		}
193
		return metadata;
194
	}
195

    
196
	private static String getResultType(final ValueMap values) {
197
			
198
		final Element cobjcategory = values.get("cobjcategory").stream()
199
				.map(e -> StringUtils.isNotBlank(e.getText()) ? e : new Element("0000", e.getAttributes()))
200
				.findFirst()
201
				.orElse(new Element("0000", new HashMap<>()));
202

    
203
		final String resulttype = cobjcategory.getAttributeValue("type");
204
		if (StringUtils.isNotBlank(resulttype)) {
205
			return resulttype;
206
		}
207

    
208
		return getDefaultResulttype(cobjcategory);
209
	}
210

    
211
	private static Result.Builder buildResult(final Result.Metadata.Builder metadata,
212
			final ValueMap values,
213
			final List<KeyValue> collectedFrom,
214
			final List<KeyValue> hostedBy) {
215
		final Result.Builder result = Result.newBuilder();
216

    
217
		final Instance.Builder instance = Instance.newBuilder();
218

    
219
		addField(instance, Instance.getDescriptor().findFieldByName("license"), values.get("license").listValues());
220

    
221
		addField(instance, Instance.getDescriptor().findFieldByName("accessright"),
222
				setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues()));
223

    
224
		addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
225
				setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues()));
226

    
227
		addField(instance, Instance.getDescriptor().findFieldByName("hostedby"), hostedBy);
228
		addField(instance, Instance.getDescriptor().findFieldByName("collectedfrom"), collectedFrom);
229
		addField(instance, Instance.getDescriptor().findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
230

    
231
		if (values.get("identifier") != null) {
232
			addField(instance, Instance.getDescriptor().findFieldByName("url"),
233
					Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter)));
234
		}
235

    
236
		result.addInstance(instance);
237

    
238
		final List<Element> extrefs = values.get("reference");
239
		if (!extrefs.isEmpty()) {
240
			final Descriptor extDesc = ExternalReference.getDescriptor();
241
			for (final Element element : extrefs) {
242
				final ExternalReference.Builder extref = ExternalReference.newBuilder();
243
				addField(extref, extDesc.findFieldByName("url"), element.getText());
244
				addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source"));
245
				addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier"));
246
				addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title"));
247
				addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query"));
248
				addField(extref, extDesc.findFieldByName("qualifier"),
249
						setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type")))
250
								.build());
251

    
252
				result.addExternalReference(extref);
253
			}
254
		}
255

    
256
		return result.setMetadata(metadata);
257
	}
258

    
259
	private static void handleException(Throwable e, final String resultId, final ValueMap values) {
260
		System.err.println("resultId: " + resultId);
261
		if (values != null) {
262
			System.err.println("values: " + values);
263
		}
264
		e.printStackTrace();
265
		throw new RuntimeException(e);
266
	}
267
}
(7-7/10)