Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.List;
4
import java.util.Map;
5

    
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.Descriptor;
9
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
10
import eu.dnetlib.data.proto.FieldTypeProtos.Author;
11
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
12
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
13
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.data.proto.OafProtos.OafEntity;
15
import eu.dnetlib.data.proto.ResultProtos.Result;
16
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
17
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
18
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
19
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
20
import eu.dnetlib.data.proto.TypeProtos.Type;
21
import org.apache.commons.lang.StringUtils;
22
import org.w3c.dom.NodeList;
23

    
24
public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions {
25

    
26
	public static String oafResult(
27
			final String resultId,
28
			final String provenance,
29
			final String trust,
30
			final NodeList about,
31
			final String hostedbyId,
32
			final String hostedbyName,
33
			final String collectedFromId,
34
			final String collectedFromName,
35
			final String originalId,
36
			final String dateOfCollection,
37
			final String dateOfTransformation,
38
			final NodeList nodelist) {
39
		try {
40
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
41
			final ValueMap values = ValueMap.parseNodeList(nodelist);
42
			final Descriptor mDesc = Result.Metadata.getDescriptor();
43
			final KeyValue collectedFrom = getKV(collectedFromId, collectedFromName);
44
			final KeyValue hostedBy = getKV(hostedbyId, hostedbyName);
45

    
46
			final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
47
			final Result.Builder result = buildResult(metadata, values, collectedFrom, hostedBy);
48
			final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, collectedFrom, originalId);
49
			entity.setDateofcollection(dateOfCollection)
50
					.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about));
51

    
52
			final Oaf oaf = getOaf(entity, getDataInfo(about, provenance, trust, false, false));
53
			return base64(oaf.toByteArray());
54
		} catch (final Throwable e) {
55
			handleException(e, resultId, hostedbyId, hostedbyName, provenance, trust, collectedFromId, collectedFromName, originalId, dateOfCollection);
56
		}
57
		return null;
58
	}
59

    
60
	public static String oafResultUpdate(final String resultId,
61
			final String provenance,
62
			final String trust,
63
			final NodeList nodelist,
64
			final String hostedbyId,
65
			final String hostedbyName) {
66
		try {
67
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
68
			final ValueMap values = ValueMap.parseNodeList(nodelist);
69
			final Descriptor mDesc = Result.Metadata.getDescriptor();
70

    
71
			final KeyValue hostedBy = getKV(hostedbyId, hostedbyName);
72

    
73
			final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
74
			final Result.Builder result = buildResult(metadata, values, null, hostedBy);
75

    
76
			final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, null, null);
77
			final Oaf oaf = getOaf(entity, null);
78
			return base64(oaf.toByteArray());
79
		} catch (final Throwable e) {
80
			handleException(e, resultId, hostedbyId, hostedbyName, provenance, trust, null, null, null, null);
81
		}
82
		return null;
83
	}
84

    
85
	private static OafEntity.Builder buildOafEntity(final Result.Builder result,
86
			final String entityId,
87
			final NodeList nodelist,
88
			KeyValue collectedFrom,
89
			String originalId) {
90

    
91
		final List<StructuredProperty> pids = Lists.newArrayList();
92
		pids.addAll(parsePids(nodelist));
93

    
94
		final OafEntity.Builder entity =
95
				getEntity(Type.result, entityId, collectedFrom, StringUtils.isBlank(originalId) ? null : Lists.newArrayList(originalId), null, null, pids)
96
						.setResult(result);
97
		return entity;
98
	}
99

    
100
	private static Result.Metadata.Builder buildMetadata(final ValueMap values, final Descriptor mDesc) {
101
		final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
102

    
103
		if (values.get("creator") != null) {
104
			for (final Element e : values.get("creator")) {
105

    
106
				final Author.Builder author = Author.newBuilder();
107

    
108
				final String fullname = e.getText();
109
				author.setFullname(fullname);
110
				author.setRank(Integer.valueOf(e.getAttributeValue(ValueMap.IDX_ATTRIBUTE)));
111

    
112
				final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
113
				if (p.isAccurate()) {
114
					author.setName(p.getNormalisedFirstName());
115
					author.setSurname(p.getNormalisedSurname());
116
				}
117
				metadata.addAuthor(author);
118
			}
119
		}
120

    
121
		addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject"), "keyword", "dnet:subject_classification_typologies");
122
		addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title"), "main title", "dnet:dataCite_title");
123
		for (final String fieldname : Lists.newArrayList("description", "source", "contributor")) {
124
			if (values.get(fieldname) != null) {
125
				for (final String s : values.get(fieldname).listValues()) {
126
					addField(metadata, mDesc.findFieldByName(fieldname), s);
127
				}
128
			}
129
		}
130
		addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues()));
131
		addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
132
		addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues());
133
		addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues());
134
		addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues());
135

    
136
		final String cobjcategoryCode = values.get("cobjcategory").stream()
137
				.map(e -> e.getText())
138
				.map(s -> s != null && !s.isEmpty() ? s : "0000")
139
				.findFirst()
140
				.orElse("0000");
141
		String resulttype = "";
142
		switch (cobjcategoryCode) {
143
		case "0029":
144
			resulttype = "software";
145
			break;
146
		default:
147
			resulttype = "publication";
148
		}
149
		addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies"));
150

    
151
		addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues());
152
		addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues());
153
		if (values.get("concept") != null) {
154
			for (final Element e : values.get("concept")) {
155
				final String id = e.getAttributes().get("id");
156
				if (StringUtils.isBlank(id)) throw new IllegalArgumentException("Context id cannot be blank");
157
				metadata.addContext(Context.newBuilder().setId(id));
158
			}
159
		}
160
		if (values.get("journal") != null) {
161
			for (final Element e : values.get("journal")) {
162

    
163
				final Journal.Builder journal = Journal.newBuilder();
164
				if (e.getText() != null) {
165
					journal.setName(e.getText());
166
				}
167

    
168
				final Map<String, String> attr = e.getAttributes();
169
				if (attr != null) {
170
					if (attr.get("issn") != null) {
171
						journal.setIssnPrinted(attr.get("issn"));
172
					}
173
					if (attr.get("eissn") != null) {
174
						journal.setIssnOnline(attr.get("eissn"));
175
					}
176
					if (attr.get("lissn") != null) {
177
						journal.setIssnLinking(attr.get("lissn"));
178
					}
179

    
180
					if (attr.get("ep") != null) {
181
						journal.setEp(attr.get("ep"));
182
					}
183
					if (attr.get("iss") != null) {
184
						journal.setIss(attr.get("iss"));
185
					}
186
					if (attr.get("sp") != null) {
187
						journal.setSp(attr.get("sp"));
188
					}
189
					if (attr.get("vol") != null) {
190
						journal.setVol(attr.get("vol"));
191
					}
192
				}
193
				metadata.setJournal(journal.build());
194
			}
195
		}
196
		return metadata;
197
	}
198

    
199
	private static Result.Builder buildResult(final Result.Metadata.Builder metadata,
200
			final ValueMap values,
201
			final KeyValue collectedFrom,
202
			final KeyValue hostedBy) {
203
		final Result.Builder result = Result.newBuilder();
204

    
205
		/*
206
		if (values.get("creator") != null) {
207
			for (final String fullname : Iterables.limit(values.get("creator").listValues(), 10)) {
208

    
209
				final Person.Metadata.Builder authorMetadata = Person.Metadata.newBuilder();
210

    
211
				authorMetadata.setFullname(sf(fullname));
212

    
213
				final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
214
				if (p.isAccurate()) {
215
					authorMetadata.setFirstname(sf(p.getNormalisedFirstName()));
216
					authorMetadata.clearSecondnames().addSecondnames(sf(p.getNormalisedSurname()));
217
					authorMetadata.setFullname(sf(p.getNormalisedFullname()));
218
				}
219

    
220
				result.addAuthor( Person.newBuilder().setMetadata(authorMetadata));
221
			}
222
		}
223
		*/
224

    
225
		final Instance.Builder instance = Instance.newBuilder().setHostedby(hostedBy);
226

    
227
		addField(instance, Instance.getDescriptor().findFieldByName("licence"),
228
				setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues()));
229
		addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
230
				setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues()));
231

    
232
		addField(instance, Instance.getDescriptor().findFieldByName("collectedfrom"), collectedFrom);
233
		addField(instance, Instance.getDescriptor().findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
234

    
235
		if (values.get("identifier") != null) {
236
			addField(instance, Instance.getDescriptor().findFieldByName("url"),
237
					Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter)));
238
		}
239

    
240
		result.addInstance(instance);
241

    
242
		final List<Element> extrefs = values.get("reference");
243
		if (!extrefs.isEmpty()) {
244
			final Descriptor extDesc = ExternalReference.getDescriptor();
245
			for (final Element element : extrefs) {
246
				final ExternalReference.Builder extref = ExternalReference.newBuilder();
247
				addField(extref, extDesc.findFieldByName("url"), element.getText());
248
				addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source"));
249
				addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier"));
250
				addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title"));
251
				addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query"));
252
				addField(extref, extDesc.findFieldByName("qualifier"),
253
						setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type")))
254
								.build());
255

    
256
				result.addExternalReference(extref);
257
			}
258
		}
259

    
260
		return result.setMetadata(metadata);
261
	}
262

    
263
	private static void handleException(Throwable e, final String resultId, final String hostedbyId, final String hostedbyName,
264
			final String provenance, final String trust, final String collectedFromId, final String collectedFromName,
265
			final String originalId, final String dateOfCollection) {
266
		System.err.println("resultId: " + resultId);
267
		if (StringUtils.isNotBlank(hostedbyId)) System.err.println("hostedbyId: " + hostedbyId);
268
		if (StringUtils.isNotBlank(hostedbyName)) System.err.println("hostedbyName: " + hostedbyName);
269
		if (StringUtils.isNotBlank(provenance)) System.err.println("provenance: " + provenance);
270
		if (StringUtils.isNotBlank(trust)) System.err.println("trust: " + trust);
271
		if (StringUtils.isNotBlank(collectedFromId)) System.err.println("collectedFromId: " + collectedFromId);
272
		if (StringUtils.isNotBlank(collectedFromName)) System.err.println("collectedFromName: " + collectedFromName);
273
		if (StringUtils.isNotBlank(originalId)) System.err.println("originalId: " + originalId);
274
		if (StringUtils.isNotBlank(dateOfCollection)) System.err.println("dateOfCollection: " + dateOfCollection);
275
		e.printStackTrace();
276
		throw new RuntimeException(e);
277
	}
278
}
(7-7/10)