Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.List;
4
import java.util.Map;
5

    
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.Descriptor;
9
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
10
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
11
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
12
import eu.dnetlib.data.proto.OafProtos.Oaf;
13
import eu.dnetlib.data.proto.OafProtos.OafEntity;
14
import eu.dnetlib.data.proto.PersonProtos.Person;
15
import eu.dnetlib.data.proto.ResultProtos.Result;
16
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
17
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
18
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
19
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
20
import eu.dnetlib.data.proto.TypeProtos.Type;
21
import org.apache.commons.lang.StringUtils;
22
import org.w3c.dom.NodeList;
23

    
24
public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions {
25

    
26
	public static String oafResult(
27
			final String resultId,
28
			final String provenance,
29
			final String trust,
30
			final NodeList about,
31
			final String hostedbyId,
32
			final String hostedbyName,
33
			final String collectedFromId,
34
			final String collectedFromName,
35
			final String originalId,
36
			final String dateOfCollection,
37
			final String dateOfTransformation,
38
			final NodeList nodelist) {
39
		try {
40
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
41
			final ValueMap values = ValueMap.parseNodeList(nodelist);
42
			final Descriptor mDesc = Result.Metadata.getDescriptor();
43

    
44
			final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
45
			final Result.Builder result = buildResult(metadata, values, mDesc, hostedbyId, hostedbyName);
46
			final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, getKV(collectedFromId, collectedFromName), originalId);
47
			entity.setDateofcollection(dateOfCollection)
48
					.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about));
49

    
50
			final Oaf oaf = getOaf(entity, getDataInfo(about, provenance, trust, false, false));
51
			return base64(oaf.toByteArray());
52
		} catch (final Throwable e) {
53
			handleException(e, resultId, hostedbyId, hostedbyName, provenance, trust, collectedFromId, collectedFromName, originalId, dateOfCollection);
54
		}
55
		return null;
56
	}
57

    
58
	public static String oafResultUpdate(final String resultId,
59
			final String provenance,
60
			final String trust,
61
			final NodeList nodelist,
62
			final String hostedbyId,
63
			final String hostedbyName) {
64
		try {
65
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
66
			final ValueMap values = ValueMap.parseNodeList(nodelist);
67
			final Descriptor mDesc = Result.Metadata.getDescriptor();
68

    
69
			final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
70
			final Result.Builder result = buildResult(metadata, values, mDesc, hostedbyId, hostedbyName);
71

    
72
			final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, null, null);
73
			final Oaf oaf = getOaf(entity, null);
74
			return base64(oaf.toByteArray());
75
		} catch (final Throwable e) {
76
			handleException(e, resultId, hostedbyId, hostedbyName, provenance, trust, null, null, null, null);
77
		}
78
		return null;
79
	}
80

    
81
	private static OafEntity.Builder buildOafEntity(final Result.Builder result,
82
			final String entityId,
83
			final NodeList nodelist,
84
			KeyValue collectedFrom,
85
			String originalId) {
86

    
87
		final List<StructuredProperty> pids = Lists.newArrayList();
88
		pids.addAll(parsePids(nodelist));
89

    
90
		final OafEntity.Builder entity =
91
				getEntity(Type.result, entityId, collectedFrom, StringUtils.isBlank(originalId) ? null : Lists.newArrayList(originalId), null, null, pids)
92
						.setResult(result);
93
		return entity;
94
	}
95

    
96
	private static Result.Metadata.Builder buildMetadata(final ValueMap values, final Descriptor mDesc) {
97
		final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
98

    
99
		addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject"), "keyword", "dnet:subject_classification_typologies");
100
		addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title"), "main title", "dnet:dataCite_title");
101
		for (final String fieldname : Lists.newArrayList("description", "source", "contributor")) {
102
			if (values.get(fieldname) != null) {
103
				for (final String s : values.get(fieldname).listValues()) {
104
					addField(metadata, mDesc.findFieldByName(fieldname), s);
105
				}
106
			}
107
		}
108
		addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues()));
109
		addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
110
		addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues());
111
		addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues());
112
		addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues());
113

    
114
		final String cobjcategoryCode = values.get("cobjcategory").stream()
115
				.map(e -> e.getText())
116
				.map(s -> s != null && !s.isEmpty() ? s : "0000")
117
				.findFirst()
118
				.orElse("0000");
119
		String resulttype = "";
120
		switch (cobjcategoryCode) {
121
		// add here the code to be excluded from the default mapping as 'publication'
122

    
123
			/*
124
			case "0029":
125
				resulttype = "software";
126
				break;
127
			*/
128
		case "0021":
129
			resulttype = "dataset";
130
			break;
131
		default:
132
			resulttype = "publication";
133
		}
134
		addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies"));
135

    
136
		addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues());
137
		addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues());
138
		if (values.get("concept") != null) {
139
			for (final Element e : values.get("concept")) {
140
				final String id = e.getAttributes().get("id");
141
				if (StringUtils.isBlank(id)) throw new IllegalArgumentException("Context id cannot be blank");
142
				metadata.addContext(Context.newBuilder().setId(id));
143
			}
144
		}
145
		if (values.get("journal") != null) {
146
			for (final Element e : values.get("journal")) {
147

    
148
				final Journal.Builder journal = Journal.newBuilder();
149
				if (e.getText() != null) {
150
					journal.setName(e.getText());
151
				}
152

    
153
				final Map<String, String> attr = e.getAttributes();
154
				if (attr != null) {
155
					if (attr.get("issn") != null) {
156
						journal.setIssnPrinted(attr.get("issn"));
157
					}
158
					if (attr.get("eissn") != null) {
159
						journal.setIssnOnline(attr.get("eissn"));
160
					}
161
					if (attr.get("lissn") != null) {
162
						journal.setIssnLinking(attr.get("lissn"));
163
					}
164
				}
165
				metadata.setJournal(journal.build());
166
			}
167
		}
168
		return metadata;
169
	}
170

    
171
	private static Result.Builder buildResult(final Result.Metadata.Builder metadata,
172
			final ValueMap values,
173
			final Descriptor mDesc,
174
			final String hostedbyId,
175
			final String hostedbyName) {
176
		final Result.Builder result = Result.newBuilder();
177
		if (values.get("creator") != null) {
178
			for (final String fullname : Iterables.limit(values.get("creator").listValues(), 10)) {
179

    
180
				final Person.Metadata.Builder authorMetadata = Person.Metadata.newBuilder();
181

    
182
				authorMetadata.setFullname(sf(fullname));
183

    
184
				final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
185
				if (p.isAccurate()) {
186
					authorMetadata.setFirstname(sf(p.getNormalisedFirstName()));
187
					authorMetadata.clearSecondnames().addSecondnames(sf(p.getNormalisedSurname()));
188
					authorMetadata.setFullname(sf(p.getNormalisedFullname()));
189
				}
190

    
191
				result.addAuthor(Person.newBuilder().setMetadata(authorMetadata));
192
			}
193
		}
194

    
195
		final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName));
196

    
197
		addField(instance, Instance.getDescriptor().findFieldByName("licence"),
198
				setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues()));
199
		addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
200
				setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues()));
201

    
202
		if (values.get("identifier") != null) {
203
			addField(instance, Instance.getDescriptor().findFieldByName("url"),
204
					Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter)));
205
		}
206

    
207
		result.addInstance(instance);
208

    
209
		final List<Element> extrefs = values.get("reference");
210
		if (!extrefs.isEmpty()) {
211
			final Descriptor extDesc = ExternalReference.getDescriptor();
212
			for (final Element element : extrefs) {
213
				final ExternalReference.Builder extref = ExternalReference.newBuilder();
214
				addField(extref, extDesc.findFieldByName("url"), element.getText());
215
				addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source"));
216
				addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier"));
217
				addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title"));
218
				addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query"));
219
				addField(extref, extDesc.findFieldByName("qualifier"),
220
						setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type")))
221
								.build());
222

    
223
				result.addExternalReference(extref);
224
			}
225
		}
226

    
227
		return result.setMetadata(metadata);
228
	}
229

    
230
	private static void handleException(Throwable e, final String resultId, final String hostedbyId, final String hostedbyName,
231
			final String provenance, final String trust, final String collectedFromId, final String collectedFromName,
232
			final String originalId, final String dateOfCollection) {
233
		System.err.println("resultId: " + resultId);
234
		if (StringUtils.isNotBlank(hostedbyId)) System.err.println("hostedbyId: " + hostedbyId);
235
		if (StringUtils.isNotBlank(hostedbyName)) System.err.println("hostedbyName: " + hostedbyName);
236
		if (StringUtils.isNotBlank(provenance)) System.err.println("provenance: " + provenance);
237
		if (StringUtils.isNotBlank(trust)) System.err.println("trust: " + trust);
238
		if (StringUtils.isNotBlank(collectedFromId)) System.err.println("collectedFromId: " + collectedFromId);
239
		if (StringUtils.isNotBlank(collectedFromName)) System.err.println("collectedFromName: " + collectedFromName);
240
		if (StringUtils.isNotBlank(originalId)) System.err.println("originalId: " + originalId);
241
		if (StringUtils.isNotBlank(dateOfCollection)) System.err.println("dateOfCollection: " + dateOfCollection);
242
		e.printStackTrace();
243
		throw new RuntimeException(e);
244
	}
245
}
(6-6/9)