Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.HashMap;
4
import java.util.List;
5

    
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.Descriptor;
9
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
10
import eu.dnetlib.data.proto.FieldTypeProtos.Author;
11
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
12
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
13
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.data.proto.OafProtos.OafEntity;
15
import eu.dnetlib.data.proto.ResultProtos.Result;
16
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
17
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
18
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
19
import eu.dnetlib.data.proto.TypeProtos.Type;
20
import org.apache.commons.lang3.StringUtils;
21
import org.w3c.dom.NodeList;
22

    
23
public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions {
24

    
25
	public static String oafResult(
26
			final String resultId,
27
			final boolean invisible,
28
			final String provenance,
29
			final String trust,
30
			final NodeList about,
31
			final String originalId,
32
			final String dateOfCollection,
33
			final String dateOfTransformation,
34
			final NodeList metadata) {
35

    
36
		ValueMap values = null;
37
		try {
38
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
39
			values = ValueMap.parseNodeList(metadata);
40
			final Descriptor mDesc = Result.Metadata.getDescriptor();
41

    
42
			final List<KeyValue> collectedFrom = getKeyValues(values, "collectedfrom", Type.datasource);
43
			final List<KeyValue> hostedBy = getKeyValues(values, "hostedby", Type.datasource);
44

    
45
			final Result.Metadata.Builder metadataBuilder = buildMetadata(values, mDesc);
46
			final Result.Builder result = buildResult(metadataBuilder, values, collectedFrom, hostedBy);
47
			final OafEntity.Builder entity = buildOafEntity(result, entityId, metadata, collectedFrom, originalId);
48
			entity.setDateofcollection(dateOfCollection)
49
					.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about));
50

    
51
			final Oaf oaf = getOaf(entity, getDataInfo(invisible, about, provenance, trust, false, false));
52
			return base64(oaf.toByteArray());
53
		} catch (final Throwable e) {
54
			handleException(e, resultId, values);
55
		}
56
		return null;
57
	}
58

    
59
	public static String oafResultUpdate(final String resultId,
60
			final String provenance,
61
			final String trust,
62
			final NodeList nodelist) {
63
		ValueMap values = null;
64
		try {
65
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
66
			values = ValueMap.parseNodeList(nodelist);
67
			final List<KeyValue> hostedBy = getKeyValues(values, "hostedby", Type.datasource);
68

    
69
			final Descriptor mDesc = Result.Metadata.getDescriptor();
70

    
71
			final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
72
			final Result.Builder result = buildResult(metadata, values, null, hostedBy);
73

    
74
			final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, null, null);
75
			final Oaf oaf = getOaf(entity, null);
76
			return base64(oaf.toByteArray());
77
		} catch (final Throwable e) {
78
			handleException(e, resultId, values);
79
		}
80
		return null;
81
	}
82

    
83
	private static OafEntity.Builder buildOafEntity(
84
			final Result.Builder result,
85
			final String entityId,
86
			final NodeList nodelist,
87
			final List<KeyValue> collectedFrom,
88
			final String originalId) {
89

    
90
		final List<StructuredProperty> pids = Lists.newArrayList();
91
		pids.addAll(parsePids(nodelist));
92

    
93
		final OafEntity.Builder entity =
94
				getEntity(Type.result, entityId, collectedFrom, StringUtils.isBlank(originalId) ? null : Lists.newArrayList(originalId), null, null, pids)
95
						.setResult(result);
96
		return entity;
97
	}
98

    
99
	private static Result.Metadata.Builder buildMetadata(final ValueMap values, final Descriptor mDesc) {
100
		final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
101

    
102
		if (values.get("creator") != null) {
103
			for (final Element e : values.get("creator")) {
104

    
105
				final Author.Builder author = Author.newBuilder();
106

    
107
				final String fullname = e.getText();
108
				author.setFullname(fullname);
109
				author.setRank(Integer.valueOf(e.getAttributeValue(ValueMap.IDX_ATTRIBUTE)));
110

    
111
				final String nameIdentifier = e.getAttributeValue("nameIdentifier");
112
				final String nameIdentifierScheme = e.getAttributeValue("nameIdentifierScheme");
113

    
114
				if (StringUtils.isNotBlank(nameIdentifier) && StringUtils.isNotBlank(nameIdentifierScheme)) {
115
					author.addPid(getKV(nameIdentifierScheme, nameIdentifier));
116
				}
117

    
118
				final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
119
				if (p.isAccurate()) {
120
					author.setName(p.getNormalisedFirstName());
121
					author.setSurname(p.getNormalisedSurname());
122
				}
123
				metadata.addAuthor(author);
124
			}
125
		}
126

    
127
		addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject"), "keyword", "dnet:subject_classification_typologies");
128
		addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title"), "main title", "dnet:dataCite_title");
129
		for (final String fieldname : Lists.newArrayList("description", "source", "contributor", "refereed")) {
130
			if (values.get(fieldname) != null) {
131
				for (final String s : values.get(fieldname).listValues()) {
132
					addField(metadata, mDesc.findFieldByName(fieldname), s);
133
				}
134
			}
135
		}
136
		addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues()));
137
		addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
138
		addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues());
139
		addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues());
140
		addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues());
141

    
142
		String resulttype = getResultType(values);
143
		addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies"));
144

    
145
		addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues());
146
		addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues());
147
		if (values.get("concept") != null) {
148
			for (final Element e : values.get("concept")) {
149
				final String id = e.getAttributes().get("id");
150
				if (StringUtils.isNotBlank(id)) {
151
					metadata.addContext(Context.newBuilder().setId(id));
152
				}
153
			}
154
		}
155
		if (values.get("journal") != null) {
156
			for (final Element e : values.get("journal")) {
157
				addJournal(metadata, e);
158
			}
159
		}
160
		return metadata;
161
	}
162

    
163
	private static String getResultType(final ValueMap values) {
164
			
165
		final Element cobjcategory = values.get("cobjcategory").stream()
166
				.map(e -> StringUtils.isNotBlank(e.getText()) ? e : new Element("0000", e.getAttributes()))
167
				.findFirst()
168
				.orElse(new Element("0000", new HashMap<>()));
169

    
170
		final String resulttype = cobjcategory.getAttributeValue("type");
171
		if (StringUtils.isNotBlank(resulttype)) {
172
			return resulttype;
173
		}
174

    
175
		return getDefaultResulttype(cobjcategory);
176
	}
177

    
178
	private static Result.Builder buildResult(final Result.Metadata.Builder metadata,
179
			final ValueMap values,
180
			final List<KeyValue> collectedFrom,
181
			final List<KeyValue> hostedBy) {
182
		final Result.Builder result = Result.newBuilder();
183

    
184
		final Instance.Builder instance = Instance.newBuilder();
185

    
186
		addField(instance, Instance.getDescriptor().findFieldByName("license"), values.get("license").listValues());
187

    
188
		addField(instance, Instance.getDescriptor().findFieldByName("accessright"),
189
				setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues()));
190

    
191
		addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
192
				setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues()));
193

    
194
		addField(instance, Instance.getDescriptor().findFieldByName("hostedby"), hostedBy);
195
		addField(instance, Instance.getDescriptor().findFieldByName("collectedfrom"), collectedFrom);
196
		addField(instance, Instance.getDescriptor().findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
197

    
198
		if (values.get("identifier") != null) {
199
			addField(instance, Instance.getDescriptor().findFieldByName("url"),
200
					Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter)));
201
		}
202

    
203
		result.addInstance(instance);
204

    
205
		final List<Element> extrefs = values.get("reference");
206
		if (!extrefs.isEmpty()) {
207
			final Descriptor extDesc = ExternalReference.getDescriptor();
208
			for (final Element element : extrefs) {
209
				final ExternalReference.Builder extref = ExternalReference.newBuilder();
210
				addField(extref, extDesc.findFieldByName("url"), element.getText());
211
				addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source"));
212
				addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier"));
213
				addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title"));
214
				addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query"));
215
				addField(extref, extDesc.findFieldByName("qualifier"),
216
						setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type")))
217
								.build());
218

    
219
				result.addExternalReference(extref);
220
			}
221
		}
222

    
223
		return result.setMetadata(metadata);
224
	}
225

    
226
	private static void handleException(Throwable e, final String resultId, final ValueMap values) {
227
		System.err.println("resultId: " + resultId);
228
		if (values != null) {
229
			System.err.println("values: " + values);
230
		}
231
		e.printStackTrace();
232
		throw new RuntimeException(e);
233
	}
234
}
(7-7/10)