Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.HashMap;
4
import java.util.List;
5

    
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.Descriptor;
9
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
10
import eu.dnetlib.data.proto.FieldTypeProtos.Author;
11
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
12
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
13
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.data.proto.OafProtos.OafEntity;
15
import eu.dnetlib.data.proto.ResultProtos.Result;
16
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
17
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
18
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
19
import eu.dnetlib.data.proto.TypeProtos.Type;
20
import org.apache.commons.lang3.StringUtils;
21
import org.w3c.dom.NodeList;
22

    
23
public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions {
24

    
25
	public static String oafResult(
26
			final String resultId,
27
			final boolean invisible,
28
			final String provenance,
29
			final String trust,
30
			final NodeList about,
31
			final String originalId,
32
			final String dateOfCollection,
33
			final String dateOfTransformation,
34
			final NodeList metadata) {
35

    
36
		ValueMap values = null;
37
		try {
38
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
39
			values = ValueMap.parseNodeList(metadata);
40
			final Descriptor mDesc = Result.Metadata.getDescriptor();
41

    
42
			final List<KeyValue> collectedFrom = getKeyValues(values, "collectedfrom", Type.datasource);
43
			final List<KeyValue> hostedBy = getKeyValues(values, "hostedby", Type.datasource);
44

    
45
			final Result.Metadata.Builder metadataBuilder = buildMetadata(values, mDesc);
46
			final Result.Builder result = buildResult(metadataBuilder, values, collectedFrom, hostedBy);
47
			final OafEntity.Builder entity = buildOafEntity(result, entityId, metadata, collectedFrom, originalId);
48
			entity.setDateofcollection(dateOfCollection)
49
					.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about));
50

    
51
			final Oaf oaf = getOaf(entity, getDataInfo(invisible, about, provenance, trust, false, false));
52
			return base64(oaf.toByteArray());
53
		} catch (final Throwable e) {
54
			handleException(e, resultId, values);
55
		}
56
		return null;
57
	}
58

    
59
	public static String oafResultUpdate(final String resultId,
60
			final String provenance,
61
			final String trust,
62
			final NodeList nodelist) {
63
		ValueMap values = null;
64
		try {
65
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
66
			values = ValueMap.parseNodeList(nodelist);
67
			final List<KeyValue> hostedBy = getKeyValues(values, "hostedby", Type.datasource);
68

    
69
			final Descriptor mDesc = Result.Metadata.getDescriptor();
70

    
71
			final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
72
			final Result.Builder result = buildResult(metadata, values, null, hostedBy);
73

    
74
			final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, null, null);
75
			final Oaf oaf = getOaf(entity, null);
76
			return base64(oaf.toByteArray());
77
		} catch (final Throwable e) {
78
			handleException(e, resultId, values);
79
		}
80
		return null;
81
	}
82

    
83
	private static OafEntity.Builder buildOafEntity(
84
			final Result.Builder result,
85
			final String entityId,
86
			final NodeList nodelist,
87
			final List<KeyValue> collectedFrom,
88
			final String originalId) {
89

    
90
		final List<StructuredProperty> pids = Lists.newArrayList();
91
		pids.addAll(parsePids(nodelist));
92

    
93
		final OafEntity.Builder entity =
94
				getEntity(Type.result, entityId, collectedFrom, StringUtils.isBlank(originalId) ? null : Lists.newArrayList(originalId), null, null, pids)
95
						.setResult(result);
96
		return entity;
97
	}
98

    
99
	private static Result.Metadata.Builder buildMetadata(final ValueMap values, final Descriptor mDesc) {
100
		final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
101

    
102
		if (values.get("creator") != null) {
103
			for (final Element e : values.get("creator")) {
104

    
105
				final Author.Builder author = Author.newBuilder();
106

    
107
				final String fullname = e.getText();
108
				author.setFullname(fullname);
109
				author.setRank(Integer.valueOf(e.getAttributeValue(ValueMap.IDX_ATTRIBUTE)));
110

    
111
				final String nameIdentifier = e.getAttributeValue("nameIdentifier");
112
				final String nameIdentifierScheme = e.getAttributeValue("nameIdentifierScheme");
113

    
114
				if (StringUtils.isNotBlank(nameIdentifier) && StringUtils.isNotBlank(nameIdentifierScheme)) {
115
					author.addPid(getKV(nameIdentifierScheme, nameIdentifier));
116
				}
117

    
118
				final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
119
				if (p.isAccurate()) {
120
					author.setName(p.getNormalisedFirstName());
121
					author.setSurname(p.getNormalisedSurname());
122
				}
123
				metadata.addAuthor(author);
124
			}
125
		}
126

    
127
		addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject"), "keyword", "dnet:subject_classification_typologies");
128
		addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title"), "main title", "dnet:dataCite_title");
129
		for (final String fieldname : Lists.newArrayList("description", "source", "contributor", "refereed")) {
130
			if (values.get(fieldname) != null) {
131
				for (final String s : values.get(fieldname).listValues()) {
132
					addField(metadata, mDesc.findFieldByName(fieldname), s);
133
				}
134
			}
135
		}
136
		addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues()));
137
		addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
138
		addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues());
139
		addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues());
140
		addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues());
141

    
142
		String resulttype = getResultType(values);
143
		addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies"));
144

    
145
		addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues());
146
		addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues());
147
		if (values.get("concept") != null) {
148
			for (final Element e : values.get("concept")) {
149
				final String id = e.getAttributes().get("id");
150
				if (StringUtils.isNotBlank(id)) {
151
					metadata.addContext(Context.newBuilder().setId(id));
152
				}
153
			}
154
		}
155
		if (values.get("journal") != null) {
156
			for (final Element e : values.get("journal")) {
157
				addJournal(metadata, e);
158
			}
159
		}
160

    
161
		// #4468: processingchargeamount - implementatio to clarify: result vs instance. Single value vs repeatable
162
//        if (values.get("processingchargeamount") != null) {
163
//            ElementList processingchargeamount = values.get("processingchargeamount");
164
//            for(Element pc : processingchargeamount) {
165
//                addField(metadata, mDesc.findFieldByName("processingchargeamount"), pc.getText());
166
//                addField(metadata, mDesc.findFieldByName("processingchargecurrency"), pc.getAttributeValue("processingchargecurrency"));
167
//            }
168
//        }
169
		return metadata;
170
	}
171

    
172
	private static String getResultType(final ValueMap values) {
173
			
174
		final Element cobjcategory = values.get("cobjcategory").stream()
175
				.map(e -> StringUtils.isNotBlank(e.getText()) ? e : new Element("0000", e.getAttributes()))
176
				.findFirst()
177
				.orElse(new Element("0000", new HashMap<>()));
178

    
179
		final String resulttype = cobjcategory.getAttributeValue("type");
180
		if (StringUtils.isNotBlank(resulttype)) {
181
			return resulttype;
182
		}
183

    
184
		return getDefaultResulttype(cobjcategory);
185
	}
186

    
187
	private static Result.Builder buildResult(final Result.Metadata.Builder metadata,
188
			final ValueMap values,
189
			final List<KeyValue> collectedFrom,
190
			final List<KeyValue> hostedBy) {
191
		final Result.Builder result = Result.newBuilder();
192

    
193
		final Instance.Builder instance = Instance.newBuilder();
194

    
195
		addField(instance, Instance.getDescriptor().findFieldByName("license"), values.get("license").listValues());
196

    
197
		addField(instance, Instance.getDescriptor().findFieldByName("accessright"),
198
				setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues()));
199

    
200
		addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
201
				setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues()));
202

    
203
		addField(instance, Instance.getDescriptor().findFieldByName("hostedby"), hostedBy);
204
		addField(instance, Instance.getDescriptor().findFieldByName("collectedfrom"), collectedFrom);
205
		addField(instance, Instance.getDescriptor().findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
206

    
207
		if (values.get("identifier") != null) {
208
			addField(instance, Instance.getDescriptor().findFieldByName("url"),
209
					Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter)));
210
		}
211

    
212
		result.addInstance(instance);
213

    
214
		final List<Element> extrefs = values.get("reference");
215
		if (!extrefs.isEmpty()) {
216
			final Descriptor extDesc = ExternalReference.getDescriptor();
217
			for (final Element element : extrefs) {
218
				final ExternalReference.Builder extref = ExternalReference.newBuilder();
219
				addField(extref, extDesc.findFieldByName("url"), element.getText());
220
				addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source"));
221
				addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier"));
222
				addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title"));
223
				addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query"));
224
				addField(extref, extDesc.findFieldByName("qualifier"),
225
						setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type")))
226
								.build());
227

    
228
				result.addExternalReference(extref);
229
			}
230
		}
231

    
232
		return result.setMetadata(metadata);
233
	}
234

    
235
	private static void handleException(Throwable e, final String resultId, final ValueMap values) {
236
		System.err.println("resultId: " + resultId);
237
		if (values != null) {
238
			System.err.println("values: " + values);
239
		}
240
		e.printStackTrace();
241
		throw new RuntimeException(e);
242
	}
243
}
(7-7/10)