Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.HashMap;
4
import java.util.List;
5

    
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.Descriptor;
9
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
10
import eu.dnetlib.data.proto.FieldTypeProtos.Author;
11
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
12
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
13
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.data.proto.OafProtos.OafEntity;
15
import eu.dnetlib.data.proto.ResultProtos.Result;
16
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
17
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
18
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
19
import eu.dnetlib.data.proto.TypeProtos.Type;
20
import org.apache.commons.lang3.StringUtils;
21
import org.w3c.dom.NodeList;
22

    
23
public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions {
24

    
25
	public static String oafResult(
26
			final String resultId,
27
			final boolean invisible,
28
			final String provenance,
29
			final String trust,
30
			final NodeList about,
31
			final String originalId,
32
			final String dateOfCollection,
33
			final String dateOfTransformation,
34
			final NodeList metadata) {
35

    
36
		ValueMap values = null;
37
		try {
38
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
39
			values = ValueMap.parseNodeList(metadata);
40
			final Descriptor mDesc = Result.Metadata.getDescriptor();
41

    
42
			final List<KeyValue> collectedFrom = getKeyValues(values, "collectedfrom", Type.datasource);
43
			final List<KeyValue> hostedBy = getKeyValues(values, "hostedby", Type.datasource);
44

    
45
			final Result.Metadata.Builder metadataBuilder = buildMetadata(values, mDesc);
46
			final Result.Builder result = buildResult(metadataBuilder, values, collectedFrom, hostedBy);
47
			final OafEntity.Builder entity = buildOafEntity(result, entityId, metadata, collectedFrom, originalId);
48
			entity.setDateofcollection(dateOfCollection)
49
					.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about));
50

    
51
			final Oaf oaf = getOaf(entity, getDataInfo(invisible, about, provenance, trust, false, false));
52
			return base64(oaf.toByteArray());
53
		} catch (final Throwable e) {
54
			handleException(e, resultId, values);
55
		}
56
		return null;
57
	}
58

    
59
	public static String oafResultUpdate(final String resultId,
60
			final String provenance,
61
			final String trust,
62
			final NodeList nodelist) {
63
		ValueMap values = null;
64
		try {
65
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
66
			values = ValueMap.parseNodeList(nodelist);
67
			final List<KeyValue> hostedBy = getKeyValues(values, "hostedby", Type.datasource);
68

    
69
			final Descriptor mDesc = Result.Metadata.getDescriptor();
70

    
71
			final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
72
			final Result.Builder result = buildResult(metadata, values, null, hostedBy);
73

    
74
			final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, null, null);
75
			final Oaf oaf = getOaf(entity, null);
76
			return base64(oaf.toByteArray());
77
		} catch (final Throwable e) {
78
			handleException(e, resultId, values);
79
		}
80
		return null;
81
	}
82

    
83
	private static OafEntity.Builder buildOafEntity(
84
			final Result.Builder result,
85
			final String entityId,
86
			final NodeList nodelist,
87
			final List<KeyValue> collectedFrom,
88
			final String originalId) {
89

    
90
		final List<StructuredProperty> pids = Lists.newArrayList();
91
		pids.addAll(parsePids(nodelist));
92

    
93
		final OafEntity.Builder entity =
94
				getEntity(Type.result, entityId, collectedFrom, StringUtils.isBlank(originalId) ? null : Lists.newArrayList(originalId), null, null, pids)
95
						.setResult(result);
96
		return entity;
97
	}
98

    
99
	private static Result.Metadata.Builder buildMetadata(final ValueMap values, final Descriptor mDesc) {
100
		final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
101

    
102
		if (values.get("creator") != null) {
103
			for (final Element e : values.get("creator")) {
104

    
105
				final Author.Builder author = Author.newBuilder();
106

    
107
				final String fullname = e.getText();
108
				author.setFullname(fullname);
109
				author.setRank(Integer.valueOf(e.getAttributeValue(ValueMap.IDX_ATTRIBUTE)));
110

    
111
				final String nameIdentifier = e.getAttributeValue("nameIdentifier");
112
				final String nameIdentifierScheme = e.getAttributeValue("nameIdentifierScheme");
113

    
114
				if (StringUtils.isNotBlank(nameIdentifier) && StringUtils.isNotBlank(nameIdentifierScheme)) {
115
					author.addPid(getKV(nameIdentifierScheme, nameIdentifier));
116
				}
117

    
118
				final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
119
				if (p.isAccurate()) {
120
					author.setName(p.getNormalisedFirstName());
121
					author.setSurname(p.getNormalisedSurname());
122
				}
123
				metadata.addAuthor(author);
124
			}
125
		}
126

    
127
		addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject"), "keyword", "dnet:subject_classification_typologies");
128
		addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title"), "main title", "dnet:dataCite_title");
129
		for (final String fieldname : Lists.newArrayList("description", "source", "contributor")) {
130
			if (values.get(fieldname) != null) {
131
				for (final String s : values.get(fieldname).listValues()) {
132
					addField(metadata, mDesc.findFieldByName(fieldname), s);
133
				}
134
			}
135
		}
136
		addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues()));
137
		addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
138
		addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues());
139
		addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues());
140
		addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues());
141

    
142
		String resulttype = getResultType(values);
143
		addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies"));
144

    
145
		addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues());
146
		addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues());
147
		if (values.get("concept") != null) {
148
			for (final Element e : values.get("concept")) {
149
				final String id = e.getAttributes().get("id");
150
				if (StringUtils.isNotBlank(id)) {
151
					metadata.addContext(Context.newBuilder().setId(id));
152
				}
153
			}
154
		}
155
		if (values.get("journal") != null) {
156
			for (final Element e : values.get("journal")) {
157
				addJournal(metadata, e);
158
			}
159
		}
160

    
161
		return metadata;
162
	}
163

    
164
	private static String getResultType(final ValueMap values) {
165
			
166
		final Element cobjcategory = values.get("cobjcategory").stream()
167
				.map(e -> StringUtils.isNotBlank(e.getText()) ? e : new Element("0000", e.getAttributes()))
168
				.findFirst()
169
				.orElse(new Element("0000", new HashMap<>()));
170

    
171
		final String resulttype = cobjcategory.getAttributeValue("type");
172
		if (StringUtils.isNotBlank(resulttype)) {
173
			return resulttype;
174
		}
175

    
176
		return getDefaultResulttype(cobjcategory);
177
	}
178

    
179
	private static Result.Builder buildResult(final Result.Metadata.Builder metadata,
180
			final ValueMap values,
181
			final List<KeyValue> collectedFrom,
182
			final List<KeyValue> hostedBy) {
183
		final Result.Builder result = Result.newBuilder();
184

    
185
		final Instance.Builder instance = Instance.newBuilder();
186

    
187
		addField(instance, Instance.getDescriptor().findFieldByName("license"), values.get("license").listValues());
188

    
189
		addField(instance, Instance.getDescriptor().findFieldByName("accessright"),
190
				setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues()));
191

    
192
		addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
193
				setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues()));
194

    
195
		addField(instance, Instance.getDescriptor().findFieldByName("hostedby"), hostedBy);
196
		addField(instance, Instance.getDescriptor().findFieldByName("collectedfrom"), collectedFrom);
197
		addField(instance, Instance.getDescriptor().findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
198

    
199
		if (values.get("identifier") != null) {
200
			addField(instance, Instance.getDescriptor().findFieldByName("url"),
201
					Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter)));
202
		}
203
        if (values.get("refereed") != null) {
204
            addField(instance, Instance.getDescriptor().findFieldByName("refereed"), values.get("refereed").listValues());
205
        }
206

    
207
		final ElementList pcs = values.get("processingchargeamount");
208
		if (pcs != null && !pcs.isEmpty()) {
209
			addField(instance, Instance.getDescriptor().findFieldByName("processingchargeamount"), pcs.listValues());
210
			final String currency = pcs.get(0).getAttributeValue("currency");
211
			if (StringUtils.isNotBlank(currency)) {
212
				addField(instance, Instance.getDescriptor().findFieldByName("processingchargecurrency"), currency);
213
			}
214
		}
215

    
216
        // #4468: processingchargeamount - implementatio to clarify: result vs instance. Single value vs repeatable
217
//        if (values.get("processingchargeamount") != null) {
218
//            ElementList processingchargeamount = values.get("processingchargeamount");
219
//            for(Element pc : processingchargeamount) {
220
//                addField(instance, Instance.getDescriptor().findFieldByName("processingchargeamount"), pc.getText());
221
//                addField(instance, Instance.getDescriptor().findFieldByName("processingchargecurrency"), pc.getAttributeValue("processingchargecurrency"));
222
//            }
223
//        }
224

    
225
		result.addInstance(instance);
226

    
227
		final List<Element> extrefs = values.get("reference");
228
		if (!extrefs.isEmpty()) {
229
			final Descriptor extDesc = ExternalReference.getDescriptor();
230
			for (final Element element : extrefs) {
231
				final ExternalReference.Builder extref = ExternalReference.newBuilder();
232
				addField(extref, extDesc.findFieldByName("url"), element.getText());
233
				addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source"));
234
				addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier"));
235
				addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title"));
236
				addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query"));
237
				addField(extref, extDesc.findFieldByName("qualifier"),
238
						setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type")))
239
								.build());
240

    
241
				result.addExternalReference(extref);
242
			}
243
		}
244

    
245
		return result.setMetadata(metadata);
246
	}
247

    
248
	private static void handleException(Throwable e, final String resultId, final ValueMap values) {
249
		System.err.println("resultId: " + resultId);
250
		if (values != null) {
251
			System.err.println("values: " + values);
252
		}
253
		e.printStackTrace();
254
		throw new RuntimeException(e);
255
	}
256
}
(7-7/10)