Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.List;
4
import java.util.Map;
5

    
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.Descriptor;
9
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
10
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
11
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
12
import eu.dnetlib.data.proto.OafProtos.Oaf;
13
import eu.dnetlib.data.proto.OafProtos.OafEntity;
14
import eu.dnetlib.data.proto.PersonProtos.Person;
15
import eu.dnetlib.data.proto.ResultProtos.Result;
16
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
17
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
18
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
19
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
20
import eu.dnetlib.data.proto.TypeProtos.Type;
21
import org.apache.commons.lang.StringUtils;
22
import org.w3c.dom.NodeList;
23

    
24
public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions {
25

    
26
	public static String oafResult(
27
			final String resultId,
28
			final String provenance,
29
			final String trust,
30
			final NodeList about,
31
			final String hostedbyId,
32
			final String hostedbyName,
33
			final String collectedFromId,
34
			final String collectedFromName,
35
			final String originalId,
36
			final String dateOfCollection,
37
			final String dateOfTransformation,
38
			final NodeList nodelist) {
39
		try {
40
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
41
			final ValueMap values = ValueMap.parseNodeList(nodelist);
42
			final Descriptor mDesc = Result.Metadata.getDescriptor();
43

    
44
			final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
45
			final Result.Builder result = buildResult(metadata, values, mDesc, hostedbyId, hostedbyName);
46
			final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, getKV(collectedFromId, collectedFromName), originalId);
47
			entity.setDateofcollection(dateOfCollection)
48
					.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about));
49

    
50
			final Oaf oaf = getOaf(entity, getDataInfo(about, provenance, trust, false, false));
51
			return base64(oaf.toByteArray());
52
		} catch (final Throwable e) {
53
			handleException(e, resultId, hostedbyId, hostedbyName, provenance, trust, collectedFromId, collectedFromName, originalId, dateOfCollection);
54
		}
55
		return null;
56
	}
57

    
58
	public static String oafResultUpdate(final String resultId,
59
			final String provenance,
60
			final String trust,
61
			final NodeList nodelist,
62
			final String hostedbyId,
63
			final String hostedbyName) {
64
		try {
65
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
66
			final ValueMap values = ValueMap.parseNodeList(nodelist);
67
			final Descriptor mDesc = Result.Metadata.getDescriptor();
68

    
69
			final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
70
			final Result.Builder result = buildResult(metadata, values, mDesc, hostedbyId, hostedbyName);
71

    
72
			final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, null, null);
73
			final Oaf oaf = getOaf(entity, null);
74
			return base64(oaf.toByteArray());
75
		} catch (final Throwable e) {
76
			handleException(e, resultId, hostedbyId, hostedbyName, provenance, trust, null, null, null, null);
77
		}
78
		return null;
79
	}
80

    
81
	private static OafEntity.Builder buildOafEntity(final Result.Builder result,
82
			final String entityId,
83
			final NodeList nodelist,
84
			KeyValue collectedFrom,
85
			String originalId) {
86

    
87
		final List<StructuredProperty> pids = Lists.newArrayList();
88
		pids.addAll(parsePids(nodelist));
89

    
90
		final OafEntity.Builder entity =
91
				getEntity(Type.result, entityId, collectedFrom, StringUtils.isBlank(originalId) ? null : Lists.newArrayList(originalId), null, null, pids)
92
						.setResult(result);
93
		return entity;
94
	}
95

    
96
	private static Result.Metadata.Builder buildMetadata(final ValueMap values, final Descriptor mDesc) {
97
		final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
98

    
99
		addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject"), "keyword", "dnet:subject_classification_typologies");
100
		addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title"), "main title", "dnet:dataCite_title");
101
		for (final String fieldname : Lists.newArrayList("description", "source", "contributor")) {
102
			if (values.get(fieldname) != null) {
103
				for (final String s : values.get(fieldname).listValues()) {
104
					addField(metadata, mDesc.findFieldByName(fieldname), s);
105
				}
106
			}
107
		}
108
		addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues()));
109
		addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
110
		addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues());
111
		addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues());
112
		addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues());
113

    
114
		addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier("publication", "dnet:result_typologies"));
115

    
116
		addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues());
117
		addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues());
118
		if (values.get("concept") != null) {
119
			for (final Element e : values.get("concept")) {
120
				final String id = e.getAttributes().get("id");
121
				if (StringUtils.isBlank(id)) throw new IllegalArgumentException("Context id cannot be blank");
122
				metadata.addContext(Context.newBuilder().setId(id));
123
			}
124
		}
125
		if (values.get("journal") != null) {
126
			for (final Element e : values.get("journal")) {
127

    
128
				final Journal.Builder journal = Journal.newBuilder();
129
				if (e.getText() != null) {
130
					journal.setName(e.getText());
131
				}
132

    
133
				final Map<String, String> attr = e.getAttributes();
134
				if (attr != null) {
135
					if (attr.get("issn") != null) {
136
						journal.setIssnPrinted(attr.get("issn"));
137
					}
138
					if (attr.get("eissn") != null) {
139
						journal.setIssnOnline(attr.get("eissn"));
140
					}
141
					if (attr.get("lissn") != null) {
142
						journal.setIssnLinking(attr.get("lissn"));
143
					}
144

    
145
					if (attr.get("ep") != null) {
146
						journal.setEp(attr.get("ep"));
147
					}
148
					if (attr.get("iss") != null) {
149
						journal.setIss(attr.get("iss"));
150
					}
151
					if (attr.get("sp") != null) {
152
						journal.setSp(attr.get("sp"));
153
					}
154
					if (attr.get("vol") != null) {
155
						journal.setVol(attr.get("vol"));
156
					}
157
				}
158
				metadata.setJournal(journal.build());
159
			}
160
		}
161
		return metadata;
162
	}
163

    
164
	private static Result.Builder   buildResult(final Result.Metadata.Builder metadata,
165
			final ValueMap values,
166
			final Descriptor mDesc,
167
			final String hostedbyId,
168
			final String hostedbyName) {
169
		final Result.Builder result = Result.newBuilder();
170
		if (values.get("creator") != null) {
171
			for (final String fullname : Iterables.limit(values.get("creator").listValues(), 10)) {
172

    
173
				final Person.Metadata.Builder authorMetadata = Person.Metadata.newBuilder();
174

    
175
				authorMetadata.setFullname(sf(fullname));
176

    
177
				final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
178
				if (p.isAccurate()) {
179
					authorMetadata.setFirstname(sf(p.getNormalisedFirstName()));
180
					authorMetadata.clearSecondnames().addSecondnames(sf(p.getNormalisedSurname()));
181
					authorMetadata.setFullname(sf(p.getNormalisedFullname()));
182
				}
183

    
184
				result.addAuthor(Person.newBuilder().setMetadata(authorMetadata));
185
			}
186
		}
187

    
188
		final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName));
189

    
190
		addField(instance, Instance.getDescriptor().findFieldByName("licence"),
191
				setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues()));
192
		addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
193
				setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues()));
194

    
195
		if (values.get("identifier") != null) {
196
			addField(instance, Instance.getDescriptor().findFieldByName("url"),
197
					Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter)));
198
		}
199

    
200
		result.addInstance(instance);
201

    
202
		final List<Element> extrefs = values.get("reference");
203
		if (!extrefs.isEmpty()) {
204
			final Descriptor extDesc = ExternalReference.getDescriptor();
205
			for (final Element element : extrefs) {
206
				final ExternalReference.Builder extref = ExternalReference.newBuilder();
207
				addField(extref, extDesc.findFieldByName("url"), element.getText());
208
				addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source"));
209
				addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier"));
210
				addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title"));
211
				addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query"));
212
				addField(extref, extDesc.findFieldByName("qualifier"),
213
						setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type")))
214
								.build());
215

    
216
				result.addExternalReference(extref);
217
			}
218
		}
219

    
220
		return result.setMetadata(metadata);
221
	}
222

    
223
	private static void handleException(Throwable e, final String resultId, final String hostedbyId, final String hostedbyName,
224
			final String provenance, final String trust, final String collectedFromId, final String collectedFromName,
225
			final String originalId, final String dateOfCollection) {
226
		System.err.println("resultId: " + resultId);
227
		if (StringUtils.isNotBlank(hostedbyId)) System.err.println("hostedbyId: " + hostedbyId);
228
		if (StringUtils.isNotBlank(hostedbyName)) System.err.println("hostedbyName: " + hostedbyName);
229
		if (StringUtils.isNotBlank(provenance)) System.err.println("provenance: " + provenance);
230
		if (StringUtils.isNotBlank(trust)) System.err.println("trust: " + trust);
231
		if (StringUtils.isNotBlank(collectedFromId)) System.err.println("collectedFromId: " + collectedFromId);
232
		if (StringUtils.isNotBlank(collectedFromName)) System.err.println("collectedFromName: " + collectedFromName);
233
		if (StringUtils.isNotBlank(originalId)) System.err.println("originalId: " + originalId);
234
		if (StringUtils.isNotBlank(dateOfCollection)) System.err.println("dateOfCollection: " + dateOfCollection);
235
		e.printStackTrace();
236
		throw new RuntimeException(e);
237
	}
238
}
(6-6/9)