Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.List;
4
import java.util.Map;
5

    
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.Descriptor;
9
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
10
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
11
import eu.dnetlib.data.proto.OafProtos.Oaf;
12
import eu.dnetlib.data.proto.OafProtos.OafEntity;
13
import eu.dnetlib.data.proto.PersonProtos.Person;
14
import eu.dnetlib.data.proto.ResultProtos.Result;
15
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
16
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
17
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
18
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
19
import eu.dnetlib.data.proto.TypeProtos.Type;
20
import org.apache.commons.lang.StringUtils;
21
import org.w3c.dom.NodeList;
22

    
23
public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions {
24

    
25
	public static String oafResult(
26
			final String resultId,
27
			final String provenance,
28
			final String trust,
29
			final NodeList about,
30
			final String hostedbyId,
31
			final String hostedbyName,
32
			final String collectedFromId,
33
			final String collectedFromName,
34
			final String originalId,
35
			final String dateOfCollection,
36
			final String dateOfTransformation,
37
			final NodeList nodelist) {
38
		try {
39
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
40

    
41
			final Result.Builder result = Result.newBuilder();
42

    
43
			final ValueMap values = ValueMap.parseNodeList(nodelist);
44

    
45
			final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
46
			final Descriptor mDesc = Result.Metadata.getDescriptor();
47

    
48
			if (values.get("creator") != null) {
49
				for (final String fullname : Iterables.limit(values.get("creator").listValues(), 10)) {
50

    
51
					final Person.Metadata.Builder authorMetadata = Person.Metadata.newBuilder();
52

    
53
					authorMetadata.setFullname(sf(fullname));
54

    
55
					final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
56
					if (p.isAccurate()) {
57
						authorMetadata.setFirstname(sf(p.getNormalisedFirstName()));
58
						authorMetadata.clearSecondnames().addSecondnames(sf(p.getNormalisedSurname()));
59
						authorMetadata.setFullname(sf(p.getNormalisedFullname()));
60
					}
61

    
62
					result.addAuthor(Person.newBuilder().setMetadata(authorMetadata));
63
				}
64
			}
65

    
66
			addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject").listValues(), "keyword", "dnet:result_subject");
67
			addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title").listValues(), "main title", "dnet:dataCite_title");
68

    
69
			for (final String fieldname : Lists.newArrayList("description", "source")) {
70
				if (values.get(fieldname) != null) {
71
					for (final String s : values.get(fieldname).listValues()) {
72
						addField(metadata, mDesc.findFieldByName(fieldname), s);
73
					}
74
				}
75
			}
76

    
77
			addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues()));
78
			addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
79
			addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues());
80
			addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues());
81
			addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues());
82

    
83
			addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier("publication", "dnet:result_typologies"));
84

    
85
			addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues());
86
			addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues());
87

    
88
			// addField(metadata, Result.Metadata.getDescriptor().findFieldByName("provenanceaction"),
89
			// getSimpleQualifier("sysimport:crosswalk:repository", "dnet:provenanceActions").build());
90

    
91
			if (values.get("concept") != null) {
92
				for (final Element e : values.get("concept")) {
93
					final String id = e.getAttributes().get("id");
94
					if (StringUtils.isBlank(id)) throw new IllegalArgumentException("Context id cannot be blank");
95
					metadata.addContext(Context.newBuilder().setId(id));
96
				}
97
			}
98

    
99
			if (values.get("journal") != null) {
100
				for (final Element e : values.get("journal")) {
101

    
102
					final Journal.Builder journal = Journal.newBuilder();
103
					if (e.getText() != null) {
104
						journal.setName(e.getText());
105
					}
106

    
107
					final Map<String, String> attr = e.getAttributes();
108
					if (attr != null) {
109
						if (attr.get("issn") != null) {
110
							journal.setIssnPrinted(attr.get("issn"));
111
						}
112
						if (attr.get("eissn") != null) {
113
							journal.setIssnOnline(attr.get("eissn"));
114
						}
115
						if (attr.get("lissn") != null) {
116
							journal.setIssnLinking(attr.get("lissn"));
117
						}
118
					}
119
					metadata.setJournal(journal.build());
120
				}
121
			}
122

    
123
			final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName));
124

    
125
			addField(instance, Instance.getDescriptor().findFieldByName("licence"),
126
					setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues()));
127
			addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
128
					setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues()));
129

    
130
			if (values.get("identifier") != null) {
131
				addField(instance, Instance.getDescriptor().findFieldByName("url"),
132
						Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter)));
133
			}
134

    
135
			result.addInstance(instance);
136

    
137
			final List<Element> extrefs = values.get("reference");
138
			if (!extrefs.isEmpty()) {
139
				final Descriptor extDesc = ExternalReference.getDescriptor();
140
				for (final Element element : extrefs) {
141
					final ExternalReference.Builder extref = ExternalReference.newBuilder();
142
					addField(extref, extDesc.findFieldByName("url"), element.getText());
143
					addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source"));
144
					addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier"));
145
					addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title"));
146
					addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query"));
147
					addField(extref, extDesc.findFieldByName("qualifier"),
148
							setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type")))
149
							.build());
150

    
151
					result.addExternalReference(extref);
152
				}
153
			}
154

    
155
			final List<StructuredProperty> pids = Lists.newArrayList();
156
			pids.addAll(parsePids(nodelist));
157
			pids.add(getStructuredProperty(originalId, "oai", getClassName("oai"), "dnet:pid_types", "dnet:pid_types"));
158

    
159
			final OafEntity.Builder entity =
160
					getEntity(Type.result, entityId, getKV(collectedFromId, collectedFromName), Lists.newArrayList(originalId), dateOfCollection,
161
							dateOfTransformation, pids)
162
					.setResult(result.setMetadata(metadata));
163

    
164
			entity.setOaiprovenance(getOAIProvenance(about));
165

    
166
			final Oaf oaf = getOaf(entity, getDataInfo(about, provenance, trust, false, false));
167
			return base64(oaf.toByteArray());
168
		} catch (final Throwable e) {
169
			System.err.println("resultId: " + resultId);
170
			System.err.println("hostedbyId: " + hostedbyId);
171
			System.err.println("hostedbyName: " + hostedbyName);
172
			System.err.println("provenance: " + provenance);
173
			System.err.println("trust: " + trust);
174
			System.err.println("collectedFromId: " + collectedFromId);
175
			System.err.println("collectedFromName: " + collectedFromName);
176
			System.err.println("originalId: " + originalId);
177
			System.err.println("dateOfCollection: " + dateOfCollection);
178
			e.printStackTrace();
179
			throw new RuntimeException(e);
180
		}
181
	}
182

    
183
}
(6-6/8)