Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.List;
4
import java.util.Map;
5

    
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.Descriptor;
9
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
10
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
11
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
12
import eu.dnetlib.data.proto.FieldTypeProtos.*;
13
import eu.dnetlib.data.proto.PersonProtos.Person;
14
import eu.dnetlib.data.proto.PublicationProtos.Publication;
15
import eu.dnetlib.data.proto.TypeProtos.Type;
16
import org.apache.commons.lang3.StringUtils;
17
import org.w3c.dom.NodeList;
18

    
19
public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions {
20

    
21
	public static String oafPublication(
22
			final String resultId,
23
			final String provenance,
24
			final String trust,
25
			final NodeList about,
26
			final String hostedbyId,
27
			final String hostedbyName,
28
			final String collectedFromId,
29
			final String collectedFromName,
30
			final String originalId,
31
			final String dateOfCollection,
32
			final String dateOfTransformation,
33
			final NodeList nodelist) {
34
		try {
35
			final String entityId = DNGFRowKeyDecoder.decode(resultId).getKey();
36
			final ValueMap values = ValueMap.parseNodeList(nodelist);
37
			final Descriptor mDesc = Publication.Metadata.getDescriptor();
38

    
39
			final Publication.Metadata.Builder metadata = buildMetadata(values, mDesc);
40
			final Publication.Builder result = buildPublication(metadata, values, mDesc, hostedbyId, hostedbyName);
41
			final DNGFEntity.Builder entity = buildOafEntity(result, entityId, nodelist, getKV(collectedFromId, collectedFromName), originalId);
42
			entity.setDateofcollection(dateOfCollection)
43
					.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about));
44

    
45
			final DNGF oaf = getOaf(entity, getDataInfo(about, provenance, trust, false, false));
46
			return base64(oaf.toByteArray());
47
		} catch (final Throwable e) {
48
			handleException(e, resultId, hostedbyId, hostedbyName, provenance, trust, collectedFromId, collectedFromName, originalId, dateOfCollection);
49
		}
50
		return null;
51
	}
52

    
53
	public static String oafPublicationUpdate(final String resultId,
54
			final String provenance,
55
			final String trust,
56
			final NodeList nodelist,
57
			final String hostedbyId,
58
			final String hostedbyName) {
59
		try {
60
			final String entityId = DNGFRowKeyDecoder.decode(resultId).getKey();
61
			final ValueMap values = ValueMap.parseNodeList(nodelist);
62
			final Descriptor mDesc = Publication.Metadata.getDescriptor();
63

    
64
			final Publication.Metadata.Builder metadata = buildMetadata(values, mDesc);
65
			final Publication.Builder result = buildPublication(metadata, values, mDesc, hostedbyId, hostedbyName);
66

    
67
			final DNGFEntity.Builder entity = buildOafEntity(result, entityId, nodelist, null, null);
68
			final DNGF oaf = getOaf(entity, null);
69
			return base64(oaf.toByteArray());
70
		} catch (final Throwable e) {
71
			handleException(e, resultId, hostedbyId, hostedbyName, provenance, trust, null, null, null, null);
72
		}
73
		return null;
74
	}
75

    
76
	private static DNGFEntity.Builder buildOafEntity(final Publication.Builder pub,
77
			final String entityId,
78
			final NodeList nodelist,
79
			KeyValue collectedFrom,
80
			String originalId) {
81

    
82
		final List<StructuredProperty> pids = Lists.newArrayList();
83
		pids.addAll(parsePids(nodelist));
84

    
85
		final DNGFEntity.Builder entity =
86
				getEntity(Type.publication, entityId, collectedFrom, StringUtils.isBlank(originalId) ? null : Lists.newArrayList(originalId), null, null, pids)
87
						.setPublication(pub);
88
		return entity;
89
	}
90

    
91
	protected static Publication.Metadata.Builder buildMetadata(final ValueMap values, final Descriptor mDesc) {
92
		final Publication.Metadata.Builder metadata = Publication.Metadata.newBuilder();
93
		addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject").listValues(), "keyword", "dnet:result_subject");
94
		addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title").listValues(), "main title", "dnet:dataCite_title");
95
		for (final String fieldname : Lists.newArrayList("description", "source", "contributor")) {
96
			if (values.get(fieldname) != null) {
97
				for (final String s : values.get(fieldname).listValues()) {
98
					addField(metadata, mDesc.findFieldByName(fieldname), s);
99
				}
100
			}
101
		}
102
		addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues()));
103
		addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
104
		addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues());
105
		addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues());
106
		addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues());
107

    
108
		addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier("publication", "dnet:result_typologies"));
109

    
110
		addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues());
111
		addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues());
112
		if (values.get("concept") != null) {
113
			for (final Element e : values.get("concept")) {
114
				final String id = e.getAttributes().get("id");
115
				if (StringUtils.isBlank(id)) throw new IllegalArgumentException("Context id cannot be blank");
116
				metadata.addContext(Context.newBuilder().setId(id));
117
			}
118
		}
119
		if (values.get("journal") != null) {
120
			for (final Element e : values.get("journal")) {
121

    
122
				final Journal.Builder journal = Journal.newBuilder();
123
				if (e.getText() != null) {
124
					journal.setName(e.getText());
125
				}
126

    
127
				final Map<String, String> attr = e.getAttributes();
128
				if (attr != null) {
129
					if (attr.get("issn") != null) {
130
						journal.setIssnPrinted(attr.get("issn"));
131
					}
132
					if (attr.get("eissn") != null) {
133
						journal.setIssnOnline(attr.get("eissn"));
134
					}
135
					if (attr.get("lissn") != null) {
136
						journal.setIssnLinking(attr.get("lissn"));
137
					}
138
				}
139
				metadata.setJournal(journal.build());
140
			}
141
		}
142
		return metadata;
143
	}
144

    
145
	protected static Publication.Builder buildPublication(final Publication.Metadata.Builder metadata,
146
			final ValueMap values,
147
			final Descriptor mDesc,
148
			final String hostedbyId,
149
			final String hostedbyName) {
150
		final Publication.Builder result = Publication.newBuilder();
151
		if (values.get("creator") != null) {
152
			for (final String fullname : Iterables.limit(values.get("creator").listValues(), 10)) {
153

    
154
				final Person.Metadata.Builder authorMetadata = Person.Metadata.newBuilder();
155

    
156
				authorMetadata.setFullname(sf(fullname));
157

    
158
				final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
159
				if (p.isAccurate()) {
160
					authorMetadata.setFirstname(sf(p.getNormalisedFirstName()));
161
					authorMetadata.clearSecondnames().addSecondnames(sf(p.getNormalisedSurname()));
162
					authorMetadata.setFullname(sf(p.getNormalisedFullname()));
163
				}
164

    
165
				result.addAuthor(Person.newBuilder().setMetadata(authorMetadata));
166
			}
167
		}
168

    
169
		final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName));
170

    
171
		addField(instance, Instance.getDescriptor().findFieldByName("licence"),
172
				setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues()));
173
		addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
174
				setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues()));
175

    
176
		if (values.get("identifier") != null) {
177
			addField(instance, Instance.getDescriptor().findFieldByName("url"),
178
					Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter)));
179
		}
180

    
181
		result.addInstance(instance);
182

    
183
		final List<Element> extrefs = values.get("reference");
184
		if (!extrefs.isEmpty()) {
185
			final Descriptor extDesc = ExternalReference.getDescriptor();
186
			for (final Element element : extrefs) {
187
				final ExternalReference.Builder extref = ExternalReference.newBuilder();
188
				addField(extref, extDesc.findFieldByName("url"), element.getText());
189
				addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source"));
190
				addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier"));
191
				addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title"));
192
				addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query"));
193
				addField(extref, extDesc.findFieldByName("qualifier"),
194
						setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type")))
195
								.build());
196

    
197
				result.addExternalReference(extref);
198
			}
199
		}
200

    
201
		return result.setMetadata(metadata);
202
	}
203

    
204
	private static void handleException(Throwable e, final String resultId, final String hostedbyId, final String hostedbyName,
205
			final String provenance, final String trust, final String collectedFromId, final String collectedFromName,
206
			final String originalId, final String dateOfCollection) {
207
		System.err.println("resultId: " + resultId);
208
		if (StringUtils.isNotBlank(hostedbyId)) System.err.println("hostedbyId: " + hostedbyId);
209
		if (StringUtils.isNotBlank(hostedbyName)) System.err.println("hostedbyName: " + hostedbyName);
210
		if (StringUtils.isNotBlank(provenance)) System.err.println("provenance: " + provenance);
211
		if (StringUtils.isNotBlank(trust)) System.err.println("trust: " + trust);
212
		if (StringUtils.isNotBlank(collectedFromId)) System.err.println("collectedFromId: " + collectedFromId);
213
		if (StringUtils.isNotBlank(collectedFromName)) System.err.println("collectedFromName: " + collectedFromName);
214
		if (StringUtils.isNotBlank(originalId)) System.err.println("originalId: " + originalId);
215
		if (StringUtils.isNotBlank(dateOfCollection)) System.err.println("dateOfCollection: " + dateOfCollection);
216
		e.printStackTrace();
217
		throw new RuntimeException(e);
218
	}
219

    
220

    
221
    public static String personPublication_Authorship(
222
            final String personIdentifier,
223
            final String datasetIdentifier,
224
            final int position,
225
            final String relation,
226
            final String provenance,
227
            final String trust,
228
            final NodeList about) {
229

    
230
        return authorship(personIdentifier, "person",
231
                datasetIdentifier, "publication", position,
232
                relation, provenance, trust, about);
233

    
234

    
235
    }
236
}
(6-6/9)