Project

General

Profile

1 26600 sandro.lab
package eu.dnetlib.data.mapreduce.util;
2
3
import java.io.StringReader;
4
import java.io.StringWriter;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Map.Entry;
8
import java.util.Set;
9 40314 claudio.at
import javax.xml.transform.*;
10 26600 sandro.lab
import javax.xml.transform.dom.DOMSource;
11
import javax.xml.transform.stream.StreamResult;
12
13 35179 michele.ar
import com.google.common.base.Joiner;
14 26600 sandro.lab
import com.google.common.base.Predicate;
15 37717 claudio.at
import com.google.common.base.Splitter;
16 46587 alessia.ba
import com.google.common.collect.Iterables;
17 26600 sandro.lab
import com.google.common.collect.Lists;
18
import com.google.common.collect.Maps;
19
import com.google.common.collect.Sets;
20
import com.google.protobuf.Descriptors.EnumValueDescriptor;
21
import com.google.protobuf.Descriptors.FieldDescriptor;
22
import com.google.protobuf.GeneratedMessage;
23
import com.mycila.xmltool.XMLDoc;
24
import com.mycila.xmltool.XMLTag;
25 40314 claudio.at
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
26
import eu.dnetlib.data.proto.FieldTypeProtos.*;
27 26600 sandro.lab
import eu.dnetlib.data.proto.OafProtos.OafEntity;
28
import eu.dnetlib.data.proto.OafProtos.OafRel;
29
import eu.dnetlib.data.proto.ProjectProtos.Project;
30
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
31
import eu.dnetlib.data.proto.ResultProtos.Result;
32
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
33
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
34
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
35 46587 alessia.ba
import eu.dnetlib.data.proto.TypeProtos;
36 26600 sandro.lab
import eu.dnetlib.data.proto.TypeProtos.Type;
37 52524 claudio.at
import org.apache.commons.lang3.StringUtils;
38 40314 claudio.at
import org.dom4j.Document;
39
import org.dom4j.DocumentException;
40
import org.dom4j.Element;
41
import org.dom4j.Node;
42
import org.dom4j.io.SAXReader;
43 26600 sandro.lab
44 40314 claudio.at
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
45
46 26600 sandro.lab
public class XmlRecordFactory {
47
48 37616 claudio.at
	// private static final Log log = LogFactory.getLog(XmlRecordFactory.class); // NOPMD by marko on 11/24/08 5:02 PM
49 37531 claudio.at
50 40314 claudio.at
	// ---- state of the record being assembled -------------------------------------------------

	/** Number of relations accepted so far, keyed by relation descriptor string; see addRelOrChild. */
	private final Map<String, Integer> relCounters = Maps.newHashMap();
51 43428 alessia.ba
	/** Datasource type class ids that addFieldValue rewrites to "other" in the datasourcetypeui element. */
	protected Set<String> specialDatasourceTypes;
52 26600 sandro.lab
	/** Renders the record, its body, relations, children, instances and web resources. */
	protected TemplateFactory templateFactory = new TemplateFactory();
53
	/** Decoder of the main entity; the factory is valid only once this is set. */
	protected OafDecoder mainEntity = null;
54
	/** Identifier of the main entity, assigned by setMainEntity. */
	protected String key = null;
55 28226 claudio.at
	/** Relations accepted through addRelation. */
	protected List<OafDecoder> relations = Lists.newLinkedList();
56
	/** Children accepted through addChild. */
	protected List<OafDecoder> children = Lists.newLinkedList();
57 26600 sandro.lab
	/** Source of field filters and link descriptors per entity type / relation descriptor. */
	protected EntityConfigTable entityConfigTable;
58
	/** Context definitions looked up (and enriched with funding contexts) while building the record. */
	protected ContextMapper contextMapper;
59 28094 claudio.at
	/** Used to resolve the inverse of a relation class name. */
	protected RelClasses relClasses;
60 26600 sandro.lab
	/** Schema location handed to the record template. */
	protected String schemaLocation;
61
	/** Whether default values are emitted for unset fields of the main entity. */
	protected boolean entityDefaults;
62
	/** Whether default values are emitted for unset fields of cached relation targets. */
	protected boolean relDefaults;
63
	/** Whether default values are emitted for unset fields of children. */
	protected boolean childDefaults;
64
	/** Context ids collected while serialising fields; consumed by buildContexts. */
	protected Set<String> contextes = Sets.newHashSet();
65 28094 claudio.at
	/** Serialised ExtraInfo elements collected by guessType and passed to the body template. */
	protected List<String> extraInfo = Lists.newArrayList();
66 30968 claudio.at
	// NOTE(review): presumably updated by incrementCounter and rendered by countersAsXml, both outside this view - confirm
	protected Map<String, Integer> counters = Maps.newHashMap();
67 26600 sandro.lab
	/** Transformer used by asStringElement to serialise DOM elements without the XML declaration. */
	protected Transformer transformer;
68
69 46587 alessia.ba
	protected static Predicate<String> instanceFilter = new Predicate<String>() {
70 58071 alessia.ba
		final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "license", "accessright", "collectedfrom", "dateofacceptance", "distributionlocation", "refereed");
71 46587 alessia.ba
		@Override
72
		public boolean apply(final String s) {
73
			return instanceFieldFilter.contains(s);
74
		}
75
	};
76
77 28226 claudio.at
	public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
78 43428 alessia.ba
			final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults, final Set<String> otherDatasourceTypesUForUI)
79 33382 claudio.at
			throws TransformerConfigurationException, TransformerFactoryConfigurationError {
80 26600 sandro.lab
		this.entityConfigTable = entityConfigTable;
81
		this.contextMapper = contextMapper;
82 28094 claudio.at
		this.relClasses = relClasses;
83 26600 sandro.lab
		this.schemaLocation = schemaLocation;
84
		this.entityDefaults = entityDefaults;
85
		this.relDefaults = relDefaults;
86
		this.childDefaults = childDefeaults;
87 43428 alessia.ba
		this.specialDatasourceTypes = otherDatasourceTypesUForUI;
88 26600 sandro.lab
89
		transformer = TransformerFactory.newInstance().newTransformer();
90
		transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
91
	}
92
93 40314 claudio.at
	public static String removePrefix(final String s) {
94
		if (s.contains("|")) return StringUtils.substringAfter(s, "|");
95
		return s;
96
	}
97
98
	public static String escapeXml(final String value) {
99
		return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("\"", "&quot;").replaceAll("'", "&apos;");
100
	}
101
102 37334 claudio.at
	public Map<String, Integer> getRelCounters() {
103
		return relCounters;
104
	}
105
106
	public RelClasses getRelClasses() {
107
		return relClasses;
108
	}
109
110 26600 sandro.lab
	public String getId() {
111
		return key;
112
	}
113
114
	public boolean isValid() {
115
		return mainEntity != null;
116
	}
117
118
	public void setMainEntity(final OafDecoder mainEntity) {
119
		this.mainEntity = mainEntity;
120
		this.key = mainEntity.decodeEntity().getId();
121
	}
122
123 37334 claudio.at
	public void addRelation(final Type type, final OafDecoder rel) {
124
		addRelOrChild(type, relations, rel);
125 26600 sandro.lab
	}
126
127 37334 claudio.at
	public void addChild(final Type type, final OafDecoder child) {
128
		addRelOrChild(type, children, child);
129 26600 sandro.lab
	}
130
131 37334 claudio.at
	private void addRelOrChild(final Type type, final List<OafDecoder> list, final OafDecoder decoder) {
132
133
		final OafRel oafRel = decoder.getOafRel();
134
		final String rd = oafRel.getRelType().toString() + "_" + oafRel.getSubRelType() + "_" + relClasses.getInverse(oafRel.getRelClass());
135
		final LinkDescriptor ld = entityConfigTable.getDescriptor(type, new RelDescriptor(rd));
136
137
		if (getRelCounters().get(rd) == null) {
138
			getRelCounters().put(rd, 0);
139
		}
140
141
		if (ld == null) {
142
			list.add(decoder);
143
			return;
144
		}
145
146
		if (ld.getMax() < 0) {
147
			list.add(decoder);
148
			return;
149
		}
150
151
		if (getRelCounters().get(rd) < ld.getMax()) {
152
			getRelCounters().put(rd, getRelCounters().get(rd) + 1);
153
			list.add(decoder);
154
		}
155 26600 sandro.lab
	}
156
157
	public String build() {
158 37717 claudio.at
		try {
159
			final OafEntityDecoder entity = mainEntity.decodeEntity();
160
			// log.info("building");
161
			// log.info("main: " + mainEntity);
162
			// log.info("rel:  " + relations);
163
			// log.info("chi:  " + children);
164
			// log.info("=============");
165 26600 sandro.lab
166 46587 alessia.ba
			final Predicate<String> filter = entityConfigTable.getFilter(entity.getType());
167
			final List<String> metadata = decodeType(entity, filter, entityDefaults, false);
168 26600 sandro.lab
169 37717 claudio.at
			// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
170
			final List<String> rels = listRelations();
171 46587 alessia.ba
			metadata.addAll(buildContexts(entity.getType()));
172 37717 claudio.at
			metadata.add(parseDataInfo(mainEntity));
173 28226 claudio.at
174 46587 alessia.ba
			final String body = templateFactory.buildBody(entity.getType(), metadata, rels, listChildren(), extraInfo);
175 26600 sandro.lab
176 40314 claudio.at
			return templateFactory
177 46587 alessia.ba
					.buildRecord(key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
178 37717 claudio.at
		} catch (final Throwable e) {
179
			throw new RuntimeException(String.format("error building record '%s'", this.key), e);
180
		}
181 26600 sandro.lab
	}
182
183
	private String parseDataInfo(final OafDecoder decoder) {
184 35771 claudio.at
		final DataInfo dataInfo = decoder.getOaf().getDataInfo();
185 26600 sandro.lab
186 35771 claudio.at
		final StringBuilder sb = new StringBuilder();
187 26600 sandro.lab
		sb.append("<datainfo>");
188 28094 claudio.at
		sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
189
		sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
190
		sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
191
		sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
192
		sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
193 26600 sandro.lab
		sb.append("</datainfo>");
194
195
		return sb.toString();
196
	}
197
198 46587 alessia.ba
	private List<String> decodeType(final OafEntityDecoder decoder, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
199 26600 sandro.lab
200
		final List<String> metadata = Lists.newArrayList();
201
		metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
202
		metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
203
204 35771 claudio.at
		if ((decoder.getEntity() instanceof Result) && !expandingRel) {
205 49096 claudio.at
			metadata.add(asXmlElement("bestaccessright", "", getBestAccessright(), null));
206 26600 sandro.lab
207
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
208
		}
209 35771 claudio.at
		if ((decoder.getEntity() instanceof Project) && !expandingRel) {
210 26600 sandro.lab
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
211
		}
212
213
		return metadata;
214
	}
215
216 49096 claudio.at
	private Qualifier getBestAccessright() {
217
		Qualifier bestAccessRight = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
218 35771 claudio.at
		final LicenseComparator lc = new LicenseComparator();
219
		for (final Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
220 49096 claudio.at
			if (lc.compare(bestAccessRight, instance.getAccessright()) > 0) {
221
				bestAccessRight = instance.getAccessright();
222 26600 sandro.lab
			}
223
		}
224 49096 claudio.at
		return bestAccessRight;
225 26600 sandro.lab
	}
226
227 31409 claudio.at
	public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
228
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
229
	}
230
231 26600 sandro.lab
	private List<String> listRelations() {
232
233
		final List<String> rels = Lists.newArrayList();
234
235 35771 claudio.at
		for (final OafDecoder decoder : this.relations) {
236 26600 sandro.lab
237
			final OafRel rel = decoder.getOafRel();
238
			final OafEntity cachedTarget = rel.getCachedTarget();
239
			final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
240
241 28094 claudio.at
			// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
242
			if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
243 26600 sandro.lab
244
				final List<String> metadata = Lists.newArrayList();
245 46587 alessia.ba
				final TypeProtos.Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
246
				//final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
247
				metadata.addAll(listFields(relDecoder.getSubRel(), entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
248 26600 sandro.lab
249
				String semanticclass = "";
250
				String semanticscheme = "";
251
252 35771 claudio.at
				final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
253 28226 claudio.at
254 35771 claudio.at
				if ((cachedTarget != null) && cachedTarget.isInitialized()) {
255 28226 claudio.at
256 46587 alessia.ba
					//final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
257 43896 claudio.at
					final OafEntityDecoder d = OafEntityDecoder.decode(cachedTarget);
258 46587 alessia.ba
					metadata.addAll(decodeType(d, entityConfigTable.getIncludeFilter(targetType, relDescriptor), relDefaults, true));
259 43896 claudio.at
					if (d.getType().equals(Type.result)) {
260
						for(Instance i : cachedTarget.getResult().getInstanceList()) {
261 48697 claudio.at
							final List<String> fields = listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true);
262
							metadata.addAll(fields);
263 43896 claudio.at
						}
264
					}
265 28094 claudio.at
				}
266 26600 sandro.lab
267 35771 claudio.at
				final RelMetadata relMetadata = relDecoder.getRelMetadata();
268 28094 claudio.at
				// debug
269
				if (relMetadata == null) {
270
					// System.err.println(this);
271
					semanticclass = semanticscheme = "UNKNOWN";
272 26600 sandro.lab
				} else {
273 28094 claudio.at
					semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
274
					semanticscheme = relMetadata.getSemantics().getSchemename();
275 26600 sandro.lab
				}
276
277 42584 claudio.at
				final String rd = relDescriptor.getSubRelType().toString();
278
				incrementCounter(rd);
279 28226 claudio.at
280 42584 claudio.at
				final DataInfo info = decoder.getOaf().getDataInfo();
281
				if (info.getInferred()) {
282
					incrementCounter(rd + "_inferred");
283
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "sysimport:crosswalk")) {
284
					incrementCounter(rd + "_collected");
285
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "user:")) {
286
					incrementCounter(rd + "_claimed");
287
				}
288
289 35771 claudio.at
				final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
290 28226 claudio.at
291 35771 claudio.at
				final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
292 28226 claudio.at
293 48697 claudio.at
				rels.add(templateFactory.getRel(targetType, relId, Sets.newHashSet(metadata), semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
294 28226 claudio.at
						info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
295 26600 sandro.lab
			}
296
		}
297
		return rels;
298
	}
299
300 40314 claudio.at
	// //////////////////////////////////
301
302 26600 sandro.lab
	private List<String> listChildren() {
303
304
		final List<String> children = Lists.newArrayList();
305 35771 claudio.at
		for (final OafDecoder decoder : this.children) {
306
			final OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
307 30968 claudio.at
			addChildren(children, cachedTarget, decoder.getRelDescriptor());
308 26600 sandro.lab
		}
309 35771 claudio.at
		final OafEntityDecoder entity = mainEntity.decodeEntity();
310 26600 sandro.lab
		if (entity.getType().equals(Type.result)) {
311 35771 claudio.at
			for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
312 58086 claudio.at
				final List<String> instancemetadata = listFields(instance, instanceFilter, false, false);
313
314
				if (instance.hasProcessingchargeamount()) {
315
					instancemetadata.add("<processingchargeamount currency=\""
316
							+ instance.getProcessingchargecurrency().getValue() + "\">"
317
							+ instance.getProcessingchargeamount().getValue()
318
							+ "</processingchargeamount>");
319
				}
320
321
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), instancemetadata,
322 48697 claudio.at
						listMap(instance.getUrlList(), identifier -> templateFactory.getWebResource(identifier))));
323 26600 sandro.lab
			}
324 35771 claudio.at
			for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
325 26600 sandro.lab
				// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
326 35771 claudio.at
				final List<String> fields = listFields(er, null, false, false);
327 26600 sandro.lab
				children.add(templateFactory.getChild("externalreference", null, fields));
328
			}
329
		}
330
331
		return children;
332
	}
333
334 28094 claudio.at
	private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
335 26600 sandro.lab
		final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
336 30968 claudio.at
		incrementCounter(relDescriptor.getSubRelType().toString());
337 46587 alessia.ba
		final Predicate<String> filter = entityConfigTable.getIncludeFilter(target.getType(), relDescriptor);
338
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filter, childDefaults, false)));
339 26600 sandro.lab
	}
340
341 46587 alessia.ba
	private List<String> listFields(final GeneratedMessage fields, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
342 26600 sandro.lab
343
		final List<String> metadata = Lists.newArrayList();
344
345
		if (fields != null) {
346
347 35771 claudio.at
			final Set<String> seen = Sets.newHashSet();
348 26600 sandro.lab
349 46587 alessia.ba
			final Map<FieldDescriptor, Object> filtered = filterFields(fields, filter);
350
			for (final Entry<FieldDescriptor, Object> e : filtered.entrySet()) {
351
352 26600 sandro.lab
				final String name = e.getKey().getName();
353
				seen.add(name);
354
				addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
355
			}
356
357
			if (defaults) {
358 49029 claudio.at
				final Iterable<FieldDescriptor> unseen =
359
						Iterables.filter(fields.getDescriptorForType().getFields(), fd -> !seen.contains(fd.getName()) && filter.apply(fd.getName()));
360 46587 alessia.ba
				for(FieldDescriptor fd : unseen){
361
					addFieldValue(metadata, fd, getDefault(fd), expandingRel);
362 26600 sandro.lab
				}
363
			}
364
		}
365
		return metadata;
366
	}
367
368
	private Object getDefault(final FieldDescriptor fd) {
369
		switch (fd.getType()) {
370
		case BOOL:
371
			return false;
372
		case BYTES:
373
			return "".getBytes();
374
		case MESSAGE: {
375 35771 claudio.at
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) return defaultQualifier();
376 37894 alessia.ba
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType()))
377
				return StructuredProperty.newBuilder().setValue("").setQualifier(defaultQualifier()).build();
378 35771 claudio.at
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) return KeyValue.newBuilder().setKey("").setValue("").build();
379
			if (StringField.getDescriptor().equals(fd.getMessageType())) return StringField.newBuilder().setValue("").build();
380 37616 claudio.at
			if (BoolField.getDescriptor().equals(fd.getMessageType())) return BoolField.newBuilder().buildPartial();
381 26600 sandro.lab
			return null;
382
		}
383
		case SFIXED32:
384
		case SFIXED64:
385
		case SINT32:
386
		case SINT64:
387
		case INT32:
388
		case INT64:
389
		case FIXED32:
390
		case FIXED64:
391 57774 alessia.ba
		case DOUBLE:
392 26600 sandro.lab
		case FLOAT:
393
		case STRING:
394
			return "";
395
		default:
396
			return null;
397
		}
398
	}
399
400
	private Qualifier defaultQualifier() {
401
		return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
402
	}
403
404
	@SuppressWarnings("unchecked")
405
	private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
406 42501 claudio.at
		if ("dateofcollection".equals(fd.getName()) ||
407
			"dateoftransformation".equals(fd.getName()) ||
408
			"id".equals(fd.getName()) ||
409
				(value == null)) return;
410 26600 sandro.lab
411
		if (fd.getName().equals("datasourcetype")) {
412 35771 claudio.at
			final String classid = ((Qualifier) value).getClassid();
413 26600 sandro.lab
414 35771 claudio.at
			final Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
415 26600 sandro.lab
			if (specialDatasourceTypes.contains(classid)) {
416
				q.setClassid("other").setClassname("other");
417
			}
418 28094 claudio.at
			metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
419 26600 sandro.lab
		}
420
421 35771 claudio.at
		if (fd.isRepeated() && (value instanceof List<?>)) {
422
			for (final Object o : (List<Object>) value) {
423 26600 sandro.lab
				guessType(metadata, fd, o, expandingRel);
424
			}
425
		} else {
426
			guessType(metadata, fd, value, expandingRel);
427
		}
428
	}
429
430
	private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
431
432
		if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
433
434 49029 claudio.at
			if(Author.getDescriptor().equals(fd.getMessageType())) {
435
436
				final Author a = (Author) o;
437
438
				final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
439
				if (a.hasName()) {
440
					sb.append(" name=\"" + escapeXml(a.getName()) + "\"");
441
				}
442
				if (a.hasSurname()) {
443
					sb.append(" surname=\"" + escapeXml(a.getSurname()) + "\"");
444
				}
445 52524 claudio.at
				if (a.getPidCount() > 0) {
446
					a.getPidList().stream()
447 53371 claudio.at
							.filter(kv -> StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()))
448
							.forEach(kv -> {
449
								String pidType = escapeXml(kv.getKey())
450
										.replaceAll("\\W", "");
451
								String pidValue = escapeXml(kv.getValue());
452
								sb.append(String.format(" %s=\"%s\"", pidType, pidValue));
453
							});
454 52524 claudio.at
				}
455
456 49029 claudio.at
				sb.append(">" + escapeXml(a.getFullname()) + "</creator>");
457
458
				metadata.add(sb.toString());
459
			}
460
461 26600 sandro.lab
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
462 35771 claudio.at
				final Qualifier qualifier = (Qualifier) o;
463 28094 claudio.at
				metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
464 26600 sandro.lab
			}
465
466
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
467 35771 claudio.at
				final StructuredProperty sp = (StructuredProperty) o;
468 28094 claudio.at
				metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
469 41681 claudio.at
470
				if (!expandingRel && fd.getName().equals("pid")) {
471
					if (sp.getQualifier().getClassid().equalsIgnoreCase("doi")) {
472
						incrementCounter("doi");
473
					}
474
				}
475 26600 sandro.lab
			}
476
477
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
478 35771 claudio.at
				final KeyValue kv = (KeyValue) o;
479 26600 sandro.lab
				metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
480
			}
481
482 28094 claudio.at
			if (StringField.getDescriptor().equals(fd.getMessageType())) {
483 35771 claudio.at
				final String fieldName = fd.getName();
484 35179 michele.ar
485
				if (fieldName.equals("fundingtree")) {
486 35771 claudio.at
					final String xmlTree = o instanceof StringField ? ((StringField) o).getValue() : o.toString();
487 35179 michele.ar
488
					if (expandingRel) {
489
						metadata.add(getRelFundingTree(xmlTree));
490
						fillContextMap(xmlTree);
491
					} else {
492
						metadata.add(xmlTree);
493
					}
494 28094 claudio.at
				} else {
495 35771 claudio.at
					final StringField sf = (StringField) o;
496
					final StringBuilder sb = new StringBuilder("<" + fd.getName());
497 28094 claudio.at
					if (sf.hasDataInfo()) {
498 35771 claudio.at
						final DataInfo dataInfo = sf.getDataInfo();
499 28094 claudio.at
						dataInfoAsAttributes(sb, dataInfo);
500
					}
501
					sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
502
					metadata.add(sb.toString());
503
				}
504
			}
505 37616 claudio.at
506
			if (BoolField.getDescriptor().equals(fd.getMessageType())) {
507
				final BoolField bf = (BoolField) o;
508
				final StringBuilder sb = new StringBuilder("<" + fd.getName());
509
				if (bf.hasDataInfo()) {
510
					final DataInfo dataInfo = bf.getDataInfo();
511
					dataInfoAsAttributes(sb, dataInfo);
512
				}
513
514
				sb.append(">" + (bf.hasValue() ? bf.getValue() : "") + "</" + fd.getName() + ">");
515
				metadata.add(sb.toString());
516
			}
517
518 35771 claudio.at
			if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
519
				final Journal j = (Journal) o;
520 26600 sandro.lab
				metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
521 46587 alessia.ba
						+ escapeXml(j.getIssnLinking()) + "\" " + "ep=\"" + escapeXml(j.getEp()) + "\" " + "iss=\"" + escapeXml(j.getIss()) + "\" " + "sp=\""
522
						+ escapeXml(j.getSp()) + "\" " + "vol=\"" + escapeXml(j.getVol()) + "\">" + escapeXml(j.getName()) + "</journal>");
523 26600 sandro.lab
			}
524
525 35771 claudio.at
			if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
526 52751 alessia.ba
				final String contextid = ((Context) o).getId();
527
				contextes.add(contextid);
528
				/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
529
				if(contextid.equalsIgnoreCase("dh-ch::subcommunity::2")){
530
					contextes.add("clarin");
531
				}
532
533 26600 sandro.lab
			}
534
535 35771 claudio.at
			if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
536 26600 sandro.lab
537 35771 claudio.at
				final ExtraInfo e = (ExtraInfo) o;
538
				final StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
539 28094 claudio.at
540
				sb.append("name=\"" + e.getName() + "\" ");
541
				sb.append("typology=\"" + e.getTypology() + "\" ");
542
				sb.append("provenance=\"" + e.getProvenance() + "\" ");
543
				sb.append("trust=\"" + e.getTrust() + "\"");
544
				sb.append(">");
545 30827 claudio.at
				sb.append(e.getValue());
546 26600 sandro.lab
				sb.append("</" + fd.getName() + ">");
547 28094 claudio.at
548
				extraInfo.add(sb.toString());
549 26600 sandro.lab
			}
550
551
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
552 35771 claudio.at
			if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) return;
553 28094 claudio.at
			metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
554 26600 sandro.lab
		} else {
555 57774 alessia.ba
			if(o instanceof String && o.equals("")){
556
				metadata.add(asXmlElement(fd.getName(), "", null, null));
557 57758 alessia.ba
			}
558 57774 alessia.ba
			else {
559
				switch (fd.getType()) {
560
					case SFIXED32:
561
					case SFIXED64:
562
					case SINT32:
563
					case SINT64:
564
					case INT32:
565
					case INT64:
566
					case FIXED32:
567
					case FIXED64:
568
						metadata.add(asXmlElement(fd.getName(), String.format("%s", o), null, null));
569
						break;
570
					case DOUBLE:
571
					case FLOAT:
572
						metadata.add(asXmlElement(fd.getName(), String.format("%.2f", ((Float) o)), null, null));
573
						break;
574
					default:
575
						metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
576
				}
577
			}
578 26600 sandro.lab
		}
579
	}
580
581 28226 claudio.at
	private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
582 28094 claudio.at
		sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
583
		sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
584 30863 claudio.at
		sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
585 28094 claudio.at
		sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
586
		return sb;
587 26600 sandro.lab
	}
588
589
	private List<String> buildContexts(final Type type) {
590
		final List<String> res = Lists.newArrayList();
591
592 35771 claudio.at
		if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(Type.result)) {
593 26600 sandro.lab
594
			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
595
596 37717 claudio.at
			for (final String context : contextes) {
597 26600 sandro.lab
598 37717 claudio.at
				String id = "";
599
				for (final String token : Splitter.on("::").split(context)) {
600
					id += token;
601 26600 sandro.lab
602 37717 claudio.at
					final ContextDef def = contextMapper.get(id);
603 26600 sandro.lab
604 51221 claudio.at
					if (def == null) {
605
						continue;
606
						// throw new IllegalStateException(String.format("cannot find context for id '%s'", id));
607
					}
608 26600 sandro.lab
609
					if (def.getName().equals("context")) {
610 35771 claudio.at
						final String xpath = "//context/@id='" + def.getId() + "'";
611 26600 sandro.lab
						if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
612
							document = addContextDef(document.gotoRoot(), def);
613
						}
614
					}
615
616
					if (def.getName().equals("category")) {
617 35771 claudio.at
						final String rootId = StringUtils.substringBefore(def.getId(), "::");
618 26600 sandro.lab
						document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
619
					}
620
621
					if (def.getName().equals("concept")) {
622
						document = addContextDef(document, def).gotoParent();
623
					}
624 37717 claudio.at
					id += "::";
625 26600 sandro.lab
				}
626
			}
627
628 35771 claudio.at
			for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
629 26600 sandro.lab
				try {
630
					res.add(asStringElement(x));
631 35771 claudio.at
				} catch (final TransformerException e) {
632 26600 sandro.lab
					throw new RuntimeException(e);
633
				}
634
			}
635
		}
636
637
		return res;
638
	}
639
640
	private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
641
		tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
642 35771 claudio.at
		if ((def.getType() != null) && !def.getType().isEmpty()) {
643 26600 sandro.lab
			tag.addAttribute("type", def.getType());
644
		}
645
		return tag;
646
	}
647
648
	private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
649 35771 claudio.at
		final StringWriter buffer = new StringWriter();
650 26600 sandro.lab
		transformer.transform(new DOMSource(element), new StreamResult(buffer));
651
		return buffer.toString();
652
	}
653
654
	@SuppressWarnings("unchecked")
655 35179 michele.ar
	private String getRelFundingTree(final String xmlTree) {
656
		String funding = "<funding>";
657
		try {
658 35771 claudio.at
			final Document ftree = new SAXReader().read(new StringReader(xmlTree));
659 35179 michele.ar
			funding = "<funding>";
660
			// String _id = "";
661 26600 sandro.lab
662 37531 claudio.at
			funding += getFunderElement(ftree);
663
664 35771 claudio.at
			for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
665
				final Element e = (Element) o;
666
				final String _id = e.valueOf("./id");
667 37894 alessia.ba
				funding += "<" + e.getName() + " name=\"" + escapeXml(e.valueOf("./name")) + "\">" + escapeXml(_id) + "</" + e.getName() + ">";
668 35179 michele.ar
				// _id += "::";
669 26600 sandro.lab
			}
670 35771 claudio.at
		} catch (final DocumentException e) {
671 37531 claudio.at
			throw new IllegalArgumentException("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
672 35179 michele.ar
		} finally {
673
			funding += "</funding>";
674 26600 sandro.lab
		}
675 35179 michele.ar
		return funding;
676 26600 sandro.lab
	}
677
678 37531 claudio.at
	private String getFunderElement(final Document ftree) {
679
		final String funderId = ftree.valueOf("//fundingtree/funder/id/text()");
680
		final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname/text()");
681
		final String funderName = ftree.valueOf("//fundingtree/funder/name/text()");
682
		final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction/text()");
683
684
		return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
685
				+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
686
	}
687
688 35179 michele.ar
	private void fillContextMap(final String xmlTree) {
689 26600 sandro.lab
690 35179 michele.ar
		Document fundingPath;
691 26600 sandro.lab
		try {
692 35179 michele.ar
			fundingPath = new SAXReader().read(new StringReader(xmlTree));
693 35771 claudio.at
		} catch (final DocumentException e) {
694 26600 sandro.lab
			throw new RuntimeException(e);
695
		}
696 37273 claudio.at
		try {
697
			final Node funder = fundingPath.selectSingleNode("//funder");
698 41468 claudio.at
699
			if (funder != null) {
700
701
				final String funderShortName = funder.valueOf("./shortname");
702
				contextes.add(funderShortName);
703
704
				contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
705
				final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
706
				if (level0 != null) {
707
					final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
708
					contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
709
					final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
710
					if (level1 == null) {
711
						contextes.add(level0Id);
712
					} else {
713
						final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
714
						contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
715
						final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
716
						if (level2 == null) {
717
							contextes.add(level1Id);
718
						} else {
719
							final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
720
							contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
721
							contextes.add(level2Id);
722
						}
723
					}
724 37273 claudio.at
				}
725
			}
726
		} catch (final NullPointerException e) {
727
			throw new IllegalArgumentException("malformed funding path: " + xmlTree, e);
728 35179 michele.ar
		}
729 26600 sandro.lab
	}
730
731 28094 claudio.at
	private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
732 26600 sandro.lab
		StringBuilder sb = new StringBuilder();
733
		sb.append("<");
734
		sb.append(name);
735
		if (q != null) {
736
			sb.append(getAttributes(q));
737
		}
738 28094 claudio.at
		if (dataInfo != null) {
739
			sb = dataInfoAsAttributes(sb, dataInfo);
740
		}
741 35771 claudio.at
		if ((value == null) || value.isEmpty()) {
742 26600 sandro.lab
			sb.append("/>");
743
			return sb.toString();
744
			// return "<" + name + getAttributes(q) + "/>";
745
		}
746
747
		sb.append(">");
748
		// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
749
		sb.append(escapeXml(value));
750
		sb.append("</");
751
		sb.append(name);
752
		sb.append(">");
753
754
		return sb.toString();
755
		// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
756
	}
757
758
	private String getAttributes(final Qualifier q) {
759 35771 claudio.at
		if (q == null) return "";
760 26600 sandro.lab
761 35771 claudio.at
		final StringBuilder sb = new StringBuilder();
762
		for (final Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
763 26600 sandro.lab
			// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
764
			sb.append(" ");
765
			sb.append(e.getKey().getName());
766
			sb.append("=\"");
767
			sb.append(escapeXml(e.getValue().toString()));
768
			sb.append("\"");
769
		}
770
		return sb.toString();
771
	}
772
773
774 46587 alessia.ba
	private Map<FieldDescriptor, Object> filterFields(final GeneratedMessage fields, final Predicate<String> acceptFilter) {
775
		if(acceptFilter == null) return fields.getAllFields();
776
		final Map<FieldDescriptor, Object> res = Maps.newHashMap();
777
		for(Entry<FieldDescriptor, Object> e : fields.getAllFields().entrySet()) {
778
			if (acceptFilter.apply(e.getKey().getName())) {
779
				res.put(e.getKey(), e.getValue());
780
			}
781 26600 sandro.lab
		}
782 46587 alessia.ba
		return res;
783 26600 sandro.lab
	}
784
785 46587 alessia.ba
786
787 30968 claudio.at
	private List<String> countersAsXml() {
788 35771 claudio.at
		final List<String> out = Lists.newArrayList();
789
		for (final Entry<String, Integer> e : counters.entrySet()) {
790 30968 claudio.at
			out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
791
		}
792
		return out;
793
	}
794
795
	private void incrementCounter(final String type) {
796
		if (!counters.containsKey(type)) {
797
			counters.put(type, 1);
798
		} else {
799
			counters.put(type, counters.get(type) + 1);
800
		}
801
	}
802
803 26600 sandro.lab
	@Override
804
	public String toString() {
805 35771 claudio.at
		final StringBuilder sb = new StringBuilder();
806 26600 sandro.lab
		sb.append("################################################\n");
807
		sb.append("ID: ").append(key).append("\n");
808
		if (mainEntity != null) {
809
			sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
810
		}
811
		if (relations != null) {
812
			sb.append("\nRELATIONS:\n");
813 35771 claudio.at
			for (final OafDecoder decoder : relations) {
814 26600 sandro.lab
				sb.append(decoder.getOafRel().toString() + "\n");
815
			}
816
		}
817
		if (children != null) {
818
			sb.append("\nCHILDREN:\n");
819 35771 claudio.at
			for (final OafDecoder decoder : children) {
820 26600 sandro.lab
				sb.append(decoder.getOafRel().toString() + "\n");
821
			}
822
		}
823
		return sb.toString();
824
	}
825
826
}