Project

General

Profile

1
package eu.dnetlib.data.mapreduce.util;
2

    
3
import java.io.StringReader;
4
import java.io.StringWriter;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Map.Entry;
8
import java.util.Set;
9
import javax.xml.transform.*;
10
import javax.xml.transform.dom.DOMSource;
11
import javax.xml.transform.stream.StreamResult;
12

    
13
import com.google.common.base.Joiner;
14
import com.google.common.base.Predicate;
15
import com.google.common.base.Splitter;
16
import com.google.common.collect.Iterables;
17
import com.google.common.collect.Lists;
18
import com.google.common.collect.Maps;
19
import com.google.common.collect.Sets;
20
import com.google.protobuf.Descriptors.EnumValueDescriptor;
21
import com.google.protobuf.Descriptors.FieldDescriptor;
22
import com.google.protobuf.GeneratedMessage;
23
import com.mycila.xmltool.XMLDoc;
24
import com.mycila.xmltool.XMLTag;
25
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
26
import eu.dnetlib.data.proto.FieldTypeProtos.*;
27
import eu.dnetlib.data.proto.OafProtos.OafEntity;
28
import eu.dnetlib.data.proto.OafProtos.OafRel;
29
import eu.dnetlib.data.proto.ProjectProtos.Project;
30
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
31
import eu.dnetlib.data.proto.ResultProtos.Result;
32
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
33
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
34
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
35
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
36
import eu.dnetlib.data.proto.TypeProtos;
37
import eu.dnetlib.data.proto.TypeProtos.Type;
38
import org.apache.commons.lang3.StringUtils;
39
import org.dom4j.Document;
40
import org.dom4j.DocumentException;
41
import org.dom4j.Element;
42
import org.dom4j.Node;
43
import org.dom4j.io.SAXReader;
44

    
45
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
46

    
47
public class XmlRecordFactory {
48

    
49
	// private static final Log log = LogFactory.getLog(XmlRecordFactory.class); // NOPMD by marko on 11/24/08 5:02 PM
50

    
51
	private final Map<String, Integer> relCounters = Maps.newHashMap();
52
	protected Set<String> specialDatasourceTypes;
53
	protected TemplateFactory templateFactory = new TemplateFactory();
54
	protected OafDecoder mainEntity = null;
55
	protected String key = null;
56
	protected List<OafDecoder> relations = Lists.newLinkedList();
57
	protected List<OafDecoder> children = Lists.newLinkedList();
58
	protected EntityConfigTable entityConfigTable;
59
	protected ContextMapper contextMapper;
60
	protected RelClasses relClasses;
61
	protected String schemaLocation;
62
	protected boolean entityDefaults;
63
	protected boolean relDefaults;
64
	protected boolean childDefaults;
65
	protected Set<String> contextes = Sets.newHashSet();
66
	protected List<String> extraInfo = Lists.newArrayList();
67
	protected Map<String, Integer> counters = Maps.newHashMap();
68
	protected Transformer transformer;
69

    
70
	protected static Predicate<String> instanceFilter = new Predicate<String>() {
71
		final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "license", "accessright", "collectedfrom", "dateofacceptance", "distributionlocation");
72
		@Override
73
		public boolean apply(final String s) {
74
			return instanceFieldFilter.contains(s);
75
		}
76
	};
77

    
78
	public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
79
			final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults, final Set<String> otherDatasourceTypesUForUI)
80
			throws TransformerConfigurationException, TransformerFactoryConfigurationError {
81
		this.entityConfigTable = entityConfigTable;
82
		this.contextMapper = contextMapper;
83
		this.relClasses = relClasses;
84
		this.schemaLocation = schemaLocation;
85
		this.entityDefaults = entityDefaults;
86
		this.relDefaults = relDefaults;
87
		this.childDefaults = childDefeaults;
88
		this.specialDatasourceTypes = otherDatasourceTypesUForUI;
89

    
90
		transformer = TransformerFactory.newInstance().newTransformer();
91
		transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
92
	}
93

    
94
	public static String removePrefix(final String s) {
95
		if (s.contains("|")) return StringUtils.substringAfter(s, "|");
96
		return s;
97
	}
98

    
99
	public static String escapeXml(final String value) {
100
		return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("\"", "&quot;").replaceAll("'", "&apos;");
101
	}
102

    
103
	public Map<String, Integer> getRelCounters() {
104
		return relCounters;
105
	}
106

    
107
	public RelClasses getRelClasses() {
108
		return relClasses;
109
	}
110

    
111
	public String getId() {
112
		return key;
113
	}
114

    
115
	public boolean isValid() {
116
		return mainEntity != null;
117
	}
118

    
119
	public void setMainEntity(final OafDecoder mainEntity) {
120
		this.mainEntity = mainEntity;
121
		this.key = mainEntity.decodeEntity().getId();
122
	}
123

    
124
	public void addRelation(final Type type, final OafDecoder rel) {
125
		addRelOrChild(type, relations, rel);
126
	}
127

    
128
	public void addChild(final Type type, final OafDecoder child) {
129
		addRelOrChild(type, children, child);
130
	}
131

    
132
	private void addRelOrChild(final Type type, final List<OafDecoder> list, final OafDecoder decoder) {
133

    
134
		final OafRel oafRel = decoder.getOafRel();
135
		final String rd = oafRel.getRelType().toString() + "_" + oafRel.getSubRelType() + "_" + relClasses.getInverse(oafRel.getRelClass());
136
		final LinkDescriptor ld = entityConfigTable.getDescriptor(type, new RelDescriptor(rd));
137

    
138
		if (getRelCounters().get(rd) == null) {
139
			getRelCounters().put(rd, 0);
140
		}
141

    
142
		if (ld == null) {
143
			list.add(decoder);
144
			return;
145
		}
146

    
147
		if (ld.getMax() < 0) {
148
			list.add(decoder);
149
			return;
150
		}
151

    
152
		if (getRelCounters().get(rd) < ld.getMax()) {
153
			getRelCounters().put(rd, getRelCounters().get(rd) + 1);
154
			list.add(decoder);
155
		}
156
	}
157

    
158
	public String build() {
159
		try {
160
			final OafEntityDecoder entity = mainEntity.decodeEntity();
161
			// log.info("building");
162
			// log.info("main: " + mainEntity);
163
			// log.info("rel:  " + relations);
164
			// log.info("chi:  " + children);
165
			// log.info("=============");
166

    
167
			final Predicate<String> filter = entityConfigTable.getFilter(entity.getType());
168
			final List<String> metadata = decodeType(entity, filter, entityDefaults, false);
169

    
170
			// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
171
			final List<String> rels = listRelations();
172
			metadata.addAll(buildContexts(entity.getType()));
173
			metadata.add(parseDataInfo(mainEntity));
174

    
175
			final String body = templateFactory.buildBody(entity.getType(), metadata, rels, listChildren(), extraInfo);
176

    
177
			return templateFactory
178
					.buildRecord(key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
179
		} catch (final Throwable e) {
180
			throw new RuntimeException(String.format("error building record '%s'", this.key), e);
181
		}
182
	}
183

    
184
	private String parseDataInfo(final OafDecoder decoder) {
185
		final DataInfo dataInfo = decoder.getOaf().getDataInfo();
186

    
187
		final StringBuilder sb = new StringBuilder();
188
		sb.append("<datainfo>");
189
		sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
190
		sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
191
		sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
192
		sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
193
		sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
194
		sb.append("</datainfo>");
195

    
196
		return sb.toString();
197
	}
198

    
199
	private List<String> decodeType(final OafEntityDecoder decoder, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
200

    
201
		final List<String> metadata = Lists.newArrayList();
202
		metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
203
		metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
204

    
205
		if ((decoder.getEntity() instanceof Result) && !expandingRel) {
206
			metadata.add(asXmlElement("bestaccessright", "", getBestAccessright(), null));
207

    
208
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
209
		}
210
		if ((decoder.getEntity() instanceof Project) && !expandingRel) {
211
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
212
		}
213

    
214
		return metadata;
215
	}
216

    
217
	private Qualifier getBestAccessright() {
218
		Qualifier bestAccessRight = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
219
		final LicenseComparator lc = new LicenseComparator();
220
		for (final Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
221
			if (lc.compare(bestAccessRight, instance.getAccessright()) > 0) {
222
				bestAccessRight = instance.getAccessright();
223
			}
224
		}
225
		return bestAccessRight;
226
	}
227

    
228
	public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
229
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
230
	}
231

    
232
	private List<String> listRelations() {
233

    
234
		final List<String> rels = Lists.newArrayList();
235

    
236
		for (final OafDecoder decoder : this.relations) {
237

    
238
			final OafRel rel = decoder.getOafRel();
239
			final OafEntity cachedTarget = rel.getCachedTarget();
240
			final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
241

    
242
			// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
243
			if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
244

    
245
				final List<String> metadata = Lists.newArrayList();
246
				final TypeProtos.Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
247
				//final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
248
				metadata.addAll(listFields(relDecoder.getSubRel(), entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
249

    
250
				String semanticclass = "";
251
				String semanticscheme = "";
252

    
253
				final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
254

    
255
				if ((cachedTarget != null) && cachedTarget.isInitialized()) {
256

    
257
					//final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
258
					final OafEntityDecoder d = OafEntityDecoder.decode(cachedTarget);
259
					metadata.addAll(decodeType(d, entityConfigTable.getIncludeFilter(targetType, relDescriptor), relDefaults, true));
260
					if (d.getType().equals(Type.result)) {
261
						for(Instance i : cachedTarget.getResult().getInstanceList()) {
262
							final List<String> fields = listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true);
263
							metadata.addAll(fields);
264
						}
265
					}
266
				}
267

    
268
				final RelMetadata relMetadata = relDecoder.getRelMetadata();
269
				// debug
270
				if (relMetadata == null) {
271
					// System.err.println(this);
272
					semanticclass = semanticscheme = "UNKNOWN";
273
				} else {
274
					semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
275
					semanticscheme = relMetadata.getSemantics().getSchemename();
276
				}
277

    
278
				final String rd = relDescriptor.getSubRelType().toString();
279
				incrementCounter(rd);
280

    
281
				final DataInfo info = decoder.getOaf().getDataInfo();
282
				if (info.getInferred()) {
283
					incrementCounter(rd + "_inferred");
284
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "sysimport:crosswalk")) {
285
					incrementCounter(rd + "_collected");
286
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "user:")) {
287
					incrementCounter(rd + "_claimed");
288
				}
289

    
290
				final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
291

    
292
				final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
293

    
294
				rels.add(templateFactory.getRel(targetType, relId, Sets.newHashSet(metadata), semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
295
						info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
296
			}
297
		}
298
		return rels;
299
	}
300

    
301
	// //////////////////////////////////
302

    
303
	private List<String> listChildren() {
304

    
305
		final List<String> children = Lists.newArrayList();
306
		for (final OafDecoder decoder : this.children) {
307
			final OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
308
			addChildren(children, cachedTarget, decoder.getRelDescriptor());
309
		}
310
		final OafEntityDecoder entity = mainEntity.decodeEntity();
311
		if (entity.getType().equals(Type.result)) {
312
			for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
313
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, instanceFilter, false, false),
314
						listMap(instance.getUrlList(), identifier -> templateFactory.getWebResource(identifier))));
315
			}
316
			for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
317
				// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
318
				final List<String> fields = listFields(er, null, false, false);
319
				children.add(templateFactory.getChild("externalreference", null, fields));
320
			}
321
		}
322

    
323
		return children;
324
	}
325

    
326
	private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
327
		final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
328
		incrementCounter(relDescriptor.getSubRelType().toString());
329
		final Predicate<String> filter = entityConfigTable.getIncludeFilter(target.getType(), relDescriptor);
330
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filter, childDefaults, false)));
331
	}
332

    
333
	private List<String> listFields(final GeneratedMessage fields, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
334

    
335
		final List<String> metadata = Lists.newArrayList();
336

    
337
		if (fields != null) {
338

    
339
			final Set<String> seen = Sets.newHashSet();
340

    
341
			final Map<FieldDescriptor, Object> filtered = filterFields(fields, filter);
342
			for (final Entry<FieldDescriptor, Object> e : filtered.entrySet()) {
343

    
344
				final String name = e.getKey().getName();
345
				seen.add(name);
346
				addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
347
			}
348

    
349
			if (defaults) {
350
				final Iterable<FieldDescriptor> unseen =
351
						Iterables.filter(fields.getDescriptorForType().getFields(), fd -> !seen.contains(fd.getName()) && filter.apply(fd.getName()));
352
				for(FieldDescriptor fd : unseen){
353
					addFieldValue(metadata, fd, getDefault(fd), expandingRel);
354
				}
355
			}
356
		}
357
		return metadata;
358
	}
359

    
360
	private Object getDefault(final FieldDescriptor fd) {
361
		switch (fd.getType()) {
362
		case BOOL:
363
			return false;
364
		case BYTES:
365
			return "".getBytes();
366
		case MESSAGE: {
367
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) return defaultQualifier();
368
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType()))
369
				return StructuredProperty.newBuilder().setValue("").setQualifier(defaultQualifier()).build();
370
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) return KeyValue.newBuilder().setKey("").setValue("").build();
371
			if (StringField.getDescriptor().equals(fd.getMessageType())) return StringField.newBuilder().setValue("").build();
372
			if (BoolField.getDescriptor().equals(fd.getMessageType())) return BoolField.newBuilder().buildPartial();
373
			return null;
374
		}
375
		case SFIXED32:
376
		case SFIXED64:
377
		case SINT32:
378
		case SINT64:
379
		case INT32:
380
		case INT64:
381
		case DOUBLE:
382
		case FIXED32:
383
		case FIXED64:
384
		case FLOAT:
385
			return 0;
386
		case STRING:
387
			return "";
388
		default:
389
			return null;
390
		}
391
	}
392

    
393
	private Qualifier defaultQualifier() {
394
		return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
395
	}
396

    
397
	@SuppressWarnings("unchecked")
398
	private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
399
		if ("dateofcollection".equals(fd.getName()) ||
400
			"dateoftransformation".equals(fd.getName()) ||
401
			"id".equals(fd.getName()) ||
402
				(value == null)) return;
403

    
404
		if (fd.getName().equals("datasourcetype")) {
405
			final String classid = ((Qualifier) value).getClassid();
406

    
407
			final Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
408
			if (specialDatasourceTypes.contains(classid)) {
409
				q.setClassid("other").setClassname("other");
410
			}
411
			metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
412
		}
413

    
414
		if (fd.isRepeated() && (value instanceof List<?>)) {
415
			for (final Object o : (List<Object>) value) {
416
				guessType(metadata, fd, o, expandingRel);
417
			}
418
		} else {
419
			guessType(metadata, fd, value, expandingRel);
420
		}
421
	}
422

    
423
	private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
424

    
425
		if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
426

    
427
			if(Author.getDescriptor().equals(fd.getMessageType())) {
428

    
429
				final Author a = (Author) o;
430

    
431
				final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
432
				if (a.hasName()) {
433
					sb.append(" name=\"" + escapeXml(a.getName()) + "\"");
434
				}
435
				if (a.hasSurname()) {
436
					sb.append(" surname=\"" + escapeXml(a.getSurname()) + "\"");
437
				}
438
				if (a.getPidCount() > 0) {
439
					a.getPidList().stream()
440
						.filter(kv -> StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()))
441
						.forEach(kv -> sb.append(String.format(" %s=\"%s\"", escapeXml(kv.getKey()), escapeXml(kv.getValue()))));
442
				}
443

    
444
				sb.append(">" + escapeXml(a.getFullname()) + "</creator>");
445

    
446
				metadata.add(sb.toString());
447
			}
448

    
449
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
450
				final Qualifier qualifier = (Qualifier) o;
451
				metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
452
			}
453

    
454
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
455
				final StructuredProperty sp = (StructuredProperty) o;
456
				metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
457

    
458
				if (!expandingRel && fd.getName().equals("pid")) {
459
					if (sp.getQualifier().getClassid().equalsIgnoreCase("doi")) {
460
						incrementCounter("doi");
461
					}
462
				}
463
			}
464

    
465
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
466
				final KeyValue kv = (KeyValue) o;
467
				metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
468
			}
469

    
470
			if (StringField.getDescriptor().equals(fd.getMessageType())) {
471
				final String fieldName = fd.getName();
472

    
473
				if (fieldName.equals("fundingtree")) {
474
					final String xmlTree = o instanceof StringField ? ((StringField) o).getValue() : o.toString();
475

    
476
					if (expandingRel) {
477
						metadata.add(getRelFundingTree(xmlTree));
478
						fillContextMap(xmlTree);
479
					} else {
480
						metadata.add(xmlTree);
481
					}
482
				} else {
483
					final StringField sf = (StringField) o;
484
					final StringBuilder sb = new StringBuilder("<" + fd.getName());
485
					if (sf.hasDataInfo()) {
486
						final DataInfo dataInfo = sf.getDataInfo();
487
						dataInfoAsAttributes(sb, dataInfo);
488
					}
489
					sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
490
					metadata.add(sb.toString());
491
				}
492
			}
493

    
494
			if (BoolField.getDescriptor().equals(fd.getMessageType())) {
495
				final BoolField bf = (BoolField) o;
496
				final StringBuilder sb = new StringBuilder("<" + fd.getName());
497
				if (bf.hasDataInfo()) {
498
					final DataInfo dataInfo = bf.getDataInfo();
499
					dataInfoAsAttributes(sb, dataInfo);
500
				}
501

    
502
				sb.append(">" + (bf.hasValue() ? bf.getValue() : "") + "</" + fd.getName() + ">");
503
				metadata.add(sb.toString());
504
			}
505

    
506
			if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
507
				final Journal j = (Journal) o;
508
				metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
509
						+ escapeXml(j.getIssnLinking()) + "\" " + "ep=\"" + escapeXml(j.getEp()) + "\" " + "iss=\"" + escapeXml(j.getIss()) + "\" " + "sp=\""
510
						+ escapeXml(j.getSp()) + "\" " + "vol=\"" + escapeXml(j.getVol()) + "\">" + escapeXml(j.getName()) + "</journal>");
511
			}
512

    
513
			if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
514
				final String contextid = ((Context) o).getId();
515
				contextes.add(contextid);
516
				/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
517
				if(contextid.equalsIgnoreCase("dh-ch::subcommunity::2")){
518
					contextes.add("clarin");
519
				}
520

    
521
			}
522

    
523
			if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
524

    
525
				final ExtraInfo e = (ExtraInfo) o;
526
				final StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
527

    
528
				sb.append("name=\"" + e.getName() + "\" ");
529
				sb.append("typology=\"" + e.getTypology() + "\" ");
530
				sb.append("provenance=\"" + e.getProvenance() + "\" ");
531
				sb.append("trust=\"" + e.getTrust() + "\"");
532
				sb.append(">");
533
				sb.append(e.getValue());
534
				sb.append("</" + fd.getName() + ">");
535

    
536
				extraInfo.add(sb.toString());
537
			}
538

    
539
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
540
			if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) return;
541
			metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
542
		} else {
543
			metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
544
		}
545
	}
546

    
547
	private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
548
		sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
549
		sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
550
		sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
551
		sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
552
		return sb;
553
	}
554

    
555
	private List<String> buildContexts(final Type type) {
556
		final List<String> res = Lists.newArrayList();
557

    
558
		if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(Type.result)) {
559

    
560
			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
561

    
562
			for (final String context : contextes) {
563

    
564
				String id = "";
565
				for (final String token : Splitter.on("::").split(context)) {
566
					id += token;
567

    
568
					final ContextDef def = contextMapper.get(id);
569

    
570
					if (def == null) {
571
						continue;
572
						// throw new IllegalStateException(String.format("cannot find context for id '%s'", id));
573
					}
574

    
575
					if (def.getName().equals("context")) {
576
						final String xpath = "//context/@id='" + def.getId() + "'";
577
						if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
578
							document = addContextDef(document.gotoRoot(), def);
579
						}
580
					}
581

    
582
					if (def.getName().equals("category")) {
583
						final String rootId = StringUtils.substringBefore(def.getId(), "::");
584
						document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
585
					}
586

    
587
					if (def.getName().equals("concept")) {
588
						document = addContextDef(document, def).gotoParent();
589
					}
590
					id += "::";
591
				}
592
			}
593

    
594
			for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
595
				try {
596
					res.add(asStringElement(x));
597
				} catch (final TransformerException e) {
598
					throw new RuntimeException(e);
599
				}
600
			}
601
		}
602

    
603
		return res;
604
	}
605

    
606
	private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
607
		tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
608
		if ((def.getType() != null) && !def.getType().isEmpty()) {
609
			tag.addAttribute("type", def.getType());
610
		}
611
		return tag;
612
	}
613

    
614
	private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
615
		final StringWriter buffer = new StringWriter();
616
		transformer.transform(new DOMSource(element), new StreamResult(buffer));
617
		return buffer.toString();
618
	}
619

    
620
	@SuppressWarnings("unchecked")
621
	private String getRelFundingTree(final String xmlTree) {
622
		String funding = "<funding>";
623
		try {
624
			final Document ftree = new SAXReader().read(new StringReader(xmlTree));
625
			funding = "<funding>";
626
			// String _id = "";
627

    
628
			funding += getFunderElement(ftree);
629

    
630
			for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
631
				final Element e = (Element) o;
632
				final String _id = e.valueOf("./id");
633
				funding += "<" + e.getName() + " name=\"" + escapeXml(e.valueOf("./name")) + "\">" + escapeXml(_id) + "</" + e.getName() + ">";
634
				// _id += "::";
635
			}
636
		} catch (final DocumentException e) {
637
			throw new IllegalArgumentException("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
638
		} finally {
639
			funding += "</funding>";
640
		}
641
		return funding;
642
	}
643

    
644
	private String getFunderElement(final Document ftree) {
645
		final String funderId = ftree.valueOf("//fundingtree/funder/id/text()");
646
		final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname/text()");
647
		final String funderName = ftree.valueOf("//fundingtree/funder/name/text()");
648
		final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction/text()");
649

    
650
		return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
651
				+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
652
	}
653

    
654
	private void fillContextMap(final String xmlTree) {
655

    
656
		Document fundingPath;
657
		try {
658
			fundingPath = new SAXReader().read(new StringReader(xmlTree));
659
		} catch (final DocumentException e) {
660
			throw new RuntimeException(e);
661
		}
662
		try {
663
			final Node funder = fundingPath.selectSingleNode("//funder");
664

    
665
			if (funder != null) {
666

    
667
				final String funderShortName = funder.valueOf("./shortname");
668
				contextes.add(funderShortName);
669

    
670
				contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
671
				final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
672
				if (level0 != null) {
673
					final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
674
					contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
675
					final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
676
					if (level1 == null) {
677
						contextes.add(level0Id);
678
					} else {
679
						final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
680
						contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
681
						final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
682
						if (level2 == null) {
683
							contextes.add(level1Id);
684
						} else {
685
							final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
686
							contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
687
							contextes.add(level2Id);
688
						}
689
					}
690
				}
691
			}
692
		} catch (final NullPointerException e) {
693
			throw new IllegalArgumentException("malformed funding path: " + xmlTree, e);
694
		}
695
	}
696

    
697
	private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
698
		StringBuilder sb = new StringBuilder();
699
		sb.append("<");
700
		sb.append(name);
701
		if (q != null) {
702
			sb.append(getAttributes(q));
703
		}
704
		if (dataInfo != null) {
705
			sb = dataInfoAsAttributes(sb, dataInfo);
706
		}
707
		if ((value == null) || value.isEmpty()) {
708
			sb.append("/>");
709
			return sb.toString();
710
			// return "<" + name + getAttributes(q) + "/>";
711
		}
712

    
713
		sb.append(">");
714
		// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
715
		sb.append(escapeXml(value));
716
		sb.append("</");
717
		sb.append(name);
718
		sb.append(">");
719

    
720
		return sb.toString();
721
		// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
722
	}
723

    
724
	private String getAttributes(final Qualifier q) {
725
		if (q == null) return "";
726

    
727
		final StringBuilder sb = new StringBuilder();
728
		for (final Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
729
			// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
730
			sb.append(" ");
731
			sb.append(e.getKey().getName());
732
			sb.append("=\"");
733
			sb.append(escapeXml(e.getValue().toString()));
734
			sb.append("\"");
735
		}
736
		return sb.toString();
737
	}
738

    
739

    
740
	private Map<FieldDescriptor, Object> filterFields(final GeneratedMessage fields, final Predicate<String> acceptFilter) {
741
		if(acceptFilter == null) return fields.getAllFields();
742
		final Map<FieldDescriptor, Object> res = Maps.newHashMap();
743
		for(Entry<FieldDescriptor, Object> e : fields.getAllFields().entrySet()) {
744
			if (acceptFilter.apply(e.getKey().getName())) {
745
				res.put(e.getKey(), e.getValue());
746
			}
747
		}
748
		return res;
749
	}
750

    
751

    
752

    
753
	private List<String> countersAsXml() {
754
		final List<String> out = Lists.newArrayList();
755
		for (final Entry<String, Integer> e : counters.entrySet()) {
756
			out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
757
		}
758
		return out;
759
	}
760

    
761
	private void incrementCounter(final String type) {
762
		if (!counters.containsKey(type)) {
763
			counters.put(type, 1);
764
		} else {
765
			counters.put(type, counters.get(type) + 1);
766
		}
767
	}
768

    
769
	@Override
770
	public String toString() {
771
		final StringBuilder sb = new StringBuilder();
772
		sb.append("################################################\n");
773
		sb.append("ID: ").append(key).append("\n");
774
		if (mainEntity != null) {
775
			sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
776
		}
777
		if (relations != null) {
778
			sb.append("\nRELATIONS:\n");
779
			for (final OafDecoder decoder : relations) {
780
				sb.append(decoder.getOafRel().toString() + "\n");
781
			}
782
		}
783
		if (children != null) {
784
			sb.append("\nCHILDREN:\n");
785
			for (final OafDecoder decoder : children) {
786
				sb.append(decoder.getOafRel().toString() + "\n");
787
			}
788
		}
789
		return sb.toString();
790
	}
791

    
792
}
(7-7/7)