Project

General

Profile

1
package eu.dnetlib.data.mapreduce.util;
2

    
3
import java.io.StringReader;
4
import java.io.StringWriter;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Map.Entry;
8
import java.util.Set;
9
import javax.xml.transform.*;
10
import javax.xml.transform.dom.DOMSource;
11
import javax.xml.transform.stream.StreamResult;
12

    
13
import com.google.common.base.Joiner;
14
import com.google.common.base.Predicate;
15
import com.google.common.base.Splitter;
16
import com.google.common.collect.Iterables;
17
import com.google.common.collect.Lists;
18
import com.google.common.collect.Maps;
19
import com.google.common.collect.Sets;
20
import com.google.protobuf.Descriptors.EnumValueDescriptor;
21
import com.google.protobuf.Descriptors.FieldDescriptor;
22
import com.google.protobuf.GeneratedMessage;
23
import com.mycila.xmltool.XMLDoc;
24
import com.mycila.xmltool.XMLTag;
25
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
26
import eu.dnetlib.data.proto.FieldTypeProtos.*;
27
import eu.dnetlib.data.proto.OafProtos.OafEntity;
28
import eu.dnetlib.data.proto.OafProtos.OafRel;
29
import eu.dnetlib.data.proto.PersonProtos.Person;
30
import eu.dnetlib.data.proto.ProjectProtos.Project;
31
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
32
import eu.dnetlib.data.proto.ResultProtos.Result;
33
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
34
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
35
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
36
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
37
import eu.dnetlib.data.proto.TypeProtos;
38
import eu.dnetlib.data.proto.TypeProtos.Type;
39
import org.apache.commons.lang.StringUtils;
40
import org.dom4j.Document;
41
import org.dom4j.DocumentException;
42
import org.dom4j.Element;
43
import org.dom4j.Node;
44
import org.dom4j.io.SAXReader;
45

    
46
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
47

    
48
public class XmlRecordFactory {
49

    
50
	// private static final Log log = LogFactory.getLog(XmlRecordFactory.class); // NOPMD by marko on 11/24/08 5:02 PM
51

    
52
	private final Map<String, Integer> relCounters = Maps.newHashMap();
53
	protected Set<String> specialDatasourceTypes;
54
	protected TemplateFactory templateFactory = new TemplateFactory();
55
	protected OafDecoder mainEntity = null;
56
	protected String key = null;
57
	protected List<OafDecoder> relations = Lists.newLinkedList();
58
	protected List<OafDecoder> children = Lists.newLinkedList();
59
	protected EntityConfigTable entityConfigTable;
60
	protected ContextMapper contextMapper;
61
	protected RelClasses relClasses;
62
	protected String schemaLocation;
63
	protected boolean entityDefaults;
64
	protected boolean relDefaults;
65
	protected boolean childDefaults;
66
	protected Set<String> contextes = Sets.newHashSet();
67
	protected List<String> extraInfo = Lists.newArrayList();
68
	protected Map<String, Integer> counters = Maps.newHashMap();
69
	protected Transformer transformer;
70

    
71
	protected static Predicate<String> instanceFilter = new Predicate<String>() {
72
		final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "licence", "collectedfrom", "dateofacceptance");
73
		@Override
74
		public boolean apply(final String s) {
75
			return instanceFieldFilter.contains(s);
76
		}
77
	};
78

    
79
	public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
80
			final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults, final Set<String> otherDatasourceTypesUForUI)
81
			throws TransformerConfigurationException, TransformerFactoryConfigurationError {
82
		this.entityConfigTable = entityConfigTable;
83
		this.contextMapper = contextMapper;
84
		this.relClasses = relClasses;
85
		this.schemaLocation = schemaLocation;
86
		this.entityDefaults = entityDefaults;
87
		this.relDefaults = relDefaults;
88
		this.childDefaults = childDefeaults;
89
		this.specialDatasourceTypes = otherDatasourceTypesUForUI;
90

    
91
		transformer = TransformerFactory.newInstance().newTransformer();
92
		transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
93
	}
94

    
95
	public static String removePrefix(final String s) {
96
		if (s.contains("|")) return StringUtils.substringAfter(s, "|");
97
		return s;
98
	}
99

    
100
	public static String escapeXml(final String value) {
101
		return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("\"", "&quot;").replaceAll("'", "&apos;");
102
	}
103

    
104
	public Map<String, Integer> getRelCounters() {
105
		return relCounters;
106
	}
107

    
108
	public RelClasses getRelClasses() {
109
		return relClasses;
110
	}
111

    
112
	public String getId() {
113
		return key;
114
	}
115

    
116
	public boolean isValid() {
117
		return mainEntity != null;
118
	}
119

    
120
	public void setMainEntity(final OafDecoder mainEntity) {
121
		this.mainEntity = mainEntity;
122
		this.key = mainEntity.decodeEntity().getId();
123
	}
124

    
125
	public void addRelation(final Type type, final OafDecoder rel) {
126
		addRelOrChild(type, relations, rel);
127
	}
128

    
129
	public void addChild(final Type type, final OafDecoder child) {
130
		addRelOrChild(type, children, child);
131
	}
132

    
133
	private void addRelOrChild(final Type type, final List<OafDecoder> list, final OafDecoder decoder) {
134

    
135
		final OafRel oafRel = decoder.getOafRel();
136
		final String rd = oafRel.getRelType().toString() + "_" + oafRel.getSubRelType() + "_" + relClasses.getInverse(oafRel.getRelClass());
137
		final LinkDescriptor ld = entityConfigTable.getDescriptor(type, new RelDescriptor(rd));
138

    
139
		if (getRelCounters().get(rd) == null) {
140
			getRelCounters().put(rd, 0);
141
		}
142

    
143
		if (ld == null) {
144
			list.add(decoder);
145
			return;
146
		}
147

    
148
		if (ld.getMax() < 0) {
149
			list.add(decoder);
150
			return;
151
		}
152

    
153
		if (getRelCounters().get(rd) < ld.getMax()) {
154
			getRelCounters().put(rd, getRelCounters().get(rd) + 1);
155
			list.add(decoder);
156
		}
157
	}
158

    
159
	public String build() {
160
		try {
161
			final OafEntityDecoder entity = mainEntity.decodeEntity();
162
			// log.info("building");
163
			// log.info("main: " + mainEntity);
164
			// log.info("rel:  " + relations);
165
			// log.info("chi:  " + children);
166
			// log.info("=============");
167

    
168
			final Predicate<String> filter = entityConfigTable.getFilter(entity.getType());
169
			final List<String> metadata = decodeType(entity, filter, entityDefaults, false);
170

    
171
			// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
172
			final List<String> rels = listRelations();
173
			metadata.addAll(buildContexts(entity.getType()));
174
			metadata.add(parseDataInfo(mainEntity));
175

    
176
			final String body = templateFactory.buildBody(entity.getType(), metadata, rels, listChildren(), extraInfo);
177

    
178
			return templateFactory
179
					.buildRecord(key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
180
		} catch (final Throwable e) {
181
			throw new RuntimeException(String.format("error building record '%s'", this.key), e);
182
		}
183
	}
184

    
185
	private String parseDataInfo(final OafDecoder decoder) {
186
		final DataInfo dataInfo = decoder.getOaf().getDataInfo();
187

    
188
		final StringBuilder sb = new StringBuilder();
189
		sb.append("<datainfo>");
190
		sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
191
		sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
192
		sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
193
		sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
194
		sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
195
		sb.append("</datainfo>");
196

    
197
		return sb.toString();
198
	}
199

    
200
	private List<String> decodeType(final OafEntityDecoder decoder, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
201

    
202
		final List<String> metadata = Lists.newArrayList();
203
		metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
204
		metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
205

    
206
		if ((decoder.getEntity() instanceof Result) && !expandingRel) {
207
			metadata.add(asXmlElement("bestlicense", "", getBestLicense(), null));
208

    
209
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
210
		}
211
		if ((decoder.getEntity() instanceof Person) && !expandingRel) {
212
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
213
		}
214
		if ((decoder.getEntity() instanceof Project) && !expandingRel) {
215
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
216
		}
217

    
218
		return metadata;
219
	}
220

    
221
	private Qualifier getBestLicense() {
222
		Qualifier bestLicense = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
223
		final LicenseComparator lc = new LicenseComparator();
224
		for (final Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
225
			if (lc.compare(bestLicense, instance.getLicence()) > 0) {
226
				bestLicense = instance.getLicence();
227
			}
228
		}
229
		return bestLicense;
230
	}
231

    
232
	public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
233
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
234
	}
235

    
236
	private List<String> listRelations() {
237

    
238
		final List<String> rels = Lists.newArrayList();
239

    
240
		for (final OafDecoder decoder : this.relations) {
241

    
242
			final OafRel rel = decoder.getOafRel();
243
			final OafEntity cachedTarget = rel.getCachedTarget();
244
			final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
245

    
246
			// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
247
			if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
248

    
249
				final List<String> metadata = Lists.newArrayList();
250
				final TypeProtos.Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
251
				//final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
252
				metadata.addAll(listFields(relDecoder.getSubRel(), entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
253

    
254
				String semanticclass = "";
255
				String semanticscheme = "";
256

    
257
				final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
258

    
259
				if ((cachedTarget != null) && cachedTarget.isInitialized()) {
260

    
261
					//final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
262
					final OafEntityDecoder d = OafEntityDecoder.decode(cachedTarget);
263
					metadata.addAll(decodeType(d, entityConfigTable.getIncludeFilter(targetType, relDescriptor), relDefaults, true));
264
					if (d.getType().equals(Type.result)) {
265
						for(Instance i : cachedTarget.getResult().getInstanceList()) {
266
							final List<String> fields = listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true);
267
							metadata.addAll(fields);
268
						}
269
					}
270
				}
271

    
272
				final RelMetadata relMetadata = relDecoder.getRelMetadata();
273
				// debug
274
				if (relMetadata == null) {
275
					// System.err.println(this);
276
					semanticclass = semanticscheme = "UNKNOWN";
277
				} else {
278
					semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
279
					semanticscheme = relMetadata.getSemantics().getSchemename();
280
				}
281

    
282
				final String rd = relDescriptor.getSubRelType().toString();
283
				incrementCounter(rd);
284

    
285
				final DataInfo info = decoder.getOaf().getDataInfo();
286
				if (info.getInferred()) {
287
					incrementCounter(rd + "_inferred");
288
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "sysimport:crosswalk")) {
289
					incrementCounter(rd + "_collected");
290
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "user:")) {
291
					incrementCounter(rd + "_claimed");
292
				}
293

    
294
				final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
295

    
296
				final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
297

    
298
				rels.add(templateFactory.getRel(targetType, relId, Sets.newHashSet(metadata), semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
299
						info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
300
			}
301
		}
302
		return rels;
303
	}
304

    
305
	// //////////////////////////////////
306

    
307
	private List<String> listChildren() {
308

    
309
		final List<String> children = Lists.newArrayList();
310
		for (final OafDecoder decoder : this.children) {
311
			final OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
312
			addChildren(children, cachedTarget, decoder.getRelDescriptor());
313
		}
314
		final OafEntityDecoder entity = mainEntity.decodeEntity();
315
		if (entity.getType().equals(Type.result)) {
316
			for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
317
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, instanceFilter, false, false),
318
						listMap(instance.getUrlList(), identifier -> templateFactory.getWebResource(identifier))));
319
			}
320
			for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
321
				// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
322
				final List<String> fields = listFields(er, null, false, false);
323
				children.add(templateFactory.getChild("externalreference", null, fields));
324
			}
325
		}
326

    
327
		return children;
328
	}
329

    
330
	private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
331
		final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
332
		incrementCounter(relDescriptor.getSubRelType().toString());
333
		final Predicate<String> filter = entityConfigTable.getIncludeFilter(target.getType(), relDescriptor);
334
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filter, childDefaults, false)));
335
	}
336

    
337
	private List<String> listFields(final GeneratedMessage fields, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
338

    
339
		final List<String> metadata = Lists.newArrayList();
340

    
341
		if (fields != null) {
342

    
343
			final Set<String> seen = Sets.newHashSet();
344

    
345
			final Map<FieldDescriptor, Object> filtered = filterFields(fields, filter);
346
			for (final Entry<FieldDescriptor, Object> e : filtered.entrySet()) {
347

    
348
				final String name = e.getKey().getName();
349
				seen.add(name);
350
				addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
351
			}
352

    
353
			if (defaults) {
354
				final Iterable<FieldDescriptor> unseen =
355
						Iterables.filter(fields.getDescriptorForType().getFields(), fd -> !seen.contains(fd.getName()) && filter.apply(fd.getName()));
356
				for(FieldDescriptor fd : unseen){
357
					addFieldValue(metadata, fd, getDefault(fd), expandingRel);
358
				}
359
			}
360
		}
361
		return metadata;
362
	}
363

    
364
	private Object getDefault(final FieldDescriptor fd) {
365
		switch (fd.getType()) {
366
		case BOOL:
367
			return false;
368
		case BYTES:
369
			return "".getBytes();
370
		case MESSAGE: {
371
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) return defaultQualifier();
372
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType()))
373
				return StructuredProperty.newBuilder().setValue("").setQualifier(defaultQualifier()).build();
374
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) return KeyValue.newBuilder().setKey("").setValue("").build();
375
			if (StringField.getDescriptor().equals(fd.getMessageType())) return StringField.newBuilder().setValue("").build();
376
			if (BoolField.getDescriptor().equals(fd.getMessageType())) return BoolField.newBuilder().buildPartial();
377
			return null;
378
		}
379
		case SFIXED32:
380
		case SFIXED64:
381
		case SINT32:
382
		case SINT64:
383
		case INT32:
384
		case INT64:
385
		case DOUBLE:
386
		case FIXED32:
387
		case FIXED64:
388
		case FLOAT:
389
			return 0;
390
		case STRING:
391
			return "";
392
		default:
393
			return null;
394
		}
395
	}
396

    
397
	private Qualifier defaultQualifier() {
398
		return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
399
	}
400

    
401
	@SuppressWarnings("unchecked")
402
	private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
403
		if ("dateofcollection".equals(fd.getName()) ||
404
			"dateoftransformation".equals(fd.getName()) ||
405
			"id".equals(fd.getName()) ||
406
				(value == null)) return;
407

    
408
		if (fd.getName().equals("datasourcetype")) {
409
			final String classid = ((Qualifier) value).getClassid();
410

    
411
			final Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
412
			if (specialDatasourceTypes.contains(classid)) {
413
				q.setClassid("other").setClassname("other");
414
			}
415
			metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
416
		}
417

    
418
		if (fd.isRepeated() && (value instanceof List<?>)) {
419
			for (final Object o : (List<Object>) value) {
420
				guessType(metadata, fd, o, expandingRel);
421
			}
422
		} else {
423
			guessType(metadata, fd, value, expandingRel);
424
		}
425
	}
426

    
427
	private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
428

    
429
		if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
430

    
431
			if(Author.getDescriptor().equals(fd.getMessageType())) {
432

    
433
				final Author a = (Author) o;
434

    
435
				final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
436
				if (a.hasName()) {
437
					sb.append(" name=\"" + escapeXml(a.getName()) + "\"");
438
				}
439
				if (a.hasSurname()) {
440
					sb.append(" surname=\"" + escapeXml(a.getSurname()) + "\"");
441
				}
442
				sb.append(">" + escapeXml(a.getFullname()) + "</creator>");
443

    
444
				metadata.add(sb.toString());
445
			}
446

    
447
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
448
				final Qualifier qualifier = (Qualifier) o;
449
				metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
450
			}
451

    
452
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
453
				final StructuredProperty sp = (StructuredProperty) o;
454
				metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
455

    
456
				if (!expandingRel && fd.getName().equals("pid")) {
457
					if (sp.getQualifier().getClassid().equalsIgnoreCase("doi")) {
458
						incrementCounter("doi");
459
					}
460
				}
461
			}
462

    
463
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
464
				final KeyValue kv = (KeyValue) o;
465
				metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
466
			}
467

    
468
			if (StringField.getDescriptor().equals(fd.getMessageType())) {
469
				final String fieldName = fd.getName();
470

    
471
				if (fieldName.equals("fundingtree")) {
472
					final String xmlTree = o instanceof StringField ? ((StringField) o).getValue() : o.toString();
473

    
474
					if (expandingRel) {
475
						metadata.add(getRelFundingTree(xmlTree));
476
						fillContextMap(xmlTree);
477
					} else {
478
						metadata.add(xmlTree);
479
					}
480
				} else {
481
					final StringField sf = (StringField) o;
482
					final StringBuilder sb = new StringBuilder("<" + fd.getName());
483
					if (sf.hasDataInfo()) {
484
						final DataInfo dataInfo = sf.getDataInfo();
485
						dataInfoAsAttributes(sb, dataInfo);
486
					}
487
					sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
488
					metadata.add(sb.toString());
489
				}
490
			}
491

    
492
			if (BoolField.getDescriptor().equals(fd.getMessageType())) {
493
				final BoolField bf = (BoolField) o;
494
				final StringBuilder sb = new StringBuilder("<" + fd.getName());
495
				if (bf.hasDataInfo()) {
496
					final DataInfo dataInfo = bf.getDataInfo();
497
					dataInfoAsAttributes(sb, dataInfo);
498
				}
499

    
500
				sb.append(">" + (bf.hasValue() ? bf.getValue() : "") + "</" + fd.getName() + ">");
501
				metadata.add(sb.toString());
502
			}
503

    
504
			if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
505
				final Journal j = (Journal) o;
506
				metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
507
						+ escapeXml(j.getIssnLinking()) + "\" " + "ep=\"" + escapeXml(j.getEp()) + "\" " + "iss=\"" + escapeXml(j.getIss()) + "\" " + "sp=\""
508
						+ escapeXml(j.getSp()) + "\" " + "vol=\"" + escapeXml(j.getVol()) + "\">" + escapeXml(j.getName()) + "</journal>");
509
			}
510

    
511
			if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
512
				contextes.add(((Context) o).getId());
513
			}
514

    
515
			if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
516

    
517
				final ExtraInfo e = (ExtraInfo) o;
518
				final StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
519

    
520
				sb.append("name=\"" + e.getName() + "\" ");
521
				sb.append("typology=\"" + e.getTypology() + "\" ");
522
				sb.append("provenance=\"" + e.getProvenance() + "\" ");
523
				sb.append("trust=\"" + e.getTrust() + "\"");
524
				sb.append(">");
525
				sb.append(e.getValue());
526
				sb.append("</" + fd.getName() + ">");
527

    
528
				extraInfo.add(sb.toString());
529
			}
530

    
531
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
532
			if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) return;
533
			metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
534
		} else {
535
			metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
536
		}
537
	}
538

    
539
	private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
540
		sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
541
		sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
542
		sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
543
		sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
544
		return sb;
545
	}
546

    
547
	private List<String> buildContexts(final Type type) {
548
		final List<String> res = Lists.newArrayList();
549

    
550
		if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(Type.result)) {
551

    
552
			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
553

    
554
			for (final String context : contextes) {
555

    
556
				String id = "";
557
				for (final String token : Splitter.on("::").split(context)) {
558
					id += token;
559

    
560
					final ContextDef def = contextMapper.get(id);
561

    
562
					if (def == null) throw new IllegalStateException(String.format("cannot find context for id '%s'", id));
563

    
564
					if (def.getName().equals("context")) {
565
						final String xpath = "//context/@id='" + def.getId() + "'";
566
						if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
567
							document = addContextDef(document.gotoRoot(), def);
568
						}
569
					}
570

    
571
					if (def.getName().equals("category")) {
572
						final String rootId = StringUtils.substringBefore(def.getId(), "::");
573
						document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
574
					}
575

    
576
					if (def.getName().equals("concept")) {
577
						document = addContextDef(document, def).gotoParent();
578
					}
579
					id += "::";
580
				}
581
			}
582

    
583
			for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
584
				try {
585
					res.add(asStringElement(x));
586
				} catch (final TransformerException e) {
587
					throw new RuntimeException(e);
588
				}
589
			}
590
		}
591

    
592
		return res;
593
	}
594

    
595
	private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
596
		tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
597
		if ((def.getType() != null) && !def.getType().isEmpty()) {
598
			tag.addAttribute("type", def.getType());
599
		}
600
		return tag;
601
	}
602

    
603
	private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
604
		final StringWriter buffer = new StringWriter();
605
		transformer.transform(new DOMSource(element), new StreamResult(buffer));
606
		return buffer.toString();
607
	}
608

    
609
	@SuppressWarnings("unchecked")
610
	private String getRelFundingTree(final String xmlTree) {
611
		String funding = "<funding>";
612
		try {
613
			final Document ftree = new SAXReader().read(new StringReader(xmlTree));
614
			funding = "<funding>";
615
			// String _id = "";
616

    
617
			funding += getFunderElement(ftree);
618

    
619
			for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
620
				final Element e = (Element) o;
621
				final String _id = e.valueOf("./id");
622
				funding += "<" + e.getName() + " name=\"" + escapeXml(e.valueOf("./name")) + "\">" + escapeXml(_id) + "</" + e.getName() + ">";
623
				// _id += "::";
624
			}
625
		} catch (final DocumentException e) {
626
			throw new IllegalArgumentException("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
627
		} finally {
628
			funding += "</funding>";
629
		}
630
		return funding;
631
	}
632

    
633
	private String getFunderElement(final Document ftree) {
634
		final String funderId = ftree.valueOf("//fundingtree/funder/id/text()");
635
		final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname/text()");
636
		final String funderName = ftree.valueOf("//fundingtree/funder/name/text()");
637
		final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction/text()");
638

    
639
		return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
640
				+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
641
	}
642

    
643
	private void fillContextMap(final String xmlTree) {
644

    
645
		Document fundingPath;
646
		try {
647
			fundingPath = new SAXReader().read(new StringReader(xmlTree));
648
		} catch (final DocumentException e) {
649
			throw new RuntimeException(e);
650
		}
651
		try {
652
			final Node funder = fundingPath.selectSingleNode("//funder");
653

    
654
			if (funder != null) {
655

    
656
				final String funderShortName = funder.valueOf("./shortname");
657
				contextes.add(funderShortName);
658

    
659
				contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
660
				final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
661
				if (level0 != null) {
662
					final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
663
					contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
664
					final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
665
					if (level1 == null) {
666
						contextes.add(level0Id);
667
					} else {
668
						final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
669
						contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
670
						final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
671
						if (level2 == null) {
672
							contextes.add(level1Id);
673
						} else {
674
							final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
675
							contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
676
							contextes.add(level2Id);
677
						}
678
					}
679
				}
680
			}
681
		} catch (final NullPointerException e) {
682
			throw new IllegalArgumentException("malformed funding path: " + xmlTree, e);
683
		}
684
	}
685

    
686
	private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
687
		StringBuilder sb = new StringBuilder();
688
		sb.append("<");
689
		sb.append(name);
690
		if (q != null) {
691
			sb.append(getAttributes(q));
692
		}
693
		if (dataInfo != null) {
694
			sb = dataInfoAsAttributes(sb, dataInfo);
695
		}
696
		if ((value == null) || value.isEmpty()) {
697
			sb.append("/>");
698
			return sb.toString();
699
			// return "<" + name + getAttributes(q) + "/>";
700
		}
701

    
702
		sb.append(">");
703
		// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
704
		sb.append(escapeXml(value));
705
		sb.append("</");
706
		sb.append(name);
707
		sb.append(">");
708

    
709
		return sb.toString();
710
		// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
711
	}
712

    
713
	private String getAttributes(final Qualifier q) {
714
		if (q == null) return "";
715

    
716
		final StringBuilder sb = new StringBuilder();
717
		for (final Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
718
			// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
719
			sb.append(" ");
720
			sb.append(e.getKey().getName());
721
			sb.append("=\"");
722
			sb.append(escapeXml(e.getValue().toString()));
723
			sb.append("\"");
724
		}
725
		return sb.toString();
726
	}
727

    
728

    
729
	private Map<FieldDescriptor, Object> filterFields(final GeneratedMessage fields, final Predicate<String> acceptFilter) {
730
		if(acceptFilter == null) return fields.getAllFields();
731
		final Map<FieldDescriptor, Object> res = Maps.newHashMap();
732
		for(Entry<FieldDescriptor, Object> e : fields.getAllFields().entrySet()) {
733
			if (acceptFilter.apply(e.getKey().getName())) {
734
				res.put(e.getKey(), e.getValue());
735
			}
736
		}
737
		return res;
738
	}
739

    
740

    
741

    
742
	private List<String> countersAsXml() {
743
		final List<String> out = Lists.newArrayList();
744
		for (final Entry<String, Integer> e : counters.entrySet()) {
745
			out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
746
		}
747
		return out;
748
	}
749

    
750
	private void incrementCounter(final String type) {
751
		if (!counters.containsKey(type)) {
752
			counters.put(type, 1);
753
		} else {
754
			counters.put(type, counters.get(type) + 1);
755
		}
756
	}
757

    
758
	@Override
759
	public String toString() {
760
		final StringBuilder sb = new StringBuilder();
761
		sb.append("################################################\n");
762
		sb.append("ID: ").append(key).append("\n");
763
		if (mainEntity != null) {
764
			sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
765
		}
766
		if (relations != null) {
767
			sb.append("\nRELATIONS:\n");
768
			for (final OafDecoder decoder : relations) {
769
				sb.append(decoder.getOafRel().toString() + "\n");
770
			}
771
		}
772
		if (children != null) {
773
			sb.append("\nCHILDREN:\n");
774
			for (final OafDecoder decoder : children) {
775
				sb.append(decoder.getOafRel().toString() + "\n");
776
			}
777
		}
778
		return sb.toString();
779
	}
780

    
781
}
(7-7/7)