Project

General

Profile

1
package eu.dnetlib.data.mapreduce.util;
2

    
3
import java.io.StringReader;
4
import java.io.StringWriter;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Map.Entry;
8
import java.util.Set;
9
import javax.xml.transform.*;
10
import javax.xml.transform.dom.DOMSource;
11
import javax.xml.transform.stream.StreamResult;
12

    
13
import com.google.common.base.Joiner;
14
import com.google.common.base.Predicate;
15
import com.google.common.base.Splitter;
16
import com.google.common.collect.Iterables;
17
import com.google.common.collect.Lists;
18
import com.google.common.collect.Maps;
19
import com.google.common.collect.Sets;
20
import com.google.protobuf.Descriptors.EnumValueDescriptor;
21
import com.google.protobuf.Descriptors.FieldDescriptor;
22
import com.google.protobuf.GeneratedMessage;
23
import com.mycila.xmltool.XMLDoc;
24
import com.mycila.xmltool.XMLTag;
25
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
26
import eu.dnetlib.data.proto.FieldTypeProtos.*;
27
import eu.dnetlib.data.proto.OafProtos.OafEntity;
28
import eu.dnetlib.data.proto.OafProtos.OafRel;
29
import eu.dnetlib.data.proto.PersonProtos.Person;
30
import eu.dnetlib.data.proto.ProjectProtos.Project;
31
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
32
import eu.dnetlib.data.proto.ResultProtos.Result;
33
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
34
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
35
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
36
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
37
import eu.dnetlib.data.proto.TypeProtos;
38
import eu.dnetlib.data.proto.TypeProtos.Type;
39
import eu.dnetlib.miscutils.functional.UnaryFunction;
40
import org.apache.commons.lang.StringUtils;
41
import org.dom4j.Document;
42
import org.dom4j.DocumentException;
43
import org.dom4j.Element;
44
import org.dom4j.Node;
45
import org.dom4j.io.SAXReader;
46

    
47
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
48

    
49
public class XmlRecordFactory {
50

    
51
	// private static final Log log = LogFactory.getLog(XmlRecordFactory.class); // NOPMD by marko on 11/24/08 5:02 PM
52

    
53
	private final Map<String, Integer> relCounters = Maps.newHashMap();
54
	protected Set<String> specialDatasourceTypes;
55
	protected TemplateFactory templateFactory = new TemplateFactory();
56
	protected OafDecoder mainEntity = null;
57
	protected String key = null;
58
	protected List<OafDecoder> relations = Lists.newLinkedList();
59
	protected List<OafDecoder> children = Lists.newLinkedList();
60
	protected EntityConfigTable entityConfigTable;
61
	protected ContextMapper contextMapper;
62
	protected RelClasses relClasses;
63
	protected String schemaLocation;
64
	protected boolean entityDefaults;
65
	protected boolean relDefaults;
66
	protected boolean childDefaults;
67
	protected Set<String> contextes = Sets.newHashSet();
68
	protected List<String> extraInfo = Lists.newArrayList();
69
	protected Map<String, Integer> counters = Maps.newHashMap();
70
	protected Transformer transformer;
71

    
72
	protected static Predicate<String> instanceFilter = new Predicate<String>() {
73
		final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "licence");
74
		@Override
75
		public boolean apply(final String s) {
76
			return instanceFieldFilter.contains(s);
77
		}
78
	};
79

    
80
	public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
81
			final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults, final Set<String> otherDatasourceTypesUForUI)
82
			throws TransformerConfigurationException, TransformerFactoryConfigurationError {
83
		this.entityConfigTable = entityConfigTable;
84
		this.contextMapper = contextMapper;
85
		this.relClasses = relClasses;
86
		this.schemaLocation = schemaLocation;
87
		this.entityDefaults = entityDefaults;
88
		this.relDefaults = relDefaults;
89
		this.childDefaults = childDefeaults;
90
		this.specialDatasourceTypes = otherDatasourceTypesUForUI;
91

    
92
		transformer = TransformerFactory.newInstance().newTransformer();
93
		transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
94
	}
95

    
96
	public static String removePrefix(final String s) {
97
		if (s.contains("|")) return StringUtils.substringAfter(s, "|");
98
		return s;
99
	}
100

    
101
	public static String escapeXml(final String value) {
102
		return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("\"", "&quot;").replaceAll("'", "&apos;");
103
	}
104

    
105
	public Map<String, Integer> getRelCounters() {
106
		return relCounters;
107
	}
108

    
109
	public RelClasses getRelClasses() {
110
		return relClasses;
111
	}
112

    
113
	public String getId() {
114
		return key;
115
	}
116

    
117
	public boolean isValid() {
118
		return mainEntity != null;
119
	}
120

    
121
	public void setMainEntity(final OafDecoder mainEntity) {
122
		this.mainEntity = mainEntity;
123
		this.key = mainEntity.decodeEntity().getId();
124
	}
125

    
126
	public void addRelation(final Type type, final OafDecoder rel) {
127
		addRelOrChild(type, relations, rel);
128
	}
129

    
130
	public void addChild(final Type type, final OafDecoder child) {
131
		addRelOrChild(type, children, child);
132
	}
133

    
134
	private void addRelOrChild(final Type type, final List<OafDecoder> list, final OafDecoder decoder) {
135

    
136
		final OafRel oafRel = decoder.getOafRel();
137
		final String rd = oafRel.getRelType().toString() + "_" + oafRel.getSubRelType() + "_" + relClasses.getInverse(oafRel.getRelClass());
138
		final LinkDescriptor ld = entityConfigTable.getDescriptor(type, new RelDescriptor(rd));
139

    
140
		if (getRelCounters().get(rd) == null) {
141
			getRelCounters().put(rd, 0);
142
		}
143

    
144
		if (ld == null) {
145
			list.add(decoder);
146
			return;
147
		}
148

    
149
		if (ld.getMax() < 0) {
150
			list.add(decoder);
151
			return;
152
		}
153

    
154
		if (getRelCounters().get(rd) < ld.getMax()) {
155
			getRelCounters().put(rd, getRelCounters().get(rd) + 1);
156
			list.add(decoder);
157
		}
158
	}
159

    
160
	public String build() {
161
		try {
162
			final OafEntityDecoder entity = mainEntity.decodeEntity();
163
			// log.info("building");
164
			// log.info("main: " + mainEntity);
165
			// log.info("rel:  " + relations);
166
			// log.info("chi:  " + children);
167
			// log.info("=============");
168

    
169
			final Predicate<String> filter = entityConfigTable.getFilter(entity.getType());
170
			final List<String> metadata = decodeType(entity, filter, entityDefaults, false);
171

    
172
			// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
173
			final List<String> rels = listRelations();
174
			metadata.addAll(buildContexts(entity.getType()));
175
			metadata.add(parseDataInfo(mainEntity));
176

    
177
			final String body = templateFactory.buildBody(entity.getType(), metadata, rels, listChildren(), extraInfo);
178

    
179
			return templateFactory
180
					.buildRecord(key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
181
		} catch (final Throwable e) {
182
			throw new RuntimeException(String.format("error building record '%s'", this.key), e);
183
		}
184
	}
185

    
186
	private String parseDataInfo(final OafDecoder decoder) {
187
		final DataInfo dataInfo = decoder.getOaf().getDataInfo();
188

    
189
		final StringBuilder sb = new StringBuilder();
190
		sb.append("<datainfo>");
191
		sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
192
		sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
193
		sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
194
		sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
195
		sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
196
		sb.append("</datainfo>");
197

    
198
		return sb.toString();
199
	}
200

    
201
	private List<String> decodeType(final OafEntityDecoder decoder, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
202

    
203
		final List<String> metadata = Lists.newArrayList();
204
		metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
205
		metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
206

    
207
		if ((decoder.getEntity() instanceof Result) && !expandingRel) {
208
			metadata.add(asXmlElement("bestlicense", "", getBestLicense(), null));
209

    
210
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
211
		}
212
		if ((decoder.getEntity() instanceof Person) && !expandingRel) {
213
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
214
		}
215
		if ((decoder.getEntity() instanceof Project) && !expandingRel) {
216
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
217
		}
218

    
219
		return metadata;
220
	}
221

    
222
	private Qualifier getBestLicense() {
223
		Qualifier bestLicense = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
224
		final LicenseComparator lc = new LicenseComparator();
225
		for (final Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
226
			if (lc.compare(bestLicense, instance.getLicence()) > 0) {
227
				bestLicense = instance.getLicence();
228
			}
229
		}
230
		return bestLicense;
231
	}
232

    
233
	public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
234
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
235
	}
236

    
237
	private List<String> listRelations() {
238

    
239
		final List<String> rels = Lists.newArrayList();
240

    
241
		for (final OafDecoder decoder : this.relations) {
242

    
243
			final OafRel rel = decoder.getOafRel();
244
			final OafEntity cachedTarget = rel.getCachedTarget();
245
			final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
246

    
247
			// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
248
			if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
249

    
250
				final List<String> metadata = Lists.newArrayList();
251
				final TypeProtos.Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
252
				//final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
253
				metadata.addAll(listFields(relDecoder.getSubRel(), entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
254

    
255
				String semanticclass = "";
256
				String semanticscheme = "";
257

    
258
				final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
259

    
260
				if ((cachedTarget != null) && cachedTarget.isInitialized()) {
261

    
262
					//final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
263
					final OafEntityDecoder d = OafEntityDecoder.decode(cachedTarget);
264
					metadata.addAll(decodeType(d, entityConfigTable.getIncludeFilter(targetType, relDescriptor), relDefaults, true));
265
					if (d.getType().equals(Type.result)) {
266
						for(Instance i : cachedTarget.getResult().getInstanceList()) {
267
							metadata.addAll(listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
268
						}
269
					}
270
				}
271

    
272
				final RelMetadata relMetadata = relDecoder.getRelMetadata();
273
				// debug
274
				if (relMetadata == null) {
275
					// System.err.println(this);
276
					semanticclass = semanticscheme = "UNKNOWN";
277
				} else {
278
					semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
279
					semanticscheme = relMetadata.getSemantics().getSchemename();
280
				}
281

    
282
				final String rd = relDescriptor.getSubRelType().toString();
283
				incrementCounter(rd);
284

    
285
				final DataInfo info = decoder.getOaf().getDataInfo();
286
				if (info.getInferred()) {
287
					incrementCounter(rd + "_inferred");
288
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "sysimport:crosswalk")) {
289
					incrementCounter(rd + "_collected");
290
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "user:")) {
291
					incrementCounter(rd + "_claimed");
292
				}
293

    
294
				final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
295

    
296
				final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
297

    
298
				rels.add(templateFactory.getRel(targetType, relId, metadata, semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
299
						info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
300
			}
301
		}
302
		return rels;
303
	}
304

    
305
	// //////////////////////////////////
306

    
307
	private List<String> listChildren() {
308

    
309
		final List<String> children = Lists.newArrayList();
310
		for (final OafDecoder decoder : this.children) {
311
			final OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
312
			addChildren(children, cachedTarget, decoder.getRelDescriptor());
313
		}
314
		final OafEntityDecoder entity = mainEntity.decodeEntity();
315
		if (entity.getType().equals(Type.result)) {
316
			for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
317

    
318

    
319
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, instanceFilter, false, false),
320
						listMap(instance.getUrlList(), new UnaryFunction<String, String>() {
321

    
322
							@Override
323
							public String evaluate(final String identifier) {
324
								return templateFactory.getWebResource(identifier);
325
							}
326
						})));
327
			}
328
			for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
329
				// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
330
				final List<String> fields = listFields(er, null, false, false);
331
				children.add(templateFactory.getChild("externalreference", null, fields));
332
			}
333
		}
334

    
335
		return children;
336
	}
337

    
338
	private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
339
		final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
340
		incrementCounter(relDescriptor.getSubRelType().toString());
341
		final Predicate<String> filter = entityConfigTable.getIncludeFilter(target.getType(), relDescriptor);
342
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filter, childDefaults, false)));
343
	}
344

    
345
	private List<String> listFields(final GeneratedMessage fields, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
346

    
347
		final List<String> metadata = Lists.newArrayList();
348

    
349
		if (fields != null) {
350

    
351
			final Set<String> seen = Sets.newHashSet();
352

    
353
			final Map<FieldDescriptor, Object> filtered = filterFields(fields, filter);
354
			for (final Entry<FieldDescriptor, Object> e : filtered.entrySet()) {
355

    
356
				final String name = e.getKey().getName();
357
				seen.add(name);
358
				addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
359
			}
360

    
361
			if (defaults) {
362
				final Iterable<FieldDescriptor> unseen = Iterables.filter(fields.getDescriptorForType().getFields(), new Predicate<FieldDescriptor>() {
363
					@Override
364
					public boolean apply(final FieldDescriptor fd) {
365
						return !seen.contains(fd.getName()) && filter.apply(fd.getName());
366
					}
367
				});
368
				for(FieldDescriptor fd : unseen){
369
					addFieldValue(metadata, fd, getDefault(fd), expandingRel);
370
				}
371
			}
372
		}
373
		return metadata;
374
	}
375

    
376
	private Object getDefault(final FieldDescriptor fd) {
377
		switch (fd.getType()) {
378
		case BOOL:
379
			return false;
380
		case BYTES:
381
			return "".getBytes();
382
		case MESSAGE: {
383
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) return defaultQualifier();
384
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType()))
385
				return StructuredProperty.newBuilder().setValue("").setQualifier(defaultQualifier()).build();
386
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) return KeyValue.newBuilder().setKey("").setValue("").build();
387
			if (StringField.getDescriptor().equals(fd.getMessageType())) return StringField.newBuilder().setValue("").build();
388
			if (BoolField.getDescriptor().equals(fd.getMessageType())) return BoolField.newBuilder().buildPartial();
389
			return null;
390
		}
391
		case SFIXED32:
392
		case SFIXED64:
393
		case SINT32:
394
		case SINT64:
395
		case INT32:
396
		case INT64:
397
		case DOUBLE:
398
		case FIXED32:
399
		case FIXED64:
400
		case FLOAT:
401
			return 0;
402
		case STRING:
403
			return "";
404
		default:
405
			return null;
406
		}
407
	}
408

    
409
	private Qualifier defaultQualifier() {
410
		return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
411
	}
412

    
413
	@SuppressWarnings("unchecked")
414
	private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
415
		if ("dateofcollection".equals(fd.getName()) ||
416
			"dateoftransformation".equals(fd.getName()) ||
417
			"id".equals(fd.getName()) ||
418
				(value == null)) return;
419

    
420
		if (fd.getName().equals("datasourcetype")) {
421
			final String classid = ((Qualifier) value).getClassid();
422

    
423
			final Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
424
			if (specialDatasourceTypes.contains(classid)) {
425
				q.setClassid("other").setClassname("other");
426
			}
427
			metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
428
		}
429

    
430
		if (fd.isRepeated() && (value instanceof List<?>)) {
431
			for (final Object o : (List<Object>) value) {
432
				guessType(metadata, fd, o, expandingRel);
433
			}
434
		} else {
435
			guessType(metadata, fd, value, expandingRel);
436
		}
437
	}
438

    
439
	private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
440

    
441
		if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
442

    
443
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
444
				final Qualifier qualifier = (Qualifier) o;
445
				metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
446
			}
447

    
448
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
449
				final StructuredProperty sp = (StructuredProperty) o;
450
				metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
451

    
452
				if (!expandingRel && fd.getName().equals("pid")) {
453
					if (sp.getQualifier().getClassid().equalsIgnoreCase("doi")) {
454
						incrementCounter("doi");
455
					}
456
				}
457
			}
458

    
459
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
460
				final KeyValue kv = (KeyValue) o;
461
				metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
462
			}
463

    
464
			if (StringField.getDescriptor().equals(fd.getMessageType())) {
465
				final String fieldName = fd.getName();
466

    
467
				if (fieldName.equals("fundingtree")) {
468
					final String xmlTree = o instanceof StringField ? ((StringField) o).getValue() : o.toString();
469

    
470
					if (expandingRel) {
471
						metadata.add(getRelFundingTree(xmlTree));
472
						fillContextMap(xmlTree);
473
					} else {
474
						metadata.add(xmlTree);
475
					}
476
				} else {
477
					final StringField sf = (StringField) o;
478
					final StringBuilder sb = new StringBuilder("<" + fd.getName());
479
					if (sf.hasDataInfo()) {
480
						final DataInfo dataInfo = sf.getDataInfo();
481
						dataInfoAsAttributes(sb, dataInfo);
482
					}
483
					sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
484
					metadata.add(sb.toString());
485
				}
486
			}
487

    
488
			if (BoolField.getDescriptor().equals(fd.getMessageType())) {
489
				final BoolField bf = (BoolField) o;
490
				final StringBuilder sb = new StringBuilder("<" + fd.getName());
491
				if (bf.hasDataInfo()) {
492
					final DataInfo dataInfo = bf.getDataInfo();
493
					dataInfoAsAttributes(sb, dataInfo);
494
				}
495

    
496
				sb.append(">" + (bf.hasValue() ? bf.getValue() : "") + "</" + fd.getName() + ">");
497
				metadata.add(sb.toString());
498
			}
499

    
500
			if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
501
				final Journal j = (Journal) o;
502
				metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
503
						+ escapeXml(j.getIssnLinking()) + "\" " + "ep=\"" + escapeXml(j.getEp()) + "\" " + "iss=\"" + escapeXml(j.getIss()) + "\" " + "sp=\""
504
						+ escapeXml(j.getSp()) + "\" " + "vol=\"" + escapeXml(j.getVol()) + "\">" + escapeXml(j.getName()) + "</journal>");
505
			}
506

    
507
			if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
508
				contextes.add(((Result.Context) o).getId());
509
			}
510

    
511
			if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
512

    
513
				final ExtraInfo e = (ExtraInfo) o;
514
				final StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
515

    
516
				sb.append("name=\"" + e.getName() + "\" ");
517
				sb.append("typology=\"" + e.getTypology() + "\" ");
518
				sb.append("provenance=\"" + e.getProvenance() + "\" ");
519
				sb.append("trust=\"" + e.getTrust() + "\"");
520
				sb.append(">");
521
				sb.append(e.getValue());
522
				sb.append("</" + fd.getName() + ">");
523

    
524
				extraInfo.add(sb.toString());
525
			}
526

    
527
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
528
			if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) return;
529
			metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
530
		} else {
531
			metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
532
		}
533
	}
534

    
535
	private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
536
		sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
537
		sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
538
		sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
539
		sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
540
		return sb;
541
	}
542

    
543
	private List<String> buildContexts(final Type type) {
544
		final List<String> res = Lists.newArrayList();
545

    
546
		if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(Type.result)) {
547

    
548
			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
549

    
550
			for (final String context : contextes) {
551

    
552
				String id = "";
553
				for (final String token : Splitter.on("::").split(context)) {
554
					id += token;
555

    
556
					final ContextDef def = contextMapper.get(id);
557

    
558
					if (def == null) throw new IllegalStateException(String.format("cannot find context for id '%s'", id));
559

    
560
					if (def.getName().equals("context")) {
561
						final String xpath = "//context/@id='" + def.getId() + "'";
562
						if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
563
							document = addContextDef(document.gotoRoot(), def);
564
						}
565
					}
566

    
567
					if (def.getName().equals("category")) {
568
						final String rootId = StringUtils.substringBefore(def.getId(), "::");
569
						document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
570
					}
571

    
572
					if (def.getName().equals("concept")) {
573
						document = addContextDef(document, def).gotoParent();
574
					}
575
					id += "::";
576
				}
577
			}
578

    
579
			for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
580
				try {
581
					res.add(asStringElement(x));
582
				} catch (final TransformerException e) {
583
					throw new RuntimeException(e);
584
				}
585
			}
586
		}
587

    
588
		return res;
589
	}
590

    
591
	private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
592
		tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
593
		if ((def.getType() != null) && !def.getType().isEmpty()) {
594
			tag.addAttribute("type", def.getType());
595
		}
596
		return tag;
597
	}
598

    
599
	private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
600
		final StringWriter buffer = new StringWriter();
601
		transformer.transform(new DOMSource(element), new StreamResult(buffer));
602
		return buffer.toString();
603
	}
604

    
605
	@SuppressWarnings("unchecked")
606
	private String getRelFundingTree(final String xmlTree) {
607
		String funding = "<funding>";
608
		try {
609
			final Document ftree = new SAXReader().read(new StringReader(xmlTree));
610
			funding = "<funding>";
611
			// String _id = "";
612

    
613
			funding += getFunderElement(ftree);
614

    
615
			for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
616
				final Element e = (Element) o;
617
				final String _id = e.valueOf("./id");
618
				funding += "<" + e.getName() + " name=\"" + escapeXml(e.valueOf("./name")) + "\">" + escapeXml(_id) + "</" + e.getName() + ">";
619
				// _id += "::";
620
			}
621
		} catch (final DocumentException e) {
622
			throw new IllegalArgumentException("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
623
		} finally {
624
			funding += "</funding>";
625
		}
626
		return funding;
627
	}
628

    
629
	private String getFunderElement(final Document ftree) {
630
		final String funderId = ftree.valueOf("//fundingtree/funder/id/text()");
631
		final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname/text()");
632
		final String funderName = ftree.valueOf("//fundingtree/funder/name/text()");
633
		final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction/text()");
634

    
635
		return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
636
				+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
637
	}
638

    
639
	private void fillContextMap(final String xmlTree) {
640

    
641
		Document fundingPath;
642
		try {
643
			fundingPath = new SAXReader().read(new StringReader(xmlTree));
644
		} catch (final DocumentException e) {
645
			throw new RuntimeException(e);
646
		}
647
		try {
648
			final Node funder = fundingPath.selectSingleNode("//funder");
649

    
650
			if (funder != null) {
651

    
652
				final String funderShortName = funder.valueOf("./shortname");
653
				contextes.add(funderShortName);
654

    
655
				contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
656
				final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
657
				if (level0 != null) {
658
					final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
659
					contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
660
					final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
661
					if (level1 == null) {
662
						contextes.add(level0Id);
663
					} else {
664
						final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
665
						contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
666
						final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
667
						if (level2 == null) {
668
							contextes.add(level1Id);
669
						} else {
670
							final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
671
							contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
672
							contextes.add(level2Id);
673
						}
674
					}
675
				}
676
			}
677
		} catch (final NullPointerException e) {
678
			throw new IllegalArgumentException("malformed funding path: " + xmlTree, e);
679
		}
680
	}
681

    
682
	private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
683
		StringBuilder sb = new StringBuilder();
684
		sb.append("<");
685
		sb.append(name);
686
		if (q != null) {
687
			sb.append(getAttributes(q));
688
		}
689
		if (dataInfo != null) {
690
			sb = dataInfoAsAttributes(sb, dataInfo);
691
		}
692
		if ((value == null) || value.isEmpty()) {
693
			sb.append("/>");
694
			return sb.toString();
695
			// return "<" + name + getAttributes(q) + "/>";
696
		}
697

    
698
		sb.append(">");
699
		// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
700
		sb.append(escapeXml(value));
701
		sb.append("</");
702
		sb.append(name);
703
		sb.append(">");
704

    
705
		return sb.toString();
706
		// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
707
	}
708

    
709
	private String getAttributes(final Qualifier q) {
710
		if (q == null) return "";
711

    
712
		final StringBuilder sb = new StringBuilder();
713
		for (final Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
714
			// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
715
			sb.append(" ");
716
			sb.append(e.getKey().getName());
717
			sb.append("=\"");
718
			sb.append(escapeXml(e.getValue().toString()));
719
			sb.append("\"");
720
		}
721
		return sb.toString();
722
	}
723

    
724

    
725
	private Map<FieldDescriptor, Object> filterFields(final GeneratedMessage fields, final Predicate<String> acceptFilter) {
726
		if(acceptFilter == null) return fields.getAllFields();
727
		final Map<FieldDescriptor, Object> res = Maps.newHashMap();
728
		for(Entry<FieldDescriptor, Object> e : fields.getAllFields().entrySet()) {
729
			if (acceptFilter.apply(e.getKey().getName())) {
730
				res.put(e.getKey(), e.getValue());
731
			}
732
		}
733
		return res;
734
	}
735

    
736

    
737

    
738
	private List<String> countersAsXml() {
739
		final List<String> out = Lists.newArrayList();
740
		for (final Entry<String, Integer> e : counters.entrySet()) {
741
			out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
742
		}
743
		return out;
744
	}
745

    
746
	private void incrementCounter(final String type) {
747
		if (!counters.containsKey(type)) {
748
			counters.put(type, 1);
749
		} else {
750
			counters.put(type, counters.get(type) + 1);
751
		}
752
	}
753

    
754
	@Override
755
	public String toString() {
756
		final StringBuilder sb = new StringBuilder();
757
		sb.append("################################################\n");
758
		sb.append("ID: ").append(key).append("\n");
759
		if (mainEntity != null) {
760
			sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
761
		}
762
		if (relations != null) {
763
			sb.append("\nRELATIONS:\n");
764
			for (final OafDecoder decoder : relations) {
765
				sb.append(decoder.getOafRel().toString() + "\n");
766
			}
767
		}
768
		if (children != null) {
769
			sb.append("\nCHILDREN:\n");
770
			for (final OafDecoder decoder : children) {
771
				sb.append(decoder.getOafRel().toString() + "\n");
772
			}
773
		}
774
		return sb.toString();
775
	}
776

    
777
}
(8-8/8)