Project

General

Profile

1
package eu.dnetlib.data.mapreduce.util;
2

    
3
import java.io.StringReader;
4
import java.io.StringWriter;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Map.Entry;
8
import java.util.Set;
9
import javax.xml.transform.*;
10
import javax.xml.transform.dom.DOMSource;
11
import javax.xml.transform.stream.StreamResult;
12

    
13
import com.google.common.base.Joiner;
14
import com.google.common.base.Predicate;
15
import com.google.common.base.Splitter;
16
import com.google.common.collect.Iterables;
17
import com.google.common.collect.Lists;
18
import com.google.common.collect.Maps;
19
import com.google.common.collect.Sets;
20
import com.google.protobuf.Descriptors.EnumValueDescriptor;
21
import com.google.protobuf.Descriptors.FieldDescriptor;
22
import com.google.protobuf.GeneratedMessage;
23
import com.mycila.xmltool.XMLDoc;
24
import com.mycila.xmltool.XMLTag;
25
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
26
import eu.dnetlib.data.proto.FieldTypeProtos.*;
27
import eu.dnetlib.data.proto.OafProtos.OafEntity;
28
import eu.dnetlib.data.proto.OafProtos.OafRel;
29
import eu.dnetlib.data.proto.ProjectProtos.Project;
30
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
31
import eu.dnetlib.data.proto.ResultProtos.Result;
32
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
33
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
34
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
35
import eu.dnetlib.data.proto.TypeProtos;
36
import eu.dnetlib.data.proto.TypeProtos.Type;
37
import org.apache.commons.lang3.StringUtils;
38
import org.dom4j.Document;
39
import org.dom4j.DocumentException;
40
import org.dom4j.Element;
41
import org.dom4j.Node;
42
import org.dom4j.io.SAXReader;
43

    
44
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
45

    
46
public class XmlRecordFactory {
47

    
48
	// private static final Log log = LogFactory.getLog(XmlRecordFactory.class); // NOPMD by marko on 11/24/08 5:02 PM
49

    
50
	private final Map<String, Integer> relCounters = Maps.newHashMap();
51
	protected Set<String> specialDatasourceTypes;
52
	protected TemplateFactory templateFactory = new TemplateFactory();
53
	protected OafDecoder mainEntity = null;
54
	protected String key = null;
55
	protected List<OafDecoder> relations = Lists.newLinkedList();
56
	protected List<OafDecoder> children = Lists.newLinkedList();
57
	protected EntityConfigTable entityConfigTable;
58
	protected ContextMapper contextMapper;
59
	protected RelClasses relClasses;
60
	protected String schemaLocation;
61
	protected boolean entityDefaults;
62
	protected boolean relDefaults;
63
	protected boolean childDefaults;
64
	protected Set<String> contextes = Sets.newHashSet();
65
	protected List<String> extraInfo = Lists.newArrayList();
66
	protected Map<String, Integer> counters = Maps.newHashMap();
67
	protected Transformer transformer;
68

    
69
	protected static Predicate<String> instanceFilter = new Predicate<String>() {
70
		final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "license", "accessright", "collectedfrom", "dateofacceptance", "distributionlocation");
71
		@Override
72
		public boolean apply(final String s) {
73
			return instanceFieldFilter.contains(s);
74
		}
75
	};
76

    
77
	public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
78
			final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults, final Set<String> otherDatasourceTypesUForUI)
79
			throws TransformerConfigurationException, TransformerFactoryConfigurationError {
80
		this.entityConfigTable = entityConfigTable;
81
		this.contextMapper = contextMapper;
82
		this.relClasses = relClasses;
83
		this.schemaLocation = schemaLocation;
84
		this.entityDefaults = entityDefaults;
85
		this.relDefaults = relDefaults;
86
		this.childDefaults = childDefeaults;
87
		this.specialDatasourceTypes = otherDatasourceTypesUForUI;
88

    
89
		transformer = TransformerFactory.newInstance().newTransformer();
90
		transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
91
	}
92

    
93
	public static String removePrefix(final String s) {
94
		if (s.contains("|")) return StringUtils.substringAfter(s, "|");
95
		return s;
96
	}
97

    
98
	public static String escapeXml(final String value) {
99
		return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("\"", "&quot;").replaceAll("'", "&apos;");
100
	}
101

    
102
	public Map<String, Integer> getRelCounters() {
103
		return relCounters;
104
	}
105

    
106
	public RelClasses getRelClasses() {
107
		return relClasses;
108
	}
109

    
110
	public String getId() {
111
		return key;
112
	}
113

    
114
	public boolean isValid() {
115
		return mainEntity != null;
116
	}
117

    
118
	public void setMainEntity(final OafDecoder mainEntity) {
119
		this.mainEntity = mainEntity;
120
		this.key = mainEntity.decodeEntity().getId();
121
	}
122

    
123
	public void addRelation(final Type type, final OafDecoder rel) {
124
		addRelOrChild(type, relations, rel);
125
	}
126

    
127
	public void addChild(final Type type, final OafDecoder child) {
128
		addRelOrChild(type, children, child);
129
	}
130

    
131
	private void addRelOrChild(final Type type, final List<OafDecoder> list, final OafDecoder decoder) {
132

    
133
		final OafRel oafRel = decoder.getOafRel();
134
		final String rd = oafRel.getRelType().toString() + "_" + oafRel.getSubRelType() + "_" + relClasses.getInverse(oafRel.getRelClass());
135
		final LinkDescriptor ld = entityConfigTable.getDescriptor(type, new RelDescriptor(rd));
136

    
137
		if (getRelCounters().get(rd) == null) {
138
			getRelCounters().put(rd, 0);
139
		}
140

    
141
		if (ld == null) {
142
			list.add(decoder);
143
			return;
144
		}
145

    
146
		if (ld.getMax() < 0) {
147
			list.add(decoder);
148
			return;
149
		}
150

    
151
		if (getRelCounters().get(rd) < ld.getMax()) {
152
			getRelCounters().put(rd, getRelCounters().get(rd) + 1);
153
			list.add(decoder);
154
		}
155
	}
156

    
157
	public String build() {
158
		try {
159
			final OafEntityDecoder entity = mainEntity.decodeEntity();
160
			// log.info("building");
161
			// log.info("main: " + mainEntity);
162
			// log.info("rel:  " + relations);
163
			// log.info("chi:  " + children);
164
			// log.info("=============");
165

    
166
			final Predicate<String> filter = entityConfigTable.getFilter(entity.getType());
167
			final List<String> metadata = decodeType(entity, filter, entityDefaults, false);
168

    
169
			// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
170
			final List<String> rels = listRelations();
171
			metadata.addAll(buildContexts(entity.getType()));
172
			metadata.add(parseDataInfo(mainEntity));
173

    
174
			final String body = templateFactory.buildBody(entity.getType(), metadata, rels, listChildren(), extraInfo);
175

    
176
			return templateFactory
177
					.buildRecord(key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
178
		} catch (final Throwable e) {
179
			throw new RuntimeException(String.format("error building record '%s'", this.key), e);
180
		}
181
	}
182

    
183
	private String parseDataInfo(final OafDecoder decoder) {
184
		final DataInfo dataInfo = decoder.getOaf().getDataInfo();
185

    
186
		final StringBuilder sb = new StringBuilder();
187
		sb.append("<datainfo>");
188
		sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
189
		sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
190
		sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
191
		sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
192
		sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
193
		sb.append("</datainfo>");
194

    
195
		return sb.toString();
196
	}
197

    
198
	private List<String> decodeType(final OafEntityDecoder decoder, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
199

    
200
		final List<String> metadata = Lists.newArrayList();
201
		metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
202
		metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
203

    
204
		if ((decoder.getEntity() instanceof Result) && !expandingRel) {
205
			metadata.add(asXmlElement("bestaccessright", "", getBestAccessright(), null));
206

    
207
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
208
		}
209
		if ((decoder.getEntity() instanceof Project) && !expandingRel) {
210
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
211
		}
212

    
213
		return metadata;
214
	}
215

    
216
	private Qualifier getBestAccessright() {
217
		Qualifier bestAccessRight = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
218
		final LicenseComparator lc = new LicenseComparator();
219
		for (final Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
220
			if (lc.compare(bestAccessRight, instance.getAccessright()) > 0) {
221
				bestAccessRight = instance.getAccessright();
222
			}
223
		}
224
		return bestAccessRight;
225
	}
226

    
227
	public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
228
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
229
	}
230

    
231
	private List<String> listRelations() {
232

    
233
		final List<String> rels = Lists.newArrayList();
234

    
235
		for (final OafDecoder decoder : this.relations) {
236

    
237
			final OafRel rel = decoder.getOafRel();
238
			final OafEntity cachedTarget = rel.getCachedTarget();
239
			final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
240

    
241
			// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
242
			if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
243

    
244
				final List<String> metadata = Lists.newArrayList();
245
				final TypeProtos.Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
246
				//final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
247
				metadata.addAll(listFields(relDecoder.getSubRel(), entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
248

    
249
				String semanticclass = "";
250
				String semanticscheme = "";
251

    
252
				final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
253

    
254
				if ((cachedTarget != null) && cachedTarget.isInitialized()) {
255

    
256
					//final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
257
					final OafEntityDecoder d = OafEntityDecoder.decode(cachedTarget);
258
					metadata.addAll(decodeType(d, entityConfigTable.getIncludeFilter(targetType, relDescriptor), relDefaults, true));
259
					if (d.getType().equals(Type.result)) {
260
						for(Instance i : cachedTarget.getResult().getInstanceList()) {
261
							final List<String> fields = listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true);
262
							metadata.addAll(fields);
263
						}
264
					}
265
				}
266

    
267
				final RelMetadata relMetadata = relDecoder.getRelMetadata();
268
				// debug
269
				if (relMetadata == null) {
270
					// System.err.println(this);
271
					semanticclass = semanticscheme = "UNKNOWN";
272
				} else {
273
					semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
274
					semanticscheme = relMetadata.getSemantics().getSchemename();
275
				}
276

    
277
				final String rd = relDescriptor.getSubRelType().toString();
278
				incrementCounter(rd);
279

    
280
				final DataInfo info = decoder.getOaf().getDataInfo();
281
				if (info.getInferred()) {
282
					incrementCounter(rd + "_inferred");
283
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "sysimport:crosswalk")) {
284
					incrementCounter(rd + "_collected");
285
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "user:")) {
286
					incrementCounter(rd + "_claimed");
287
				}
288

    
289
				final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
290

    
291
				final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
292

    
293
				rels.add(templateFactory.getRel(targetType, relId, Sets.newHashSet(metadata), semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
294
						info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
295
			}
296
		}
297
		return rels;
298
	}
299

    
300
	// //////////////////////////////////
301

    
302
	private List<String> listChildren() {
303

    
304
		final List<String> children = Lists.newArrayList();
305
		for (final OafDecoder decoder : this.children) {
306
			final OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
307
			addChildren(children, cachedTarget, decoder.getRelDescriptor());
308
		}
309
		final OafEntityDecoder entity = mainEntity.decodeEntity();
310
		if (entity.getType().equals(Type.result)) {
311
			for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
312
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, instanceFilter, false, false),
313
						listMap(instance.getUrlList(), identifier -> templateFactory.getWebResource(identifier))));
314
			}
315
			for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
316
				// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
317
				final List<String> fields = listFields(er, null, false, false);
318
				children.add(templateFactory.getChild("externalreference", null, fields));
319
			}
320
		}
321

    
322
		return children;
323
	}
324

    
325
	private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
326
		final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
327
		incrementCounter(relDescriptor.getSubRelType().toString());
328
		final Predicate<String> filter = entityConfigTable.getIncludeFilter(target.getType(), relDescriptor);
329
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filter, childDefaults, false)));
330
	}
331

    
332
	private List<String> listFields(final GeneratedMessage fields, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
333

    
334
		final List<String> metadata = Lists.newArrayList();
335

    
336
		if (fields != null) {
337

    
338
			final Set<String> seen = Sets.newHashSet();
339

    
340
			final Map<FieldDescriptor, Object> filtered = filterFields(fields, filter);
341
			for (final Entry<FieldDescriptor, Object> e : filtered.entrySet()) {
342

    
343
				final String name = e.getKey().getName();
344
				seen.add(name);
345
				addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
346
			}
347

    
348
			if (defaults) {
349
				final Iterable<FieldDescriptor> unseen =
350
						Iterables.filter(fields.getDescriptorForType().getFields(), fd -> !seen.contains(fd.getName()) && filter.apply(fd.getName()));
351
				for(FieldDescriptor fd : unseen){
352
					addFieldValue(metadata, fd, getDefault(fd), expandingRel);
353
				}
354
			}
355
		}
356
		return metadata;
357
	}
358

    
359
	private Object getDefault(final FieldDescriptor fd) {
360
		switch (fd.getType()) {
361
		case BOOL:
362
			return false;
363
		case BYTES:
364
			return "".getBytes();
365
		case MESSAGE: {
366
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) return defaultQualifier();
367
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType()))
368
				return StructuredProperty.newBuilder().setValue("").setQualifier(defaultQualifier()).build();
369
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) return KeyValue.newBuilder().setKey("").setValue("").build();
370
			if (StringField.getDescriptor().equals(fd.getMessageType())) return StringField.newBuilder().setValue("").build();
371
			if (BoolField.getDescriptor().equals(fd.getMessageType())) return BoolField.newBuilder().buildPartial();
372
			return null;
373
		}
374
		case SFIXED32:
375
		case SFIXED64:
376
		case SINT32:
377
		case SINT64:
378
		case INT32:
379
		case INT64:
380
		case DOUBLE:
381
		case FIXED32:
382
		case FIXED64:
383
		case FLOAT:
384
			return 0;
385
		case STRING:
386
			return "";
387
		default:
388
			return null;
389
		}
390
	}
391

    
392
	private Qualifier defaultQualifier() {
393
		return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
394
	}
395

    
396
	@SuppressWarnings("unchecked")
397
	private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
398
		if ("dateofcollection".equals(fd.getName()) ||
399
			"dateoftransformation".equals(fd.getName()) ||
400
			"id".equals(fd.getName()) ||
401
				(value == null)) return;
402

    
403
		if (fd.getName().equals("datasourcetype")) {
404
			final String classid = ((Qualifier) value).getClassid();
405

    
406
			final Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
407
			if (specialDatasourceTypes.contains(classid)) {
408
				q.setClassid("other").setClassname("other");
409
			}
410
			metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
411
		}
412

    
413
		if (fd.isRepeated() && (value instanceof List<?>)) {
414
			for (final Object o : (List<Object>) value) {
415
				guessType(metadata, fd, o, expandingRel);
416
			}
417
		} else {
418
			guessType(metadata, fd, value, expandingRel);
419
		}
420
	}
421

    
422
	private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
423

    
424
		if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
425

    
426
			if(Author.getDescriptor().equals(fd.getMessageType())) {
427

    
428
				final Author a = (Author) o;
429

    
430
				final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
431
				if (a.hasName()) {
432
					sb.append(" name=\"" + escapeXml(a.getName()) + "\"");
433
				}
434
				if (a.hasSurname()) {
435
					sb.append(" surname=\"" + escapeXml(a.getSurname()) + "\"");
436
				}
437
				if (a.getPidCount() > 0) {
438
					a.getPidList().stream()
439
							.filter(kv -> StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()))
440
							.forEach(kv -> {
441
								String pidType = escapeXml(kv.getKey())
442
										.replaceAll("\\W", "");
443
								String pidValue = escapeXml(kv.getValue());
444
								sb.append(String.format(" %s=\"%s\"", pidType, pidValue));
445
							});
446
				}
447

    
448
				sb.append(">" + escapeXml(a.getFullname()) + "</creator>");
449

    
450
				metadata.add(sb.toString());
451
			}
452

    
453
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
454
				final Qualifier qualifier = (Qualifier) o;
455
				metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
456
			}
457

    
458
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
459
				final StructuredProperty sp = (StructuredProperty) o;
460
				metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
461

    
462
				if (!expandingRel && fd.getName().equals("pid")) {
463
					if (sp.getQualifier().getClassid().equalsIgnoreCase("doi")) {
464
						incrementCounter("doi");
465
					}
466
				}
467
			}
468

    
469
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
470
				final KeyValue kv = (KeyValue) o;
471
				metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
472
			}
473

    
474
			if (StringField.getDescriptor().equals(fd.getMessageType())) {
475
				final String fieldName = fd.getName();
476

    
477
				if (fieldName.equals("fundingtree")) {
478
					final String xmlTree = o instanceof StringField ? ((StringField) o).getValue() : o.toString();
479

    
480
					if (expandingRel) {
481
						metadata.add(getRelFundingTree(xmlTree));
482
						fillContextMap(xmlTree);
483
					} else {
484
						metadata.add(xmlTree);
485
					}
486
				} else {
487
					final StringField sf = (StringField) o;
488
					final StringBuilder sb = new StringBuilder("<" + fd.getName());
489
					if (sf.hasDataInfo()) {
490
						final DataInfo dataInfo = sf.getDataInfo();
491
						dataInfoAsAttributes(sb, dataInfo);
492
					}
493
					sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
494
					metadata.add(sb.toString());
495
				}
496
			}
497

    
498
			if (BoolField.getDescriptor().equals(fd.getMessageType())) {
499
				final BoolField bf = (BoolField) o;
500
				final StringBuilder sb = new StringBuilder("<" + fd.getName());
501
				if (bf.hasDataInfo()) {
502
					final DataInfo dataInfo = bf.getDataInfo();
503
					dataInfoAsAttributes(sb, dataInfo);
504
				}
505

    
506
				sb.append(">" + (bf.hasValue() ? bf.getValue() : "") + "</" + fd.getName() + ">");
507
				metadata.add(sb.toString());
508
			}
509

    
510
			if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
511
				final Journal j = (Journal) o;
512
				metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
513
						+ escapeXml(j.getIssnLinking()) + "\" " + "ep=\"" + escapeXml(j.getEp()) + "\" " + "iss=\"" + escapeXml(j.getIss()) + "\" " + "sp=\""
514
						+ escapeXml(j.getSp()) + "\" " + "vol=\"" + escapeXml(j.getVol()) + "\">" + escapeXml(j.getName()) + "</journal>");
515
			}
516

    
517
			if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
518
				final String contextid = ((Context) o).getId();
519
				contextes.add(contextid);
520
				/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
521
				if(contextid.equalsIgnoreCase("dh-ch::subcommunity::2")){
522
					contextes.add("clarin");
523
				}
524

    
525
			}
526

    
527
			if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
528

    
529
				final ExtraInfo e = (ExtraInfo) o;
530
				final StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
531

    
532
				sb.append("name=\"" + e.getName() + "\" ");
533
				sb.append("typology=\"" + e.getTypology() + "\" ");
534
				sb.append("provenance=\"" + e.getProvenance() + "\" ");
535
				sb.append("trust=\"" + e.getTrust() + "\"");
536
				sb.append(">");
537
				sb.append(e.getValue());
538
				sb.append("</" + fd.getName() + ">");
539

    
540
				extraInfo.add(sb.toString());
541
			}
542

    
543
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
544
			if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) return;
545
			metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
546
		} else {
547
			if(fd.getType().equals(FieldDescriptor.Type.FLOAT)){
548
				metadata.add(asXmlElement(fd.getName(), String.format ("%.2f", ((Float) o)), null, null));
549
			} else {
550
				metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
551
			}
552
		}
553
	}
554

    
555
	private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
556
		sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
557
		sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
558
		sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
559
		sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
560
		return sb;
561
	}
562

    
563
	private List<String> buildContexts(final Type type) {
564
		final List<String> res = Lists.newArrayList();
565

    
566
		if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(Type.result)) {
567

    
568
			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
569

    
570
			for (final String context : contextes) {
571

    
572
				String id = "";
573
				for (final String token : Splitter.on("::").split(context)) {
574
					id += token;
575

    
576
					final ContextDef def = contextMapper.get(id);
577

    
578
					if (def == null) {
579
						continue;
580
						// throw new IllegalStateException(String.format("cannot find context for id '%s'", id));
581
					}
582

    
583
					if (def.getName().equals("context")) {
584
						final String xpath = "//context/@id='" + def.getId() + "'";
585
						if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
586
							document = addContextDef(document.gotoRoot(), def);
587
						}
588
					}
589

    
590
					if (def.getName().equals("category")) {
591
						final String rootId = StringUtils.substringBefore(def.getId(), "::");
592
						document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
593
					}
594

    
595
					if (def.getName().equals("concept")) {
596
						document = addContextDef(document, def).gotoParent();
597
					}
598
					id += "::";
599
				}
600
			}
601

    
602
			for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
603
				try {
604
					res.add(asStringElement(x));
605
				} catch (final TransformerException e) {
606
					throw new RuntimeException(e);
607
				}
608
			}
609
		}
610

    
611
		return res;
612
	}
613

    
614
	private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
615
		tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
616
		if ((def.getType() != null) && !def.getType().isEmpty()) {
617
			tag.addAttribute("type", def.getType());
618
		}
619
		return tag;
620
	}
621

    
622
	private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
623
		final StringWriter buffer = new StringWriter();
624
		transformer.transform(new DOMSource(element), new StreamResult(buffer));
625
		return buffer.toString();
626
	}
627

    
628
	@SuppressWarnings("unchecked")
629
	private String getRelFundingTree(final String xmlTree) {
630
		String funding = "<funding>";
631
		try {
632
			final Document ftree = new SAXReader().read(new StringReader(xmlTree));
633
			funding = "<funding>";
634
			// String _id = "";
635

    
636
			funding += getFunderElement(ftree);
637

    
638
			for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
639
				final Element e = (Element) o;
640
				final String _id = e.valueOf("./id");
641
				funding += "<" + e.getName() + " name=\"" + escapeXml(e.valueOf("./name")) + "\">" + escapeXml(_id) + "</" + e.getName() + ">";
642
				// _id += "::";
643
			}
644
		} catch (final DocumentException e) {
645
			throw new IllegalArgumentException("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
646
		} finally {
647
			funding += "</funding>";
648
		}
649
		return funding;
650
	}
651

    
652
	private String getFunderElement(final Document ftree) {
653
		final String funderId = ftree.valueOf("//fundingtree/funder/id/text()");
654
		final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname/text()");
655
		final String funderName = ftree.valueOf("//fundingtree/funder/name/text()");
656
		final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction/text()");
657

    
658
		return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
659
				+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
660
	}
661

    
662
	private void fillContextMap(final String xmlTree) {
663

    
664
		Document fundingPath;
665
		try {
666
			fundingPath = new SAXReader().read(new StringReader(xmlTree));
667
		} catch (final DocumentException e) {
668
			throw new RuntimeException(e);
669
		}
670
		try {
671
			final Node funder = fundingPath.selectSingleNode("//funder");
672

    
673
			if (funder != null) {
674

    
675
				final String funderShortName = funder.valueOf("./shortname");
676
				contextes.add(funderShortName);
677

    
678
				contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
679
				final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
680
				if (level0 != null) {
681
					final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
682
					contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
683
					final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
684
					if (level1 == null) {
685
						contextes.add(level0Id);
686
					} else {
687
						final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
688
						contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
689
						final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
690
						if (level2 == null) {
691
							contextes.add(level1Id);
692
						} else {
693
							final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
694
							contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
695
							contextes.add(level2Id);
696
						}
697
					}
698
				}
699
			}
700
		} catch (final NullPointerException e) {
701
			throw new IllegalArgumentException("malformed funding path: " + xmlTree, e);
702
		}
703
	}
704

    
705
	private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
706
		StringBuilder sb = new StringBuilder();
707
		sb.append("<");
708
		sb.append(name);
709
		if (q != null) {
710
			sb.append(getAttributes(q));
711
		}
712
		if (dataInfo != null) {
713
			sb = dataInfoAsAttributes(sb, dataInfo);
714
		}
715
		if ((value == null) || value.isEmpty()) {
716
			sb.append("/>");
717
			return sb.toString();
718
			// return "<" + name + getAttributes(q) + "/>";
719
		}
720

    
721
		sb.append(">");
722
		// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
723
		sb.append(escapeXml(value));
724
		sb.append("</");
725
		sb.append(name);
726
		sb.append(">");
727

    
728
		return sb.toString();
729
		// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
730
	}
731

    
732
	private String getAttributes(final Qualifier q) {
733
		if (q == null) return "";
734

    
735
		final StringBuilder sb = new StringBuilder();
736
		for (final Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
737
			// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
738
			sb.append(" ");
739
			sb.append(e.getKey().getName());
740
			sb.append("=\"");
741
			sb.append(escapeXml(e.getValue().toString()));
742
			sb.append("\"");
743
		}
744
		return sb.toString();
745
	}
746

    
747

    
748
	private Map<FieldDescriptor, Object> filterFields(final GeneratedMessage fields, final Predicate<String> acceptFilter) {
749
		if(acceptFilter == null) return fields.getAllFields();
750
		final Map<FieldDescriptor, Object> res = Maps.newHashMap();
751
		for(Entry<FieldDescriptor, Object> e : fields.getAllFields().entrySet()) {
752
			if (acceptFilter.apply(e.getKey().getName())) {
753
				res.put(e.getKey(), e.getValue());
754
			}
755
		}
756
		return res;
757
	}
758

    
759

    
760

    
761
	private List<String> countersAsXml() {
762
		final List<String> out = Lists.newArrayList();
763
		for (final Entry<String, Integer> e : counters.entrySet()) {
764
			out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
765
		}
766
		return out;
767
	}
768

    
769
	private void incrementCounter(final String type) {
770
		if (!counters.containsKey(type)) {
771
			counters.put(type, 1);
772
		} else {
773
			counters.put(type, counters.get(type) + 1);
774
		}
775
	}
776

    
777
	@Override
778
	public String toString() {
779
		final StringBuilder sb = new StringBuilder();
780
		sb.append("################################################\n");
781
		sb.append("ID: ").append(key).append("\n");
782
		if (mainEntity != null) {
783
			sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
784
		}
785
		if (relations != null) {
786
			sb.append("\nRELATIONS:\n");
787
			for (final OafDecoder decoder : relations) {
788
				sb.append(decoder.getOafRel().toString() + "\n");
789
			}
790
		}
791
		if (children != null) {
792
			sb.append("\nCHILDREN:\n");
793
			for (final OafDecoder decoder : children) {
794
				sb.append(decoder.getOafRel().toString() + "\n");
795
			}
796
		}
797
		return sb.toString();
798
	}
799

    
800
}
(8-8/8)