Project

General

Profile

1
package eu.dnetlib.data.mapreduce.util;
2

    
3
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
4

    
5
import java.io.StringReader;
6
import java.io.StringWriter;
7
import java.util.List;
8
import java.util.Map;
9
import java.util.Map.Entry;
10
import java.util.Set;
11
import java.util.StringTokenizer;
12

    
13
import javax.xml.transform.OutputKeys;
14
import javax.xml.transform.Transformer;
15
import javax.xml.transform.TransformerConfigurationException;
16
import javax.xml.transform.TransformerException;
17
import javax.xml.transform.TransformerFactory;
18
import javax.xml.transform.TransformerFactoryConfigurationError;
19
import javax.xml.transform.dom.DOMSource;
20
import javax.xml.transform.stream.StreamResult;
21

    
22
import org.apache.commons.lang.StringEscapeUtils;
23
import org.apache.commons.lang.StringUtils;
24
import org.dom4j.Document;
25
import org.dom4j.DocumentException;
26
import org.dom4j.Element;
27
import org.dom4j.Node;
28
import org.dom4j.io.SAXReader;
29

    
30
import com.google.common.base.Joiner;
31
import com.google.common.base.Predicate;
32
import com.google.common.collect.Lists;
33
import com.google.common.collect.Maps;
34
import com.google.common.collect.Sets;
35
import com.google.protobuf.Descriptors.EnumValueDescriptor;
36
import com.google.protobuf.Descriptors.FieldDescriptor;
37
import com.google.protobuf.GeneratedMessage;
38
import com.mycila.xmltool.XMLDoc;
39
import com.mycila.xmltool.XMLTag;
40

    
41
import eu.dnetlib.data.mapreduce.hbase.index.config.ContextDef;
42
import eu.dnetlib.data.mapreduce.hbase.index.config.ContextMapper;
43
import eu.dnetlib.data.mapreduce.hbase.index.config.EntityConfigTable;
44
import eu.dnetlib.data.mapreduce.hbase.index.config.LinkDescriptor;
45
import eu.dnetlib.data.mapreduce.hbase.index.config.RelClasses;
46
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
47
import eu.dnetlib.data.proto.FieldTypeProtos.ExtraInfo;
48
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
49
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
50
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
51
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
52
import eu.dnetlib.data.proto.OafProtos.OafEntity;
53
import eu.dnetlib.data.proto.OafProtos.OafRel;
54
import eu.dnetlib.data.proto.PersonProtos.Person;
55
import eu.dnetlib.data.proto.ProjectProtos.Project;
56
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
57
import eu.dnetlib.data.proto.ResultProtos.Result;
58
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
59
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
60
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
61
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
62
import eu.dnetlib.data.proto.TypeProtos.Type;
63
import eu.dnetlib.miscutils.functional.UnaryFunction;
64

    
65
public class XmlRecordFactory {
66

    
67
	protected Set<String> specialDatasourceTypes = Sets.newHashSet("scholarcomminfra", "infospace", "pubsrepository::mock", "entityregistry");
68

    
69
	protected TemplateFactory templateFactory = new TemplateFactory();
70

    
71
	protected OafDecoder mainEntity = null;
72

    
73
	protected String key = null;
74

    
75
	protected List<OafDecoder> relations = Lists.newLinkedList();
76
	protected List<OafDecoder> children = Lists.newLinkedList();
77

    
78
	protected EntityConfigTable entityConfigTable;
79

    
80
	protected ContextMapper contextMapper;
81

    
82
	protected RelClasses relClasses;
83

    
84
	protected String schemaLocation;
85

    
86
	protected boolean entityDefaults;
87
	protected boolean relDefaults;
88
	protected boolean childDefaults;
89

    
90
	protected Set<String> contextes = Sets.newHashSet();
91

    
92
	protected List<String> extraInfo = Lists.newArrayList();
93

    
94
	protected Map<String, Integer> counters = Maps.newHashMap();
95

    
96
	protected Transformer transformer;
97

    
98
	public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
99
			final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults)
100
			throws TransformerConfigurationException, TransformerFactoryConfigurationError {
101
		this.entityConfigTable = entityConfigTable;
102
		this.contextMapper = contextMapper;
103
		this.relClasses = relClasses;
104
		this.schemaLocation = schemaLocation;
105
		this.entityDefaults = entityDefaults;
106
		this.relDefaults = relDefaults;
107
		this.childDefaults = childDefeaults;
108

    
109
		transformer = TransformerFactory.newInstance().newTransformer();
110
		transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
111
	}
112

    
113
	public String getId() {
114
		return key;
115
	}
116

    
117
	public boolean isValid() {
118
		return mainEntity != null;
119
	}
120

    
121
	public void setMainEntity(final OafDecoder mainEntity) {
122
		this.mainEntity = mainEntity;
123
		this.key = mainEntity.decodeEntity().getId();
124
	}
125

    
126
	public void addRelation(final OafDecoder rel) {
127
		addRelOrChild(relations, rel);
128
	}
129

    
130
	public void addChild(final OafDecoder child) {
131
		addRelOrChild(children, child);
132
	}
133

    
134
	private void addRelOrChild(final List<OafDecoder> list, final OafDecoder decoder) {
135
		list.add(decoder);
136
	}
137

    
138
	public String build() {
139

    
140
		final OafEntityDecoder entity = mainEntity.decodeEntity();
141
		// System.out.println("building");
142
		// System.out.println("main: " + mainEntity);
143
		// System.out.println("rel:  " + relations);
144
		// System.out.println("chi:  " + children);
145
		// System.out.println("=============");
146

    
147
		final Type type = entity.getType();
148
		final List<String> metadata = decodeType(entity, null, entityDefaults, false);
149

    
150
		// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
151
		final List<String> rels = listRelations();
152
		metadata.addAll(buildContexts(type));
153
		metadata.add(parseDataInfo(mainEntity));
154

    
155
		final String body = templateFactory.buildBody(type, metadata, rels, listChildren(), extraInfo);
156

    
157
		// System.out.println("record id: " + recordId);
158
		return templateFactory.buildRecord(type, key, entity.getDateOfCollection(), schemaLocation, body, countersAsXml());
159
	}
160

    
161
	private String parseDataInfo(final OafDecoder decoder) {
162
		final DataInfo dataInfo = decoder.getOaf().getDataInfo();
163

    
164
		final StringBuilder sb = new StringBuilder();
165
		sb.append("<datainfo>");
166
		sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
167
		sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
168
		sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
169
		sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
170
		sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
171
		sb.append("</datainfo>");
172

    
173
		return sb.toString();
174
	}
175

    
176
	private List<String> decodeType(final OafEntityDecoder decoder, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
177

    
178
		final List<String> metadata = Lists.newArrayList();
179
		metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
180
		metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
181

    
182
		if ((decoder.getEntity() instanceof Result) && !expandingRel) {
183
			metadata.add(asXmlElement("bestlicense", "", getBestLicense(), null));
184

    
185
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
186
		}
187
		if ((decoder.getEntity() instanceof Person) && !expandingRel) {
188
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
189
		}
190
		if ((decoder.getEntity() instanceof Project) && !expandingRel) {
191
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
192
		}
193

    
194
		return metadata;
195
	}
196

    
197
	private Qualifier getBestLicense() {
198
		Qualifier bestLicense = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
199
		final LicenseComparator lc = new LicenseComparator();
200
		for (final Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
201
			if (lc.compare(bestLicense, instance.getLicence()) > 0) {
202
				bestLicense = instance.getLicence();
203
			}
204
		}
205
		return bestLicense;
206
	}
207

    
208
	public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
209
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
210
	}
211

    
212
	private List<String> listRelations() {
213

    
214
		final List<String> rels = Lists.newArrayList();
215

    
216
		for (final OafDecoder decoder : this.relations) {
217

    
218
			final OafRel rel = decoder.getOafRel();
219
			final OafEntity cachedTarget = rel.getCachedTarget();
220
			final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
221

    
222
			// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
223
			if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
224

    
225
				final List<String> metadata = Lists.newArrayList();
226
				final Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
227
				final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
228
				metadata.addAll(listFields(relDecoder.getSubRel(), relFilter, false, true));
229

    
230
				String semanticclass = "";
231
				String semanticscheme = "";
232

    
233
				final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
234

    
235
				if ((cachedTarget != null) && cachedTarget.isInitialized()) {
236

    
237
					final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
238
					metadata.addAll(decodeType(OafEntityDecoder.decode(cachedTarget), filter, relDefaults, true));
239
				}
240

    
241
				final RelMetadata relMetadata = relDecoder.getRelMetadata();
242
				// debug
243
				if (relMetadata == null) {
244
					// System.err.println(this);
245
					semanticclass = semanticscheme = "UNKNOWN";
246
				} else {
247
					semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
248
					semanticscheme = relMetadata.getSemantics().getSchemename();
249
				}
250

    
251
				incrementCounter(relDescriptor.getSubRelType().toString());
252

    
253
				final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
254

    
255
				final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
256

    
257
				final DataInfo info = decoder.getOaf().getDataInfo();
258

    
259
				rels.add(templateFactory.getRel(targetType, relId, metadata, semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
260
						info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
261
			}
262
		}
263
		return rels;
264
	}
265

    
266
	private List<String> listChildren() {
267

    
268
		final List<String> children = Lists.newArrayList();
269
		for (final OafDecoder decoder : this.children) {
270
			final OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
271
			addChildren(children, cachedTarget, decoder.getRelDescriptor());
272
		}
273
		final OafEntityDecoder entity = mainEntity.decodeEntity();
274
		if (entity.getType().equals(Type.result)) {
275
			for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
276
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, null, false, false),
277
						listMap(instance.getUrlList(), new UnaryFunction<String, String>() {
278

    
279
							@Override
280
							public String evaluate(final String identifier) {
281
								return templateFactory.getWebResource(identifier);
282
							}
283
						})));
284
			}
285
			for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
286
				// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
287
				final List<String> fields = listFields(er, null, false, false);
288
				children.add(templateFactory.getChild("externalreference", null, fields));
289
			}
290
		}
291

    
292
		return children;
293
	}
294

    
295
	private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
296
		final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
297
		incrementCounter(relDescriptor.getSubRelType().toString());
298
		final Set<String> filters = entityConfigTable.getFilter(target.getType(), relDescriptor);
299
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filters, childDefaults, false)));
300
	}
301

    
302
	// //////////////////////////////////
303

    
304
	private List<String> listFields(final GeneratedMessage fields, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
305

    
306
		final List<String> metadata = Lists.newArrayList();
307

    
308
		if (fields != null) {
309

    
310
			final Set<String> seen = Sets.newHashSet();
311
			for (final Entry<FieldDescriptor, Object> e : filterFields(fields, filter)) {
312

    
313
				// final String name = getFieldName(e.getKey().getName());
314
				final String name = e.getKey().getName();
315
				seen.add(name);
316

    
317
				addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
318
			}
319

    
320
			if (defaults) {
321
				for (final FieldDescriptor fd : fields.getDescriptorForType().getFields()) {
322
					if (!seen.contains(fd.getName())) {
323
						addFieldValue(metadata, fd, getDefault(fd), expandingRel);
324
					}
325
				}
326
			}
327
		}
328
		return metadata;
329
	}
330

    
331
	private Object getDefault(final FieldDescriptor fd) {
332
		switch (fd.getType()) {
333
		case BOOL:
334
			return false;
335
		case BYTES:
336
			return "".getBytes();
337
		case MESSAGE: {
338
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) return defaultQualifier();
339
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) return StructuredProperty.newBuilder().setValue("")
340
					.setQualifier(defaultQualifier()).build();
341
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) return KeyValue.newBuilder().setKey("").setValue("").build();
342
			if (StringField.getDescriptor().equals(fd.getMessageType())) return StringField.newBuilder().setValue("").build();
343
			return null;
344
		}
345
		case SFIXED32:
346
		case SFIXED64:
347
		case SINT32:
348
		case SINT64:
349
		case INT32:
350
		case INT64:
351
		case DOUBLE:
352
		case FIXED32:
353
		case FIXED64:
354
		case FLOAT:
355
			return 0;
356
		case STRING:
357
			return "";
358
		default:
359
			return null;
360
		}
361
	}
362

    
363
	private Qualifier defaultQualifier() {
364
		return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
365
	}
366

    
367
	@SuppressWarnings("unchecked")
368
	private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
369
		if (fd.getName().equals("dateofcollection") || fd.getName().equals("id") || fd.getName().equals("url") || (value == null)) return;
370

    
371
		if (fd.getName().equals("datasourcetype")) {
372
			final String classid = ((Qualifier) value).getClassid();
373

    
374
			final Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
375
			if (specialDatasourceTypes.contains(classid)) {
376
				q.setClassid("other").setClassname("other");
377
			}
378
			metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
379
		}
380

    
381
		if (fd.isRepeated() && (value instanceof List<?>)) {
382
			for (final Object o : (List<Object>) value) {
383
				guessType(metadata, fd, o, expandingRel);
384
			}
385
		} else {
386
			guessType(metadata, fd, value, expandingRel);
387
		}
388
	}
389

    
390
	private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
391

    
392
		if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
393

    
394
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
395
				final Qualifier qualifier = (Qualifier) o;
396
				metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
397
			}
398

    
399
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
400
				final StructuredProperty sp = (StructuredProperty) o;
401
				metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
402
			}
403

    
404
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
405
				final KeyValue kv = (KeyValue) o;
406
				metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
407
			}
408

    
409
			if (StringField.getDescriptor().equals(fd.getMessageType())) {
410
				final String fieldName = fd.getName();
411

    
412
				if (fieldName.equals("fundingtree")) {
413
					final String xmlTree = o instanceof StringField ? ((StringField) o).getValue() : o.toString();
414

    
415
					if (expandingRel) {
416
						metadata.add(getRelFundingTree(xmlTree));
417
						fillContextMap(xmlTree);
418
					} else {
419
						metadata.add(xmlTree);
420
					}
421
				} else {
422
					final StringField sf = (StringField) o;
423
					final StringBuilder sb = new StringBuilder("<" + fd.getName());
424
					if (sf.hasDataInfo()) {
425
						final DataInfo dataInfo = sf.getDataInfo();
426
						dataInfoAsAttributes(sb, dataInfo);
427
					}
428
					sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
429
					metadata.add(sb.toString());
430
				}
431
			}
432
			if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
433
				final Journal j = (Journal) o;
434
				metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
435
						+ escapeXml(j.getIssnLinking()) + "\">" + escapeXml(j.getName()) + "</journal>");
436
			}
437

    
438
			if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
439
				contextes.add(((Result.Context) o).getId());
440
			}
441

    
442
			if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
443

    
444
				final ExtraInfo e = (ExtraInfo) o;
445
				final StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
446

    
447
				sb.append("name=\"" + e.getName() + "\" ");
448
				sb.append("typology=\"" + e.getTypology() + "\" ");
449
				sb.append("provenance=\"" + e.getProvenance() + "\" ");
450
				sb.append("trust=\"" + e.getTrust() + "\"");
451
				sb.append(">");
452
				sb.append(e.getValue());
453
				sb.append("</" + fd.getName() + ">");
454

    
455
				extraInfo.add(sb.toString());
456
			}
457

    
458
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
459
			if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) return;
460
			metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
461
		} else {
462
			metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
463
		}
464
	}
465

    
466
	private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
467
		sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
468
		sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
469
		sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
470
		sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
471
		return sb;
472
	}
473

    
474
	private List<String> buildContexts(final Type type) {
475
		final List<String> res = Lists.newArrayList();
476

    
477
		if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(Type.result)) {
478

    
479
			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
480

    
481
			for (final String id : contextes) {
482

    
483
				final StringTokenizer st = new StringTokenizer(id, "::");
484
				String token = "";
485
				while (st.hasMoreTokens()) {
486
					token += st.nextToken();
487

    
488
					final ContextDef def = contextMapper.get(token);
489

    
490
					if (def == null) throw new IllegalStateException("cannot find context for id: " + token);
491

    
492
					if (def.getName().equals("context")) {
493
						final String xpath = "//context/@id='" + def.getId() + "'";
494
						if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
495
							document = addContextDef(document.gotoRoot(), def);
496
						}
497
					}
498

    
499
					if (def.getName().equals("category")) {
500
						final String rootId = StringUtils.substringBefore(def.getId(), "::");
501
						document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
502
					}
503

    
504
					if (def.getName().equals("concept")) {
505
						document = addContextDef(document, def).gotoParent();
506
					}
507
					token += "::";
508
				}
509
			}
510

    
511
			for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
512
				try {
513
					res.add(asStringElement(x));
514
				} catch (final TransformerException e) {
515
					throw new RuntimeException(e);
516
				}
517
			}
518
		}
519

    
520
		return res;
521
	}
522

    
523
	private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
524
		tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
525
		if ((def.getType() != null) && !def.getType().isEmpty()) {
526
			tag.addAttribute("type", def.getType());
527
		}
528
		return tag;
529
	}
530

    
531
	private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
532
		final StringWriter buffer = new StringWriter();
533
		transformer.transform(new DOMSource(element), new StreamResult(buffer));
534
		return buffer.toString();
535
	}
536

    
537
	@SuppressWarnings("unchecked")
538
	private String getRelFundingTree(final String xmlTree) {
539
		String funding = "<funding>";
540
		try {
541
			final Document ftree = new SAXReader().read(new StringReader(xmlTree));
542
			funding = "<funding>";
543
			// String _id = "";
544

    
545
			for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
546
				final Element e = (Element) o;
547
				final String _id = e.valueOf("./id");
548
				funding += "<" + e.getName() + ">" + escapeXml(_id) + "</" + e.getName() + ">";
549
				// _id += "::";
550
			}
551
		} catch (final DocumentException e) {
552
			System.err.println("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
553
		} finally {
554
			funding += "</funding>";
555
		}
556
		return funding;
557
	}
558

    
559
	private void fillContextMap(final String xmlTree) {
560

    
561
		Document fundingPath;
562
		try {
563
			fundingPath = new SAXReader().read(new StringReader(xmlTree));
564
		} catch (final DocumentException e) {
565
			throw new RuntimeException(e);
566
		}
567
		final Node funder = fundingPath.selectSingleNode("//funder");
568
		final String funderShortName = funder.valueOf("./shortname");
569
		contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
570
		final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
571
		final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
572
		contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
573
		final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
574
		final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
575
		contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
576
		final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
577
		if (level2 == null) {
578
			contextes.add(level1Id);
579
		}
580
		else {
581
			final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
582
			contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
583
			contextes.add(level2Id);
584
		}
585
	}
586

    
587
	private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
588
		StringBuilder sb = new StringBuilder();
589
		sb.append("<");
590
		sb.append(name);
591
		if (q != null) {
592
			sb.append(getAttributes(q));
593
		}
594
		if (dataInfo != null) {
595
			sb = dataInfoAsAttributes(sb, dataInfo);
596
		}
597
		if ((value == null) || value.isEmpty()) {
598
			sb.append("/>");
599
			return sb.toString();
600
			// return "<" + name + getAttributes(q) + "/>";
601
		}
602

    
603
		sb.append(">");
604
		// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
605
		sb.append(escapeXml(value));
606
		sb.append("</");
607
		sb.append(name);
608
		sb.append(">");
609

    
610
		return sb.toString();
611
		// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
612
	}
613

    
614
	private String getAttributes(final Qualifier q) {
615
		if (q == null) return "";
616

    
617
		final StringBuilder sb = new StringBuilder();
618
		for (final Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
619
			// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
620
			sb.append(" ");
621
			sb.append(e.getKey().getName());
622
			sb.append("=\"");
623
			sb.append(escapeXml(e.getValue().toString()));
624
			sb.append("\"");
625
		}
626
		return sb.toString();
627
	}
628

    
629
	private Set<Entry<FieldDescriptor, Object>> filterFields(final GeneratedMessage fields, final Set<String> filter) {
630

    
631
		if (filter != null) {
632
			final Predicate<FieldDescriptor> p = new Predicate<FieldDescriptor>() {
633

    
634
				@Override
635
				public boolean apply(final FieldDescriptor descriptor) {
636
					if (fields == null) return false;
637
					final String name = descriptor.getName();
638
					return filter.contains(name);
639
				}
640
			};
641
			final Map<FieldDescriptor, Object> filtered = Maps.filterKeys(fields.getAllFields(), p);
642
			// System.out.println(
643
			// "filtered " + type.toString() + ": " + toString(filterEntries.keySet()) + "\n" +
644
			// "builder  " + fields.getDescriptorForType().getFullName() + ": " + toString(fields.getAllFields().keySet()));
645
			return filtered.entrySet();
646
		}
647
		return fields.getAllFields().entrySet();
648
	}
649

    
650
	public static String removePrefix(final String s) {
651
		if (s.contains("|")) return StringUtils.substringAfter(s, "|");
652
		return s;
653
	}
654

    
655
	public static String escapeXml(final String value) {
656
		return StringEscapeUtils.escapeXml(value);
657
		// return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;");
658
	}
659

    
660
	private List<String> countersAsXml() {
661
		final List<String> out = Lists.newArrayList();
662
		for (final Entry<String, Integer> e : counters.entrySet()) {
663
			out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
664
		}
665
		return out;
666
	}
667

    
668
	private void incrementCounter(final String type) {
669
		if (!counters.containsKey(type)) {
670
			counters.put(type, 1);
671
		} else {
672
			counters.put(type, counters.get(type) + 1);
673
		}
674
	}
675

    
676
	@Override
677
	public String toString() {
678
		final StringBuilder sb = new StringBuilder();
679
		sb.append("################################################\n");
680
		sb.append("ID: ").append(key).append("\n");
681
		if (mainEntity != null) {
682
			sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
683
		}
684
		if (relations != null) {
685
			sb.append("\nRELATIONS:\n");
686
			for (final OafDecoder decoder : relations) {
687
				sb.append(decoder.getOafRel().toString() + "\n");
688
			}
689
		}
690
		if (children != null) {
691
			sb.append("\nCHILDREN:\n");
692
			for (final OafDecoder decoder : children) {
693
				sb.append(decoder.getOafRel().toString() + "\n");
694
			}
695
		}
696
		return sb.toString();
697
	}
698

    
699
}
(8-8/8)