Project

General

Profile

1
package eu.dnetlib.data.mapreduce.util;
2

    
3
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
4

    
5
import java.io.StringReader;
6
import java.io.StringWriter;
7
import java.util.List;
8
import java.util.Map;
9
import java.util.Map.Entry;
10
import java.util.Set;
11
import java.util.StringTokenizer;
12

    
13
import javax.xml.transform.OutputKeys;
14
import javax.xml.transform.Transformer;
15
import javax.xml.transform.TransformerConfigurationException;
16
import javax.xml.transform.TransformerException;
17
import javax.xml.transform.TransformerFactory;
18
import javax.xml.transform.TransformerFactoryConfigurationError;
19
import javax.xml.transform.dom.DOMSource;
20
import javax.xml.transform.stream.StreamResult;
21

    
22
import org.apache.commons.lang.StringEscapeUtils;
23
import org.apache.commons.lang.StringUtils;
24
import org.dom4j.Document;
25
import org.dom4j.DocumentException;
26
import org.dom4j.Element;
27
import org.dom4j.io.SAXReader;
28
import org.json.JSONException;
29
import org.json.JSONObject;
30

    
31
import com.google.common.base.Predicate;
32
import com.google.common.collect.Iterators;
33
import com.google.common.collect.Lists;
34
import com.google.common.collect.Maps;
35
import com.google.common.collect.Sets;
36
import com.google.protobuf.Descriptors.EnumValueDescriptor;
37
import com.google.protobuf.Descriptors.FieldDescriptor;
38
import com.google.protobuf.GeneratedMessage;
39
import com.mycila.xmltool.XMLDoc;
40
import com.mycila.xmltool.XMLTag;
41

    
42
import eu.dnetlib.data.mapreduce.hbase.index.config.ContextDef;
43
import eu.dnetlib.data.mapreduce.hbase.index.config.ContextMapper;
44
import eu.dnetlib.data.mapreduce.hbase.index.config.EntityConfigTable;
45
import eu.dnetlib.data.mapreduce.hbase.index.config.LinkDescriptor;
46
import eu.dnetlib.data.mapreduce.hbase.index.config.RelClasses;
47
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
48
import eu.dnetlib.data.proto.FieldTypeProtos.ExtraInfo;
49
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
50
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
51
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
52
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
53
import eu.dnetlib.data.proto.OafProtos.OafEntity;
54
import eu.dnetlib.data.proto.OafProtos.OafRel;
55
import eu.dnetlib.data.proto.PersonProtos.Person;
56
import eu.dnetlib.data.proto.ProjectProtos.Project;
57
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
58
import eu.dnetlib.data.proto.ResultProtos.Result;
59
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
60
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
61
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
62
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
63
import eu.dnetlib.data.proto.TypeProtos.Type;
64
import eu.dnetlib.miscutils.functional.UnaryFunction;
65

    
66
public class XmlRecordFactory {
67

    
68
	protected Set<String> specialDatasourceTypes = Sets.newHashSet("scholarcomminfra", "infospace", "pubsrepository::mock", "entityregistry");
69

    
70
	protected TemplateFactory templateFactory = new TemplateFactory();
71

    
72
	protected OafDecoder mainEntity = null;
73

    
74
	protected String key = null;
75

    
76
	protected List<OafDecoder> relations = Lists.newLinkedList();
77
	protected List<OafDecoder> children = Lists.newLinkedList();
78

    
79
	protected EntityConfigTable entityConfigTable;
80

    
81
	protected ContextMapper contextMapper;
82

    
83
	protected RelClasses relClasses;
84

    
85
	protected String schemaLocation;
86

    
87
	protected boolean entityDefaults;
88
	protected boolean relDefaults;
89
	protected boolean childDefaults;
90

    
91
	protected Set<String> contextes = Sets.newHashSet();
92

    
93
	protected List<String> extraInfo = Lists.newArrayList();
94

    
95
	protected Map<String, Integer> counters = Maps.newHashMap();
96

    
97
	protected Transformer transformer;
98

    
99
	public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
100
			final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults)
101
			throws TransformerConfigurationException, TransformerFactoryConfigurationError {
102
		this.entityConfigTable = entityConfigTable;
103
		this.contextMapper = contextMapper;
104
		this.relClasses = relClasses;
105
		this.schemaLocation = schemaLocation;
106
		this.entityDefaults = entityDefaults;
107
		this.relDefaults = relDefaults;
108
		this.childDefaults = childDefeaults;
109

    
110
		transformer = TransformerFactory.newInstance().newTransformer();
111
		transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
112
	}
113

    
114
	public String getId() {
115
		return key;
116
	}
117

    
118
	public boolean isValid() {
119
		return mainEntity != null;
120
	}
121

    
122
	public void setMainEntity(final OafDecoder mainEntity) {
123
		this.mainEntity = mainEntity;
124
		this.key = mainEntity.decodeEntity().getId();
125
	}
126

    
127
	public void addRelation(final OafDecoder rel) {
128
		addRelOrChild(relations, rel);
129
	}
130

    
131
	public void addChild(final OafDecoder child) {
132
		addRelOrChild(children, child);
133
	}
134

    
135
	private void addRelOrChild(final List<OafDecoder> list, final OafDecoder decoder) {
136
		list.add(decoder);
137
	}
138

    
139
	public String build() {
140

    
141
		final OafEntityDecoder entity = mainEntity.decodeEntity();
142
		// System.out.println("building");
143
		// System.out.println("main: " + mainEntity);
144
		// System.out.println("rel:  " + relations);
145
		// System.out.println("chi:  " + children);
146
		// System.out.println("=============");
147

    
148
		final Type type = entity.getType();
149
		final List<String> metadata = decodeType(entity, null, entityDefaults, false);
150

    
151
		// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
152
		final List<String> rels = listRelations();
153
		metadata.addAll(buildContexts(type));
154
		metadata.add(parseDataInfo(mainEntity));
155

    
156
		final String body = templateFactory.buildBody(type, metadata, rels, listChildren(), extraInfo);
157

    
158
		// System.out.println("record id: " + recordId);
159
		return templateFactory.buildRecord(type, key, entity.getDateOfCollection(), schemaLocation, body, countersAsXml());
160
	}
161

    
162
	private String parseDataInfo(final OafDecoder decoder) {
163
		final DataInfo dataInfo = decoder.getOaf().getDataInfo();
164

    
165
		final StringBuilder sb = new StringBuilder();
166
		sb.append("<datainfo>");
167
		sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
168
		sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
169
		sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
170
		sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
171
		sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
172
		sb.append("</datainfo>");
173

    
174
		return sb.toString();
175
	}
176

    
177
	private List<String> decodeType(final OafEntityDecoder decoder, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
178

    
179
		final List<String> metadata = Lists.newArrayList();
180
		metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
181
		metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
182

    
183
		if ((decoder.getEntity() instanceof Result) && !expandingRel) {
184
			metadata.add(asXmlElement("bestlicense", "", getBestLicense(), null));
185

    
186
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
187
		}
188
		if ((decoder.getEntity() instanceof Person) && !expandingRel) {
189
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
190
		}
191
		if ((decoder.getEntity() instanceof Project) && !expandingRel) {
192
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
193
		}
194

    
195
		return metadata;
196
	}
197

    
198
	private Qualifier getBestLicense() {
199
		Qualifier bestLicense = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
200
		final LicenseComparator lc = new LicenseComparator();
201
		for (final Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
202
			if (lc.compare(bestLicense, instance.getLicence()) > 0) {
203
				bestLicense = instance.getLicence();
204
			}
205
		}
206
		return bestLicense;
207
	}
208

    
209
	public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
210
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
211
	}
212

    
213
	private List<String> listRelations() {
214

    
215
		final List<String> rels = Lists.newArrayList();
216

    
217
		for (final OafDecoder decoder : this.relations) {
218

    
219
			final OafRel rel = decoder.getOafRel();
220
			final OafEntity cachedTarget = rel.getCachedTarget();
221
			final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
222

    
223
			// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
224
			if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
225

    
226
				final List<String> metadata = Lists.newArrayList();
227
				final Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
228
				final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
229
				metadata.addAll(listFields(relDecoder.getSubRel(), relFilter, false, true));
230

    
231
				String semanticclass = "";
232
				String semanticscheme = "";
233

    
234
				final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
235

    
236
				if ((cachedTarget != null) && cachedTarget.isInitialized()) {
237

    
238
					final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
239
					metadata.addAll(decodeType(OafEntityDecoder.decode(cachedTarget), filter, relDefaults, true));
240
				}
241

    
242
				final RelMetadata relMetadata = relDecoder.getRelMetadata();
243
				// debug
244
				if (relMetadata == null) {
245
					// System.err.println(this);
246
					semanticclass = semanticscheme = "UNKNOWN";
247
				} else {
248
					semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
249
					semanticscheme = relMetadata.getSemantics().getSchemename();
250
				}
251

    
252
				incrementCounter(relDescriptor.getSubRelType().toString());
253

    
254
				final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
255

    
256
				final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
257

    
258
				final DataInfo info = decoder.getOaf().getDataInfo();
259

    
260
				rels.add(templateFactory.getRel(targetType, relId, metadata, semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
261
						info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
262
			}
263
		}
264
		return rels;
265
	}
266

    
267
	private List<String> listChildren() {
268

    
269
		final List<String> children = Lists.newArrayList();
270
		for (final OafDecoder decoder : this.children) {
271
			final OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
272
			addChildren(children, cachedTarget, decoder.getRelDescriptor());
273
		}
274
		final OafEntityDecoder entity = mainEntity.decodeEntity();
275
		if (entity.getType().equals(Type.result)) {
276
			for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
277
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, null, false, false),
278
						listMap(instance.getUrlList(), new UnaryFunction<String, String>() {
279

    
280
							@Override
281
							public String evaluate(final String identifier) {
282
								return templateFactory.getWebResource(identifier);
283
							}
284
						})));
285
			}
286
			for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
287
				// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
288
				final List<String> fields = listFields(er, null, false, false);
289
				children.add(templateFactory.getChild("externalreference", null, fields));
290
			}
291
		}
292

    
293
		return children;
294
	}
295

    
296
	private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
297
		final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
298
		incrementCounter(relDescriptor.getSubRelType().toString());
299
		final Set<String> filters = entityConfigTable.getFilter(target.getType(), relDescriptor);
300
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filters, childDefaults, false)));
301
	}
302

    
303
	// //////////////////////////////////
304

    
305
	private List<String> listFields(final GeneratedMessage fields, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
306

    
307
		final List<String> metadata = Lists.newArrayList();
308

    
309
		if (fields != null) {
310

    
311
			final Set<String> seen = Sets.newHashSet();
312
			for (final Entry<FieldDescriptor, Object> e : filterFields(fields, filter)) {
313

    
314
				// final String name = getFieldName(e.getKey().getName());
315
				final String name = e.getKey().getName();
316
				seen.add(name);
317

    
318
				addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
319
			}
320

    
321
			if (defaults) {
322
				for (final FieldDescriptor fd : fields.getDescriptorForType().getFields()) {
323
					if (!seen.contains(fd.getName())) {
324
						addFieldValue(metadata, fd, getDefault(fd), expandingRel);
325
					}
326
				}
327
			}
328
		}
329
		return metadata;
330
	}
331

    
332
	private Object getDefault(final FieldDescriptor fd) {
333
		switch (fd.getType()) {
334
		case BOOL:
335
			return false;
336
		case BYTES:
337
			return "".getBytes();
338
		case MESSAGE: {
339
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) return defaultQualifier();
340
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) return StructuredProperty.newBuilder().setValue("")
341
					.setQualifier(defaultQualifier()).build();
342
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) return KeyValue.newBuilder().setKey("").setValue("").build();
343
			if (StringField.getDescriptor().equals(fd.getMessageType())) return StringField.newBuilder().setValue("").build();
344
			return null;
345
		}
346
		case SFIXED32:
347
		case SFIXED64:
348
		case SINT32:
349
		case SINT64:
350
		case INT32:
351
		case INT64:
352
		case DOUBLE:
353
		case FIXED32:
354
		case FIXED64:
355
		case FLOAT:
356
			return 0;
357
		case STRING:
358
			return "";
359
		default:
360
			return null;
361
		}
362
	}
363

    
364
	private Qualifier defaultQualifier() {
365
		return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
366
	}
367

    
368
	@SuppressWarnings("unchecked")
369
	private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
370
		if (fd.getName().equals("dateofcollection") || fd.getName().equals("id") || fd.getName().equals("url") || (value == null)) return;
371

    
372
		if (fd.getName().equals("datasourcetype")) {
373
			final String classid = ((Qualifier) value).getClassid();
374

    
375
			final Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
376
			if (specialDatasourceTypes.contains(classid)) {
377
				q.setClassid("other").setClassname("other");
378
			}
379
			metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
380
		}
381

    
382
		if (fd.isRepeated() && (value instanceof List<?>)) {
383
			for (final Object o : (List<Object>) value) {
384
				guessType(metadata, fd, o, expandingRel);
385
			}
386
		} else {
387
			guessType(metadata, fd, value, expandingRel);
388
		}
389
	}
390

    
391
	private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
392

    
393
		if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
394

    
395
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
396
				final Qualifier qualifier = (Qualifier) o;
397
				metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
398
			}
399

    
400
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
401
				final StructuredProperty sp = (StructuredProperty) o;
402
				metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
403
			}
404

    
405
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
406
				final KeyValue kv = (KeyValue) o;
407
				metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
408
			}
409

    
410
			if (StringField.getDescriptor().equals(fd.getMessageType())) {
411
				if (fd.getName().contains("fundingtree")) {
412
					handleFundingTree(metadata, fd, o, expandingRel);
413
				} else {
414
					final StringField sf = (StringField) o;
415
					final StringBuilder sb = new StringBuilder("<" + fd.getName());
416
					if (sf.hasDataInfo()) {
417
						final DataInfo dataInfo = sf.getDataInfo();
418
						dataInfoAsAttributes(sb, dataInfo);
419
					}
420
					sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
421
					metadata.add(sb.toString());
422
				}
423
			}
424

    
425
			if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
426
				final Journal j = (Journal) o;
427
				metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
428
						+ escapeXml(j.getIssnLinking()) + "\">" + escapeXml(j.getName()) + "</journal>");
429
			}
430

    
431
			if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
432
				contextes.add(((Result.Context) o).getId());
433
			}
434

    
435
			if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
436

    
437
				final ExtraInfo e = (ExtraInfo) o;
438
				final StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
439

    
440
				sb.append("name=\"" + e.getName() + "\" ");
441
				sb.append("typology=\"" + e.getTypology() + "\" ");
442
				sb.append("provenance=\"" + e.getProvenance() + "\" ");
443
				sb.append("trust=\"" + e.getTrust() + "\"");
444
				sb.append(">");
445
				sb.append(e.getValue());
446
				sb.append("</" + fd.getName() + ">");
447

    
448
				extraInfo.add(sb.toString());
449
			}
450

    
451
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
452
			if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) return;
453
			metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
454
		} else {
455
			metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
456
		}
457
	}
458

    
459
	private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
460
		sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
461
		sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
462
		sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
463
		sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
464
		return sb;
465
	}
466

    
467
	private List<String> buildContexts(final Type type) {
468
		final List<String> res = Lists.newArrayList();
469

    
470
		if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(Type.result)) {
471

    
472
			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
473

    
474
			for (final String id : contextes) {
475

    
476
				final StringTokenizer st = new StringTokenizer(id, "::");
477
				String token = "";
478
				while (st.hasMoreTokens()) {
479
					token += st.nextToken();
480

    
481
					final ContextDef def = contextMapper.get(token);
482

    
483
					if (def == null) throw new IllegalStateException("cannot find context for id: " + token);
484

    
485
					if (def.getName().equals("context")) {
486
						final String xpath = "//context/@id='" + def.getId() + "'";
487
						if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
488
							document = addContextDef(document.gotoRoot(), def);
489
						}
490
					}
491

    
492
					if (def.getName().equals("category")) {
493
						final String rootId = StringUtils.substringBefore(def.getId(), "::");
494
						document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
495
					}
496

    
497
					if (def.getName().equals("concept")) {
498
						document = addContextDef(document, def).gotoParent();
499
					}
500
					token += "::";
501
				}
502
			}
503

    
504
			for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
505
				try {
506
					res.add(asStringElement(x));
507
				} catch (final TransformerException e) {
508
					throw new RuntimeException(e);
509
				}
510
			}
511
		}
512

    
513
		return res;
514
	}
515

    
516
	private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
517
		tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
518
		if ((def.getType() != null) && !def.getType().isEmpty()) {
519
			tag.addAttribute("type", def.getType());
520
		}
521
		return tag;
522
	}
523

    
524
	private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
525
		final StringWriter buffer = new StringWriter();
526
		transformer.transform(new DOMSource(element), new StreamResult(buffer));
527
		return buffer.toString();
528
	}
529

    
530
	@SuppressWarnings("unchecked")
531
	private void handleFundingTree(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
532
		final String xmlTree = asXmlJSon(fd.getName(), o instanceof StringField ? ((StringField) o).getValue() : o.toString());
533
		if (expandingRel) {
534
			try {
535
				final Document ftree = new SAXReader().read(new StringReader(xmlTree));
536

    
537
				int i = 0;
538
				String funding = "<funding>";
539
				String _id = "";
540

    
541
				for (final Object id : Lists.reverse(ftree.selectNodes("//fundingtree//name"))) {
542
					_id += ((Element) id).getText();
543
					funding += "<funding_level_" + i + ">" + escapeXml(_id) + "</funding_level_" + i + ">";
544
					_id += "::";
545
					i++;
546
				}
547
				funding += "</funding>";
548
				// System.out.println("-------------------------------\n" + xmlTree + "\n" + funding);
549
				metadata.add(funding);
550
			} catch (final DocumentException e) {
551
				System.err.println("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
552
			}
553
		} else {
554
			metadata.add(xmlTree);
555
		}
556
	}
557

    
558
	private String asXmlJSon(final String root, final String json) {
559
		try {
560
			if ((json == null) || json.isEmpty()) return "<" + root + "/>";
561
			final JSONObject o = new JSONObject(json.replace("'", ""));
562

    
563
			final String contextId = parseFundingJson(o).toLowerCase();
564
			contextes.add(contextId);
565

    
566
			final String xml = org.json.XML.toString(o, root);
567
			return xml;
568
		} catch (final Exception e) {
569
			System.err.println("unable to parse json: " + json + "\n" + e.getMessage());
570
			return "<" + root + "/>";
571
		}
572
	}
573

    
574
	private String parseFundingJson(final JSONObject o) {
575
		try {
576
			final String key = (String) Iterators.getOnlyElement(o.keys());
577
			final JSONObject obj = o.getJSONObject(key);
578

    
579
			String id = obj.getString("id").toLowerCase();
580
			if (id.startsWith("welcometrust::")) {
581
				id = StringUtils.substringBeforeLast("uk" + id.replace("welcometrust", ""), "::") + "::" + cleanup(id);
582
			} else if (id.startsWith("wt::wt")) {
583
				id = StringUtils.substringBeforeLast(id.replaceFirst("wt", "uk"), "::") + "::" + cleanup(id);
584
			} else if (id.startsWith("corda_______::")) {
585
				id = id.replace("corda_______::", "ec::");
586
			} else if (id.startsWith("fct_________::")) {
587
				id = "pt::" + id.replace("fct_________", "fct");
588
				if (id.endsWith("::fct")) {
589
					id = StringUtils.substringBeforeLast(id, "::fct");
590
				}
591
			}
592

    
593
			String label = obj.getString("name");
594

    
595
			if (key.endsWith("level_0")) {
596

    
597
				if (id.equals("uk::wt")) {
598
					label = "Wellcome Trust Funding Stream";
599
				}
600
				contextMapper.put(id, new ContextDef(id, label, "category", ""));
601

    
602
				if (id.startsWith("ec::")) {
603
					contextMapper.put("ec", new ContextDef("ec", "European Community", "context", "funding"));
604
				} else if (id.startsWith("uk::")) {
605
					contextMapper.put("uk", new ContextDef("uk", "United Kingdom", "context", "funding"));
606
				} else if (id.startsWith("pt::")) {
607
					contextMapper.put("pt", new ContextDef("pt", "Portugal", "context", "funding"));
608
				}
609
			} else {
610
				contextMapper.put(id, new ContextDef(id, label, "concept", ""));
611
				parseFundingJson(obj.getJSONObject("parent"));
612
			}
613

    
614
			return id;
615
		} catch (final JSONException e) {
616
			throw new RuntimeException(e);
617
		}
618
	}
619

    
620
	private String cleanup(final String id) {
621
		return StringUtils.substring(StringUtils.deleteWhitespace(StringUtils.substringAfterLast(id, "::").replaceAll("[^a-zA-Z]", "")), 0, 20);
622
	}
623

    
624
	private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
625
		StringBuilder sb = new StringBuilder();
626
		sb.append("<");
627
		sb.append(name);
628
		if (q != null) {
629
			sb.append(getAttributes(q));
630
		}
631
		if (dataInfo != null) {
632
			sb = dataInfoAsAttributes(sb, dataInfo);
633
		}
634
		if ((value == null) || value.isEmpty()) {
635
			sb.append("/>");
636
			return sb.toString();
637
			// return "<" + name + getAttributes(q) + "/>";
638
		}
639

    
640
		sb.append(">");
641
		// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
642
		sb.append(escapeXml(value));
643
		sb.append("</");
644
		sb.append(name);
645
		sb.append(">");
646

    
647
		return sb.toString();
648
		// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
649
	}
650

    
651
	private String getAttributes(final Qualifier q) {
652
		if (q == null) return "";
653

    
654
		final StringBuilder sb = new StringBuilder();
655
		for (final Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
656
			// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
657
			sb.append(" ");
658
			sb.append(e.getKey().getName());
659
			sb.append("=\"");
660
			sb.append(escapeXml(e.getValue().toString()));
661
			sb.append("\"");
662
		}
663
		return sb.toString();
664
	}
665

    
666
	private Set<Entry<FieldDescriptor, Object>> filterFields(final GeneratedMessage fields, final Set<String> filter) {
667

    
668
		if (filter != null) {
669
			final Predicate<FieldDescriptor> p = new Predicate<FieldDescriptor>() {
670

    
671
				@Override
672
				public boolean apply(final FieldDescriptor descriptor) {
673
					if (fields == null) return false;
674
					final String name = descriptor.getName();
675
					return filter.contains(name);
676
				}
677
			};
678
			final Map<FieldDescriptor, Object> filtered = Maps.filterKeys(fields.getAllFields(), p);
679
			// System.out.println(
680
			// "filtered " + type.toString() + ": " + toString(filterEntries.keySet()) + "\n" +
681
			// "builder  " + fields.getDescriptorForType().getFullName() + ": " + toString(fields.getAllFields().keySet()));
682
			return filtered.entrySet();
683
		}
684
		return fields.getAllFields().entrySet();
685
	}
686

    
687
	public static String removePrefix(final String s) {
688
		if (s.contains("|")) return StringUtils.substringAfter(s, "|");
689
		return s;
690
	}
691

    
692
	public static String escapeXml(final String value) {
693
		return StringEscapeUtils.escapeXml(value).replaceAll("\"", "&quot;").replaceAll("'", "&apos;");
694
		// return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;");
695
	}
696

    
697
	private List<String> countersAsXml() {
698
		final List<String> out = Lists.newArrayList();
699
		for (final Entry<String, Integer> e : counters.entrySet()) {
700
			out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
701
		}
702
		return out;
703
	}
704

    
705
	private void incrementCounter(final String type) {
706
		if (!counters.containsKey(type)) {
707
			counters.put(type, 1);
708
		} else {
709
			counters.put(type, counters.get(type) + 1);
710
		}
711
	}
712

    
713
	@Override
714
	public String toString() {
715
		final StringBuilder sb = new StringBuilder();
716
		sb.append("################################################\n");
717
		sb.append("ID: ").append(key).append("\n");
718
		if (mainEntity != null) {
719
			sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
720
		}
721
		if (relations != null) {
722
			sb.append("\nRELATIONS:\n");
723
			for (final OafDecoder decoder : relations) {
724
				sb.append(decoder.getOafRel().toString() + "\n");
725
			}
726
		}
727
		if (children != null) {
728
			sb.append("\nCHILDREN:\n");
729
			for (final OafDecoder decoder : children) {
730
				sb.append(decoder.getOafRel().toString() + "\n");
731
			}
732
		}
733
		return sb.toString();
734
	}
735

    
736
}
(8-8/8)