Project

General

Profile

1
package eu.dnetlib.data.mapreduce.util;
2

    
3
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
4

    
5
import java.io.StringReader;
6
import java.io.StringWriter;
7
import java.util.List;
8
import java.util.Map;
9
import java.util.Map.Entry;
10
import java.util.Set;
11
import java.util.StringTokenizer;
12

    
13
import javax.xml.transform.OutputKeys;
14
import javax.xml.transform.Transformer;
15
import javax.xml.transform.TransformerConfigurationException;
16
import javax.xml.transform.TransformerException;
17
import javax.xml.transform.TransformerFactory;
18
import javax.xml.transform.TransformerFactoryConfigurationError;
19
import javax.xml.transform.dom.DOMSource;
20
import javax.xml.transform.stream.StreamResult;
21

    
22
import org.apache.commons.lang.StringUtils;
23
import org.dom4j.Document;
24
import org.dom4j.DocumentException;
25
import org.dom4j.Element;
26
import org.dom4j.io.SAXReader;
27
import org.json.JSONException;
28
import org.json.JSONObject;
29

    
30
import com.google.common.base.Predicate;
31
import com.google.common.collect.Iterators;
32
import com.google.common.collect.Lists;
33
import com.google.common.collect.Maps;
34
import com.google.common.collect.Sets;
35
import com.google.protobuf.Descriptors.EnumValueDescriptor;
36
import com.google.protobuf.Descriptors.FieldDescriptor;
37
import com.google.protobuf.GeneratedMessage;
38
import com.mycila.xmltool.XMLDoc;
39
import com.mycila.xmltool.XMLTag;
40

    
41
import eu.dnetlib.data.mapreduce.hbase.index.config.ContextDef;
42
import eu.dnetlib.data.mapreduce.hbase.index.config.ContextMapper;
43
import eu.dnetlib.data.mapreduce.hbase.index.config.EntityConfigTable;
44
import eu.dnetlib.data.mapreduce.hbase.index.config.LinkDescriptor;
45
import eu.dnetlib.data.mapreduce.hbase.index.config.RelClasses;
46
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
47
import eu.dnetlib.data.proto.FieldTypeProtos.ExtraInfo;
48
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
49
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
50
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
51
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
52
import eu.dnetlib.data.proto.OafProtos.OafEntity;
53
import eu.dnetlib.data.proto.OafProtos.OafRel;
54
import eu.dnetlib.data.proto.PersonProtos.Person;
55
import eu.dnetlib.data.proto.ProjectProtos.Project;
56
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
57
import eu.dnetlib.data.proto.ResultProtos.Result;
58
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
59
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
60
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
61
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
62
import eu.dnetlib.data.proto.TypeProtos.Type;
63
import eu.dnetlib.miscutils.functional.UnaryFunction;
64

    
65
public class XmlRecordFactory {
66

    
67
	protected Set<String> specialDatasourceTypes = Sets.newHashSet("scholarcomminfra", "infospace", "pubsrepository::mock", "entityregistry");
68

    
69
	protected TemplateFactory templateFactory = new TemplateFactory();
70

    
71
	protected OafDecoder mainEntity = null;
72

    
73
	protected String key = null;
74

    
75
	protected List<OafDecoder> relations = Lists.newLinkedList();
76
	protected List<OafDecoder> children = Lists.newLinkedList();
77

    
78
	protected EntityConfigTable entityConfigTable;
79

    
80
	protected ContextMapper contextMapper;
81

    
82
	protected RelClasses relClasses;
83

    
84
	protected String schemaLocation;
85

    
86
	protected boolean entityDefaults;
87
	protected boolean relDefaults;
88
	protected boolean childDefaults;
89

    
90
	protected Set<String> contextes = Sets.newHashSet();
91

    
92
	protected List<String> extraInfo = Lists.newArrayList();
93

    
94
	protected Map<String, Integer> counters = Maps.newHashMap();
95

    
96
	protected Transformer transformer;
97

    
98
	public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
99
			final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults)
100
			throws TransformerConfigurationException, TransformerFactoryConfigurationError {
101
		this.entityConfigTable = entityConfigTable;
102
		this.contextMapper = contextMapper;
103
		this.relClasses = relClasses;
104
		this.schemaLocation = schemaLocation;
105
		this.entityDefaults = entityDefaults;
106
		this.relDefaults = relDefaults;
107
		this.childDefaults = childDefeaults;
108

    
109
		transformer = TransformerFactory.newInstance().newTransformer();
110
		transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
111
	}
112

    
113
	public String getId() {
114
		return key;
115
	}
116

    
117
	public boolean isValid() {
118
		return mainEntity != null;
119
	}
120

    
121
	public void setMainEntity(final OafDecoder mainEntity) {
122
		this.mainEntity = mainEntity;
123
		this.key = mainEntity.decodeEntity().getId();
124
	}
125

    
126
	public void addRelation(final OafDecoder rel) {
127
		addRelOrChild(relations, rel);
128
	}
129

    
130
	public void addChild(final OafDecoder child) {
131
		addRelOrChild(children, child);
132
	}
133

    
134
	private void addRelOrChild(final List<OafDecoder> list, final OafDecoder decoder) {
135
		list.add(decoder);
136
	}
137

    
138
	public String build() {
139

    
140
		OafEntityDecoder entity = mainEntity.decodeEntity();
141
		// System.out.println("building");
142
		// System.out.println("main: " + mainEntity);
143
		// System.out.println("rel:  " + relations);
144
		// System.out.println("chi:  " + children);
145
		// System.out.println("=============");
146

    
147
		final Type type = entity.getType();
148
		final List<String> metadata = decodeType(entity, null, entityDefaults, false);
149

    
150
		// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
151
		List<String> rels = listRelations();
152
		metadata.addAll(buildContexts(type));
153
		metadata.add(parseDataInfo(mainEntity));
154

    
155
		final String body = templateFactory.buildBody(type, metadata, rels, listChildren(), extraInfo);
156

    
157
		// System.out.println("record id: " + recordId);
158
		return templateFactory.buildRecord(type, key, entity.getDateOfCollection(), schemaLocation, body, countersAsXml());
159
	}
160

    
161
	private String parseDataInfo(final OafDecoder decoder) {
162
		DataInfo dataInfo = decoder.getOaf().getDataInfo();
163

    
164
		StringBuilder sb = new StringBuilder();
165
		sb.append("<datainfo>");
166
		sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
167
		sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
168
		sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
169
		sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
170
		sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
171
		sb.append("</datainfo>");
172

    
173
		return sb.toString();
174
	}
175

    
176
	private List<String> decodeType(final OafEntityDecoder decoder, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
177

    
178
		final List<String> metadata = Lists.newArrayList();
179
		metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
180
		metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
181

    
182
		if (decoder.getEntity() instanceof Result && !expandingRel) {
183
			metadata.add(asXmlElement("bestlicense", "", getBestLicense(), null));
184

    
185
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
186
		}
187
		if (decoder.getEntity() instanceof Person && !expandingRel) {
188
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
189
		}
190
		if (decoder.getEntity() instanceof Project && !expandingRel) {
191
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
192
		}
193

    
194
		return metadata;
195
	}
196

    
197
	private Qualifier getBestLicense() {
198
		Qualifier bestLicense = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
199
		LicenseComparator lc = new LicenseComparator();
200
		for (Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
201
			if (lc.compare(bestLicense, instance.getLicence()) > 0) {
202
				bestLicense = instance.getLicence();
203
			}
204
		}
205
		return bestLicense;
206
	}
207

    
208
	public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
209
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
210
	}
211

    
212
	private List<String> listRelations() {
213

    
214
		final List<String> rels = Lists.newArrayList();
215

    
216
		for (OafDecoder decoder : this.relations) {
217

    
218
			final OafRel rel = decoder.getOafRel();
219
			final OafEntity cachedTarget = rel.getCachedTarget();
220
			final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
221

    
222
			// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
223
			if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
224

    
225
				final List<String> metadata = Lists.newArrayList();
226
				Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
227
				Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
228
				metadata.addAll(listFields(relDecoder.getSubRel(), relFilter, false, true));
229

    
230
				String semanticclass = "";
231
				String semanticscheme = "";
232

    
233
				RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
234

    
235
				if (cachedTarget != null && cachedTarget.isInitialized()) {
236

    
237
					final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
238
					metadata.addAll(decodeType(OafEntityDecoder.decode(cachedTarget), filter, relDefaults, true));
239
				}
240

    
241
				RelMetadata relMetadata = relDecoder.getRelMetadata();
242
				// debug
243
				if (relMetadata == null) {
244
					// System.err.println(this);
245
					semanticclass = semanticscheme = "UNKNOWN";
246
				} else {
247
					semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
248
					semanticscheme = relMetadata.getSemantics().getSchemename();
249
				}
250

    
251
				incrementCounter(relDescriptor.getSubRelType().toString());
252

    
253
				LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
254

    
255
				String relId = ld != null && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
256

    
257
				DataInfo info = decoder.getOaf().getDataInfo();
258

    
259
				rels.add(templateFactory.getRel(targetType, relId, metadata, semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
260
						info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
261
			}
262
		}
263
		return rels;
264
	}
265

    
266
	private List<String> listChildren() {
267

    
268
		final List<String> children = Lists.newArrayList();
269
		for (OafDecoder decoder : this.children) {
270
			OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
271
			addChildren(children, cachedTarget, decoder.getRelDescriptor());
272
		}
273
		OafEntityDecoder entity = mainEntity.decodeEntity();
274
		if (entity.getType().equals(Type.result)) {
275
			for (Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
276
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, null, false, false),
277
						listMap(instance.getUrlList(), new UnaryFunction<String, String>() {
278

    
279
							@Override
280
							public String evaluate(final String identifier) {
281
								return templateFactory.getWebResource(identifier);
282
							}
283
						})));
284
			}
285
			for (ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
286
				// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
287
				List<String> fields = listFields(er, null, false, false);
288
				children.add(templateFactory.getChild("externalreference", null, fields));
289
			}
290
		}
291

    
292
		return children;
293
	}
294

    
295
	private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
296
		final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
297
		incrementCounter(relDescriptor.getSubRelType().toString());
298
		Set<String> filters = entityConfigTable.getFilter(target.getType(), relDescriptor);
299
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filters, childDefaults, false)));
300
	}
301

    
302
	// //////////////////////////////////
303

    
304
	private List<String> listFields(final GeneratedMessage fields, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
305

    
306
		final List<String> metadata = Lists.newArrayList();
307

    
308
		if (fields != null) {
309

    
310
			Set<String> seen = Sets.newHashSet();
311
			for (Entry<FieldDescriptor, Object> e : filterFields(fields, filter)) {
312

    
313
				// final String name = getFieldName(e.getKey().getName());
314
				final String name = e.getKey().getName();
315
				seen.add(name);
316

    
317
				addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
318
			}
319

    
320
			if (defaults) {
321
				for (FieldDescriptor fd : fields.getDescriptorForType().getFields()) {
322
					if (!seen.contains(fd.getName())) {
323
						addFieldValue(metadata, fd, getDefault(fd), expandingRel);
324
					}
325
				}
326
			}
327
		}
328
		return metadata;
329
	}
330

    
331
	private Object getDefault(final FieldDescriptor fd) {
332
		switch (fd.getType()) {
333
		case BOOL:
334
			return false;
335
		case BYTES:
336
			return "".getBytes();
337
		case MESSAGE: {
338
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) { return defaultQualifier(); }
339
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) { return StructuredProperty.newBuilder().setValue("")
340
					.setQualifier(defaultQualifier()).build(); }
341
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) { return KeyValue.newBuilder().setKey("").setValue("").build(); }
342
			if (StringField.getDescriptor().equals(fd.getMessageType())) { return StringField.newBuilder().setValue("").build(); }
343
			return null;
344
		}
345
		case SFIXED32:
346
		case SFIXED64:
347
		case SINT32:
348
		case SINT64:
349
		case INT32:
350
		case INT64:
351
		case DOUBLE:
352
		case FIXED32:
353
		case FIXED64:
354
		case FLOAT:
355
			return 0;
356
		case STRING:
357
			return "";
358
		default:
359
			return null;
360
		}
361
	}
362

    
363
	private Qualifier defaultQualifier() {
364
		return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
365
	}
366

    
367
	@SuppressWarnings("unchecked")
368
	private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
369
		if (fd.getName().equals("dateofcollection") || fd.getName().equals("id") || fd.getName().equals("url") || value == null) { return; }
370

    
371
		if (fd.getName().equals("datasourcetype")) {
372
			String classid = ((Qualifier) value).getClassid();
373

    
374
			Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
375
			if (specialDatasourceTypes.contains(classid)) {
376
				q.setClassid("other").setClassname("other");
377
			}
378
			metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
379
		}
380

    
381
		if (fd.isRepeated() && value instanceof List<?>) {
382
			for (Object o : (List<Object>) value) {
383
				guessType(metadata, fd, o, expandingRel);
384
			}
385
		} else {
386
			guessType(metadata, fd, value, expandingRel);
387
		}
388
	}
389

    
390
	private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
391

    
392
		if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
393

    
394
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
395
				Qualifier qualifier = (Qualifier) o;
396
				metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
397
			}
398

    
399
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
400
				StructuredProperty sp = (StructuredProperty) o;
401
				metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
402
			}
403

    
404
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
405
				KeyValue kv = (KeyValue) o;
406
				metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
407
			}
408

    
409
			if (StringField.getDescriptor().equals(fd.getMessageType())) {
410
				if (fd.getName().contains("fundingtree")) {
411
					handleFundingTree(metadata, fd, o, expandingRel);
412
				} else {
413
					StringField sf = (StringField) o;
414
					StringBuilder sb = new StringBuilder("<" + fd.getName());
415
					if (sf.hasDataInfo()) {
416
						DataInfo dataInfo = sf.getDataInfo();
417
						dataInfoAsAttributes(sb, dataInfo);
418
					}
419
					sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
420
					metadata.add(sb.toString());
421
				}
422
			}
423

    
424
			if (Journal.getDescriptor().equals(fd.getMessageType()) && o != null) {
425
				Journal j = (Journal) o;
426
				metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
427
						+ escapeXml(j.getIssnLinking()) + "\">" + escapeXml(j.getName()) + "</journal>");
428
			}
429

    
430
			if (Context.getDescriptor().equals(fd.getMessageType()) && o != null) {
431
				contextes.add(((Result.Context) o).getId());
432
			}
433

    
434
			if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && o != null) {
435

    
436
				ExtraInfo e = (ExtraInfo) o;
437
				StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
438

    
439
				sb.append("name=\"" + e.getName() + "\" ");
440
				sb.append("typology=\"" + e.getTypology() + "\" ");
441
				sb.append("provenance=\"" + e.getProvenance() + "\" ");
442
				sb.append("trust=\"" + e.getTrust() + "\"");
443
				sb.append(">");
444
				sb.append(e.getValue());
445
				sb.append("</" + fd.getName() + ">");
446

    
447
				extraInfo.add(sb.toString());
448
			}
449

    
450
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
451
			if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) { return; }
452
			metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
453
		} else {
454
			metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
455
		}
456
	}
457

    
458
	private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
459
		sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
460
		sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
461
		sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
462
		sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
463
		return sb;
464
	}
465

    
466
	private List<String> buildContexts(final Type type) {
467
		final List<String> res = Lists.newArrayList();
468

    
469
		if (contextMapper != null && !contextMapper.isEmpty() && type.equals(Type.result)) {
470

    
471
			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
472

    
473
			for (String id : contextes) {
474

    
475
				StringTokenizer st = new StringTokenizer(id, "::");
476
				String token = "";
477
				while (st.hasMoreTokens()) {
478
					token += st.nextToken();
479

    
480
					final ContextDef def = contextMapper.get(token);
481

    
482
					if (def == null) { throw new IllegalStateException("cannot find context for id: " + token); }
483

    
484
					if (def.getName().equals("context")) {
485
						String xpath = "//context/@id='" + def.getId() + "'";
486
						if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
487
							document = addContextDef(document.gotoRoot(), def);
488
						}
489
					}
490

    
491
					if (def.getName().equals("category")) {
492
						String rootId = StringUtils.substringBefore(def.getId(), "::");
493
						document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
494
					}
495

    
496
					if (def.getName().equals("concept")) {
497
						document = addContextDef(document, def).gotoParent();
498
					}
499
					token += "::";
500
				}
501
			}
502

    
503
			for (org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
504
				try {
505
					res.add(asStringElement(x));
506
				} catch (TransformerException e) {
507
					throw new RuntimeException(e);
508
				}
509
			}
510
		}
511

    
512
		return res;
513
	}
514

    
515
	private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
516
		tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
517
		if (def.getType() != null && !def.getType().isEmpty()) {
518
			tag.addAttribute("type", def.getType());
519
		}
520
		return tag;
521
	}
522

    
523
	private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
524
		StringWriter buffer = new StringWriter();
525
		transformer.transform(new DOMSource(element), new StreamResult(buffer));
526
		return buffer.toString();
527
	}
528

    
529
	@SuppressWarnings("unchecked")
530
	private void handleFundingTree(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
531
		String xmlTree = asXmlJSon(fd.getName(), o instanceof StringField ? ((StringField) o).getValue() : o.toString());
532
		if (expandingRel) {
533
			try {
534
				Document ftree = new SAXReader().read(new StringReader(xmlTree));
535

    
536
				int i = 0;
537
				String funding = "<funding>";
538
				String _id = "";
539

    
540
				for (Object id : Lists.reverse(ftree.selectNodes("//fundingtree//name"))) {
541
					_id += ((Element) id).getText();
542
					funding += "<funding_level_" + i + ">" + escapeXml(_id) + "</funding_level_" + i + ">";
543
					_id += "::";
544
					i++;
545
				}
546
				funding += "</funding>";
547
				// System.out.println("-------------------------------\n" + xmlTree + "\n" + funding);
548
				metadata.add(funding);
549
			} catch (DocumentException e) {
550
				System.err.println("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
551
			}
552
		} else {
553
			metadata.add(xmlTree);
554
		}
555
	}
556

    
557
	private String asXmlJSon(final String root, final String json) {
558
		try {
559
			if (json == null || json.isEmpty()) { return "<" + root + "/>"; }
560
			JSONObject o = new JSONObject(json.replace("'", ""));
561

    
562
			String contextId = parseFundingJson(o).toLowerCase();
563
			contextes.add(contextId);
564

    
565
			String xml = org.json.XML.toString(o, root);
566
			return xml;
567
		} catch (Exception e) {
568
			System.err.println("unable to parse json: " + json + "\n" + e.getMessage());
569
			return "<" + root + "/>";
570
		}
571
	}
572

    
573
	private String parseFundingJson(final JSONObject o) {
574
		try {
575
			String key = (String) Iterators.getOnlyElement(o.keys());
576
			JSONObject obj = o.getJSONObject(key);
577

    
578
			String id = obj.getString("id").toLowerCase();
579
			if (id.startsWith("welcometrust::")) {
580
				id = StringUtils.substringBeforeLast("uk::" + id.replace("welcometrust", "wt"), "::") + "::" + cleanup(id);
581
			} else if (id.startsWith("wt::wt")) {
582
				id = StringUtils.substringBeforeLast(id.replaceFirst("wt", "uk"), "::") + "::" + cleanup(id);
583
			} else if (id.startsWith("corda_______::")) {
584
				id = id.replace("corda_______::", "ec::");
585
			} else if (id.startsWith("fct_________::")) {
586
				id = "pt::" + id.replace("fct_________", "fct");
587
				if (id.endsWith("::fct")) {
588
					id = StringUtils.substringBeforeLast(id, "::fct");
589
				}
590
			}
591

    
592
			String label = obj.getString("name");
593

    
594
			if (key.endsWith("level_0")) {
595

    
596
				if (id.equals("uk::wt")) {
597
					label = "Wellcome Trust Funding Stream";
598
				}
599
				contextMapper.put(id, new ContextDef(id, label, "category", ""));
600

    
601
				if (id.startsWith("ec::")) {
602
					contextMapper.put("ec", new ContextDef("ec", "European Community", "context", "funding"));
603
				} else if (id.startsWith("uk::")) {
604
					contextMapper.put("uk", new ContextDef("uk", "United Kingdom", "context", "funding"));
605
				} else if (id.startsWith("pt::")) {
606
					contextMapper.put("pt", new ContextDef("pt", "Portugal", "context", "funding"));
607
				}
608
			} else {
609
				contextMapper.put(id, new ContextDef(id, label, "concept", ""));
610
				parseFundingJson(obj.getJSONObject("parent"));
611
			}
612

    
613
			return id;
614
		} catch (JSONException e) {
615
			throw new RuntimeException(e);
616
		}
617
	}
618

    
619
	private String cleanup(final String id) {
620
		return StringUtils.substring(StringUtils.deleteWhitespace(StringUtils.substringAfterLast(id, "::").replaceAll("[^a-zA-Z]", "")), 0, 20);
621
	}
622

    
623
	private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
624
		StringBuilder sb = new StringBuilder();
625
		sb.append("<");
626
		sb.append(name);
627
		if (q != null) {
628
			sb.append(getAttributes(q));
629
		}
630
		if (dataInfo != null) {
631
			sb = dataInfoAsAttributes(sb, dataInfo);
632
		}
633
		if (value == null || value.isEmpty()) {
634
			sb.append("/>");
635
			return sb.toString();
636
			// return "<" + name + getAttributes(q) + "/>";
637
		}
638

    
639
		sb.append(">");
640
		// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
641
		sb.append(escapeXml(value));
642
		sb.append("</");
643
		sb.append(name);
644
		sb.append(">");
645

    
646
		return sb.toString();
647
		// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
648
	}
649

    
650
	private String getAttributes(final Qualifier q) {
651
		if (q == null) { return ""; }
652

    
653
		StringBuilder sb = new StringBuilder();
654
		for (Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
655
			// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
656
			sb.append(" ");
657
			sb.append(e.getKey().getName());
658
			sb.append("=\"");
659
			sb.append(escapeXml(e.getValue().toString()));
660
			sb.append("\"");
661
		}
662
		return sb.toString();
663
	}
664

    
665
	private Set<Entry<FieldDescriptor, Object>> filterFields(final GeneratedMessage fields, final Set<String> filter) {
666

    
667
		if (filter != null) {
668
			Predicate<FieldDescriptor> p = new Predicate<FieldDescriptor>() {
669

    
670
				@Override
671
				public boolean apply(final FieldDescriptor descriptor) {
672
					if (fields == null) {
673
					return false;
674
					}
675
					String name = descriptor.getName();
676
					return filter.contains(name);
677
				}
678
			};
679
			Map<FieldDescriptor, Object> filtered = Maps.filterKeys(fields.getAllFields(), p);
680
			// System.out.println(
681
			// "filtered " + type.toString() + ": " + toString(filterEntries.keySet()) + "\n" +
682
			// "builder  " + fields.getDescriptorForType().getFullName() + ": " + toString(fields.getAllFields().keySet()));
683
			return filtered.entrySet();
684
		}
685
		return fields.getAllFields().entrySet();
686
	}
687

    
688
	public static String removePrefix(final String s) {
689
		if (s.contains("|")) { return StringUtils.substringAfter(s, "|"); }
690
		return s;
691
	}
692

    
693
	public static String escapeXml(final String value) {
694
		// return StringEscapeUtils.escapeXml(value).replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;");
695
		return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;");
696
	}
697

    
698
	private List<String> countersAsXml() {
699
		List<String> out = Lists.newArrayList();
700
		for (Entry<String, Integer> e : counters.entrySet()) {
701
			out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
702
		}
703
		return out;
704
	}
705

    
706
	private void incrementCounter(final String type) {
707
		if (!counters.containsKey(type)) {
708
			counters.put(type, 1);
709
		} else {
710
			counters.put(type, counters.get(type) + 1);
711
		}
712
	}
713

    
714
	@Override
715
	public String toString() {
716
		StringBuilder sb = new StringBuilder();
717
		sb.append("################################################\n");
718
		sb.append("ID: ").append(key).append("\n");
719
		if (mainEntity != null) {
720
			sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
721
		}
722
		if (relations != null) {
723
			sb.append("\nRELATIONS:\n");
724
			for (OafDecoder decoder : relations) {
725
				sb.append(decoder.getOafRel().toString() + "\n");
726
			}
727
		}
728
		if (children != null) {
729
			sb.append("\nCHILDREN:\n");
730
			for (OafDecoder decoder : children) {
731
				sb.append(decoder.getOafRel().toString() + "\n");
732
			}
733
		}
734
		return sb.toString();
735
	}
736

    
737
}
(10-10/10)