Project

General

Profile

1
package eu.dnetlib.data.mapreduce.util;
2

    
3
import java.io.StringReader;
4
import java.io.StringWriter;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Map.Entry;
8
import java.util.Set;
9
import javax.xml.transform.*;
10
import javax.xml.transform.dom.DOMSource;
11
import javax.xml.transform.stream.StreamResult;
12

    
13
import com.google.common.base.Joiner;
14
import com.google.common.base.Predicate;
15
import com.google.common.base.Splitter;
16
import com.google.common.collect.Iterables;
17
import com.google.common.collect.Lists;
18
import com.google.common.collect.Maps;
19
import com.google.common.collect.Sets;
20
import com.google.protobuf.Descriptors.EnumValueDescriptor;
21
import com.google.protobuf.Descriptors.FieldDescriptor;
22
import com.google.protobuf.GeneratedMessage;
23
import com.mycila.xmltool.XMLDoc;
24
import com.mycila.xmltool.XMLTag;
25
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
26
import eu.dnetlib.data.proto.FieldTypeProtos.*;
27
import eu.dnetlib.data.proto.OafProtos.OafEntity;
28
import eu.dnetlib.data.proto.OafProtos.OafRel;
29
import eu.dnetlib.data.proto.ProjectProtos.Project;
30
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
31
import eu.dnetlib.data.proto.ResultProtos.Result;
32
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
33
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
34
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
35
import eu.dnetlib.data.proto.TypeProtos;
36
import eu.dnetlib.data.proto.TypeProtos.Type;
37
import org.apache.commons.lang3.StringUtils;
38
import org.dom4j.Document;
39
import org.dom4j.DocumentException;
40
import org.dom4j.Element;
41
import org.dom4j.Node;
42
import org.dom4j.io.SAXReader;
43

    
44
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
45

    
46
public class XmlRecordFactory {
47

    
48
	// private static final Log log = LogFactory.getLog(XmlRecordFactory.class); // NOPMD by marko on 11/24/08 5:02 PM
49

    
50
	private final Map<String, Integer> relCounters = Maps.newHashMap();
51
	protected Set<String> specialDatasourceTypes;
52
	protected TemplateFactory templateFactory = new TemplateFactory();
53
	protected OafDecoder mainEntity = null;
54
	protected String key = null;
55
	protected List<OafDecoder> relations = Lists.newLinkedList();
56
	protected List<OafDecoder> children = Lists.newLinkedList();
57
	protected EntityConfigTable entityConfigTable;
58
	protected ContextMapper contextMapper;
59
	protected RelClasses relClasses;
60
	protected String schemaLocation;
61
	protected boolean entityDefaults;
62
	protected boolean relDefaults;
63
	protected boolean childDefaults;
64
	protected Set<String> contextes = Sets.newHashSet();
65
	protected List<String> extraInfo = Lists.newArrayList();
66
	protected Map<String, Integer> counters = Maps.newHashMap();
67
	protected Transformer transformer;
68

    
69
	protected static Predicate<String> instanceFilter = new Predicate<String>() {
70
		final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "license", "accessright", "collectedfrom", "dateofacceptance", "distributionlocation", "refereed");
71
		@Override
72
		public boolean apply(final String s) {
73
			return instanceFieldFilter.contains(s);
74
		}
75
	};
76

    
77
	public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
78
			final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults, final Set<String> otherDatasourceTypesUForUI)
79
			throws TransformerConfigurationException, TransformerFactoryConfigurationError {
80
		this.entityConfigTable = entityConfigTable;
81
		this.contextMapper = contextMapper;
82
		this.relClasses = relClasses;
83
		this.schemaLocation = schemaLocation;
84
		this.entityDefaults = entityDefaults;
85
		this.relDefaults = relDefaults;
86
		this.childDefaults = childDefeaults;
87
		this.specialDatasourceTypes = otherDatasourceTypesUForUI;
88

    
89
		transformer = TransformerFactory.newInstance().newTransformer();
90
		transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
91
	}
92

    
93
	public static String removePrefix(final String s) {
94
		if (s.contains("|")) return StringUtils.substringAfter(s, "|");
95
		return s;
96
	}
97

    
98
	public static String escapeXml(final String value) {
99
		return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("\"", "&quot;").replaceAll("'", "&apos;");
100
	}
101

    
102
	public Map<String, Integer> getRelCounters() {
103
		return relCounters;
104
	}
105

    
106
	public RelClasses getRelClasses() {
107
		return relClasses;
108
	}
109

    
110
	public String getId() {
111
		return key;
112
	}
113

    
114
	public boolean isValid() {
115
		return mainEntity != null;
116
	}
117

    
118
	public void setMainEntity(final OafDecoder mainEntity) {
119
		this.mainEntity = mainEntity;
120
		this.key = mainEntity.decodeEntity().getId();
121
	}
122

    
123
	public void addRelation(final Type type, final OafDecoder rel) {
124
		addRelOrChild(type, relations, rel);
125
	}
126

    
127
	public void addChild(final Type type, final OafDecoder child) {
128
		addRelOrChild(type, children, child);
129
	}
130

    
131
	private void addRelOrChild(final Type type, final List<OafDecoder> list, final OafDecoder decoder) {
132

    
133
		final OafRel oafRel = decoder.getOafRel();
134
		final String rd = oafRel.getRelType().toString() + "_" + oafRel.getSubRelType() + "_" + relClasses.getInverse(oafRel.getRelClass());
135
		final LinkDescriptor ld = entityConfigTable.getDescriptor(type, new RelDescriptor(rd));
136

    
137
		if (getRelCounters().get(rd) == null) {
138
			getRelCounters().put(rd, 0);
139
		}
140

    
141
		if (ld == null) {
142
			list.add(decoder);
143
			return;
144
		}
145

    
146
		if (ld.getMax() < 0) {
147
			list.add(decoder);
148
			return;
149
		}
150

    
151
		if (getRelCounters().get(rd) < ld.getMax()) {
152
			getRelCounters().put(rd, getRelCounters().get(rd) + 1);
153
			list.add(decoder);
154
		}
155
	}
156

    
157
	public String build() {
158
		try {
159
			final OafEntityDecoder entity = mainEntity.decodeEntity();
160
			// log.info("building");
161
			// log.info("main: " + mainEntity);
162
			// log.info("rel:  " + relations);
163
			// log.info("chi:  " + children);
164
			// log.info("=============");
165

    
166
			final Predicate<String> filter = entityConfigTable.getFilter(entity.getType());
167
			final List<String> metadata = decodeType(entity, filter, entityDefaults, false);
168

    
169
			// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
170
			final List<String> rels = listRelations();
171
			metadata.addAll(buildContexts(entity.getType()));
172
			metadata.add(parseDataInfo(mainEntity));
173

    
174
			final String body = templateFactory.buildBody(entity.getType(), metadata, rels, listChildren(), extraInfo);
175

    
176
			return templateFactory
177
					.buildRecord(key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
178
		} catch (final Throwable e) {
179
			throw new RuntimeException(String.format("error building record '%s'", this.key), e);
180
		}
181
	}
182

    
183
	private String parseDataInfo(final OafDecoder decoder) {
184
		final DataInfo dataInfo = decoder.getOaf().getDataInfo();
185

    
186
		final StringBuilder sb = new StringBuilder();
187
		sb.append("<datainfo>");
188
		sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
189
		sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
190
		sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
191
		sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
192
		sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
193
		sb.append("</datainfo>");
194

    
195
		return sb.toString();
196
	}
197

    
198
	private List<String> decodeType(final OafEntityDecoder decoder, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
199

    
200
		final List<String> metadata = Lists.newArrayList();
201
		metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
202
		metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
203

    
204
		if ((decoder.getEntity() instanceof Result) && !expandingRel) {
205
			metadata.add(asXmlElement("bestaccessright", "", getBestAccessright(), null));
206

    
207
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
208
		}
209
		if ((decoder.getEntity() instanceof Project) && !expandingRel) {
210
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
211
		}
212

    
213
		return metadata;
214
	}
215

    
216
	private Qualifier getBestAccessright() {
217
		Qualifier bestAccessRight = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
218
		final LicenseComparator lc = new LicenseComparator();
219
		for (final Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
220
			if (lc.compare(bestAccessRight, instance.getAccessright()) > 0) {
221
				bestAccessRight = instance.getAccessright();
222
			}
223
		}
224
		return bestAccessRight;
225
	}
226

    
227
	public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
228
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
229
	}
230

    
231
	private List<String> listRelations() {
232

    
233
		final List<String> rels = Lists.newArrayList();
234

    
235
		for (final OafDecoder decoder : this.relations) {
236

    
237
			final OafRel rel = decoder.getOafRel();
238
			final OafEntity cachedTarget = rel.getCachedTarget();
239
			final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
240

    
241
			// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
242
			if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
243

    
244
				final List<String> metadata = Lists.newArrayList();
245
				final TypeProtos.Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
246
				//final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
247
				metadata.addAll(listFields(relDecoder.getSubRel(), entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
248

    
249
				String semanticclass = "";
250
				String semanticscheme = "";
251

    
252
				final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
253

    
254
				if ((cachedTarget != null) && cachedTarget.isInitialized()) {
255

    
256
					//final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
257
					final OafEntityDecoder d = OafEntityDecoder.decode(cachedTarget);
258
					metadata.addAll(decodeType(d, entityConfigTable.getIncludeFilter(targetType, relDescriptor), relDefaults, true));
259
					if (d.getType().equals(Type.result)) {
260
						for(Instance i : cachedTarget.getResult().getInstanceList()) {
261
							final List<String> fields = listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true);
262
							metadata.addAll(fields);
263
						}
264
					}
265
				}
266

    
267
				final RelMetadata relMetadata = relDecoder.getRelMetadata();
268
				// debug
269
				if (relMetadata == null) {
270
					// System.err.println(this);
271
					semanticclass = semanticscheme = "UNKNOWN";
272
				} else {
273
					semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
274
					semanticscheme = relMetadata.getSemantics().getSchemename();
275
				}
276

    
277
				final String rd = relDescriptor.getSubRelType().toString();
278
				incrementCounter(rd);
279

    
280
				final DataInfo info = decoder.getOaf().getDataInfo();
281
				if (info.getInferred()) {
282
					incrementCounter(rd + "_inferred");
283
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "sysimport:crosswalk")) {
284
					incrementCounter(rd + "_collected");
285
				} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "user:")) {
286
					incrementCounter(rd + "_claimed");
287
				}
288

    
289
				final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
290

    
291
				final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
292

    
293
				rels.add(templateFactory.getRel(targetType, relId, Sets.newHashSet(metadata), semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
294
						info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
295
			}
296
		}
297
		return rels;
298
	}
299

    
300
	// //////////////////////////////////
301

    
302
	private List<String> listChildren() {
303

    
304
		final List<String> children = Lists.newArrayList();
305
		for (final OafDecoder decoder : this.children) {
306
			final OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
307
			addChildren(children, cachedTarget, decoder.getRelDescriptor());
308
		}
309
		final OafEntityDecoder entity = mainEntity.decodeEntity();
310
		if (entity.getType().equals(Type.result)) {
311
			for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
312
				final List<String> instancemetadata = listFields(instance, instanceFilter, false, false);
313

    
314
				if (instance.hasProcessingchargeamount()) {
315
					instancemetadata.add("<processingchargeamount currency=\""
316
							+ instance.getProcessingchargecurrency().getValue() + "\">"
317
							+ instance.getProcessingchargeamount().getValue()
318
							+ "</processingchargeamount>");
319
				}
320

    
321
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), instancemetadata,
322
						listMap(instance.getUrlList(), identifier -> templateFactory.getWebResource(identifier))));
323
			}
324
			for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
325
				// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
326
				final List<String> fields = listFields(er, null, false, false);
327
				children.add(templateFactory.getChild("externalreference", null, fields));
328
			}
329
		}
330

    
331
		return children;
332
	}
333

    
334
	private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
335
		final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
336
		incrementCounter(relDescriptor.getSubRelType().toString());
337
		final Predicate<String> filter = entityConfigTable.getIncludeFilter(target.getType(), relDescriptor);
338
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filter, childDefaults, false)));
339
	}
340

    
341
	private List<String> listFields(final GeneratedMessage fields, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
342

    
343
		final List<String> metadata = Lists.newArrayList();
344

    
345
		if (fields != null) {
346

    
347
			final Set<String> seen = Sets.newHashSet();
348

    
349
			final Map<FieldDescriptor, Object> filtered = filterFields(fields, filter);
350
			for (final Entry<FieldDescriptor, Object> e : filtered.entrySet()) {
351

    
352
				final String name = e.getKey().getName();
353
				seen.add(name);
354
				addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
355
			}
356

    
357
			if (defaults) {
358
				final Iterable<FieldDescriptor> unseen =
359
						Iterables.filter(fields.getDescriptorForType().getFields(), fd -> !seen.contains(fd.getName()) && filter.apply(fd.getName()));
360
				for(FieldDescriptor fd : unseen){
361
					addFieldValue(metadata, fd, getDefault(fd), expandingRel);
362
				}
363
			}
364
		}
365
		return metadata;
366
	}
367

    
368
	private Object getDefault(final FieldDescriptor fd) {
369
		switch (fd.getType()) {
370
		case BOOL:
371
			return false;
372
		case BYTES:
373
			return "".getBytes();
374
		case MESSAGE: {
375
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) return defaultQualifier();
376
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType()))
377
				return StructuredProperty.newBuilder().setValue("").setQualifier(defaultQualifier()).build();
378
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) return KeyValue.newBuilder().setKey("").setValue("").build();
379
			if (StringField.getDescriptor().equals(fd.getMessageType())) return StringField.newBuilder().setValue("").build();
380
			if (BoolField.getDescriptor().equals(fd.getMessageType())) return BoolField.newBuilder().buildPartial();
381
			return null;
382
		}
383
		case SFIXED32:
384
		case SFIXED64:
385
		case SINT32:
386
		case SINT64:
387
		case INT32:
388
		case INT64:
389
		case FIXED32:
390
		case FIXED64:
391
		case DOUBLE:
392
		case FLOAT:
393
		case STRING:
394
			return "";
395
		default:
396
			return null;
397
		}
398
	}
399

    
400
	private Qualifier defaultQualifier() {
401
		return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
402
	}
403

    
404
	@SuppressWarnings("unchecked")
405
	private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
406
		if ("dateofcollection".equals(fd.getName()) ||
407
			"dateoftransformation".equals(fd.getName()) ||
408
			"id".equals(fd.getName()) ||
409
				(value == null)) return;
410

    
411
		if (fd.getName().equals("datasourcetype")) {
412
			final String classid = ((Qualifier) value).getClassid();
413

    
414
			final Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
415
			if (specialDatasourceTypes.contains(classid)) {
416
				q.setClassid("other").setClassname("other");
417
			}
418
			metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
419
		}
420

    
421
		if (fd.isRepeated() && (value instanceof List<?>)) {
422
			for (final Object o : (List<Object>) value) {
423
				guessType(metadata, fd, o, expandingRel);
424
			}
425
		} else {
426
			guessType(metadata, fd, value, expandingRel);
427
		}
428
	}
429

    
430
	private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
431

    
432
		if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
433

    
434
			if(Author.getDescriptor().equals(fd.getMessageType())) {
435

    
436
				final Author a = (Author) o;
437

    
438
				final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
439
				if (a.hasName()) {
440
					sb.append(" name=\"" + escapeXml(a.getName()) + "\"");
441
				}
442
				if (a.hasSurname()) {
443
					sb.append(" surname=\"" + escapeXml(a.getSurname()) + "\"");
444
				}
445
				if (a.getPidCount() > 0) {
446
					a.getPidList().stream()
447
							.filter(kv -> StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()))
448
							.forEach(kv -> {
449
								String pidType = escapeXml(kv.getKey())
450
										.replaceAll("\\W", "");
451
								String pidValue = escapeXml(kv.getValue());
452
								sb.append(String.format(" %s=\"%s\"", pidType, pidValue));
453
							});
454
				}
455

    
456
				sb.append(">" + escapeXml(a.getFullname()) + "</creator>");
457

    
458
				metadata.add(sb.toString());
459
			}
460

    
461
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
462
				final Qualifier qualifier = (Qualifier) o;
463
				metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
464
			}
465

    
466
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
467
				final StructuredProperty sp = (StructuredProperty) o;
468
				metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
469

    
470
				if (!expandingRel && fd.getName().equals("pid")) {
471
					if (sp.getQualifier().getClassid().equalsIgnoreCase("doi")) {
472
						incrementCounter("doi");
473
					}
474
				}
475
			}
476

    
477
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
478
				final KeyValue kv = (KeyValue) o;
479
				metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
480
			}
481

    
482
			if (StringField.getDescriptor().equals(fd.getMessageType())) {
483
				final String fieldName = fd.getName();
484

    
485
				if (fieldName.equals("fundingtree")) {
486
					final String xmlTree = o instanceof StringField ? ((StringField) o).getValue() : o.toString();
487

    
488
					if (expandingRel) {
489
						metadata.add(getRelFundingTree(xmlTree));
490
						fillContextMap(xmlTree);
491
					} else {
492
						metadata.add(xmlTree);
493
					}
494
				} else {
495
					final StringField sf = (StringField) o;
496
					final StringBuilder sb = new StringBuilder("<" + fd.getName());
497
					if (sf.hasDataInfo()) {
498
						final DataInfo dataInfo = sf.getDataInfo();
499
						dataInfoAsAttributes(sb, dataInfo);
500
					}
501
					sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
502
					metadata.add(sb.toString());
503
				}
504
			}
505

    
506
			if (BoolField.getDescriptor().equals(fd.getMessageType())) {
507
				final BoolField bf = (BoolField) o;
508
				final StringBuilder sb = new StringBuilder("<" + fd.getName());
509
				if (bf.hasDataInfo()) {
510
					final DataInfo dataInfo = bf.getDataInfo();
511
					dataInfoAsAttributes(sb, dataInfo);
512
				}
513

    
514
				sb.append(">" + (bf.hasValue() ? bf.getValue() : "") + "</" + fd.getName() + ">");
515
				metadata.add(sb.toString());
516
			}
517

    
518
			if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
519
				final Journal j = (Journal) o;
520
				metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
521
						+ escapeXml(j.getIssnLinking()) + "\" " + "ep=\"" + escapeXml(j.getEp()) + "\" " + "iss=\"" + escapeXml(j.getIss()) + "\" " + "sp=\""
522
						+ escapeXml(j.getSp()) + "\" " + "vol=\"" + escapeXml(j.getVol()) + "\">" + escapeXml(j.getName()) + "</journal>");
523
			}
524

    
525
			if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
526
				final String contextid = ((Context) o).getId();
527
				contextes.add(contextid);
528
				/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
529
				if(contextid.equalsIgnoreCase("dh-ch::subcommunity::2")){
530
					contextes.add("clarin");
531
				}
532

    
533
			}
534

    
535
			if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
536

    
537
				final ExtraInfo e = (ExtraInfo) o;
538
				final StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
539

    
540
				sb.append("name=\"" + e.getName() + "\" ");
541
				sb.append("typology=\"" + e.getTypology() + "\" ");
542
				sb.append("provenance=\"" + e.getProvenance() + "\" ");
543
				sb.append("trust=\"" + e.getTrust() + "\"");
544
				sb.append(">");
545
				sb.append(e.getValue());
546
				sb.append("</" + fd.getName() + ">");
547

    
548
				extraInfo.add(sb.toString());
549
			}
550

    
551
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
552
			if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) return;
553
			metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
554
		} else {
555
			if(o instanceof String && o.equals("")){
556
				metadata.add(asXmlElement(fd.getName(), "", null, null));
557
			}
558
			else {
559
				switch (fd.getType()) {
560
					case SFIXED32:
561
					case SFIXED64:
562
					case SINT32:
563
					case SINT64:
564
					case INT32:
565
					case INT64:
566
					case FIXED32:
567
					case FIXED64:
568
						metadata.add(asXmlElement(fd.getName(), String.format("%s", o), null, null));
569
						break;
570
					case DOUBLE:
571
					case FLOAT:
572
						metadata.add(asXmlElement(fd.getName(), String.format("%.2f", ((Float) o)), null, null));
573
						break;
574
					default:
575
						metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
576
				}
577
			}
578
		}
579
	}
580

    
581
	private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
582
		sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
583
		sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
584
		sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
585
		sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
586
		return sb;
587
	}
588

    
589
	private List<String> buildContexts(final Type type) {
590
		final List<String> res = Lists.newArrayList();
591

    
592
		if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(Type.result)) {
593

    
594
			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
595

    
596
			for (final String context : contextes) {
597

    
598
				String id = "";
599
				for (final String token : Splitter.on("::").split(context)) {
600
					id += token;
601

    
602
					final ContextDef def = contextMapper.get(id);
603

    
604
					if (def == null) {
605
						continue;
606
						// throw new IllegalStateException(String.format("cannot find context for id '%s'", id));
607
					}
608

    
609
					if (def.getName().equals("context")) {
610
						final String xpath = "//context/@id='" + def.getId() + "'";
611
						if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
612
							document = addContextDef(document.gotoRoot(), def);
613
						}
614
					}
615

    
616
					if (def.getName().equals("category")) {
617
						final String rootId = StringUtils.substringBefore(def.getId(), "::");
618
						document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
619
					}
620

    
621
					if (def.getName().equals("concept")) {
622
						document = addContextDef(document, def).gotoParent();
623
					}
624
					id += "::";
625
				}
626
			}
627

    
628
			for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
629
				try {
630
					res.add(asStringElement(x));
631
				} catch (final TransformerException e) {
632
					throw new RuntimeException(e);
633
				}
634
			}
635
		}
636

    
637
		return res;
638
	}
639

    
640
	private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
641
		tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
642
		if ((def.getType() != null) && !def.getType().isEmpty()) {
643
			tag.addAttribute("type", def.getType());
644
		}
645
		return tag;
646
	}
647

    
648
	private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
649
		final StringWriter buffer = new StringWriter();
650
		transformer.transform(new DOMSource(element), new StreamResult(buffer));
651
		return buffer.toString();
652
	}
653

    
654
	@SuppressWarnings("unchecked")
655
	private String getRelFundingTree(final String xmlTree) {
656
		String funding = "<funding>";
657
		try {
658
			final Document ftree = new SAXReader().read(new StringReader(xmlTree));
659
			funding = "<funding>";
660
			// String _id = "";
661

    
662
			funding += getFunderElement(ftree);
663

    
664
			for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
665
				final Element e = (Element) o;
666
				final String _id = e.valueOf("./id");
667
				funding += "<" + e.getName() + " name=\"" + escapeXml(e.valueOf("./name")) + "\">" + escapeXml(_id) + "</" + e.getName() + ">";
668
				// _id += "::";
669
			}
670
		} catch (final DocumentException e) {
671
			throw new IllegalArgumentException("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
672
		} finally {
673
			funding += "</funding>";
674
		}
675
		return funding;
676
	}
677

    
678
	private String getFunderElement(final Document ftree) {
679
		final String funderId = ftree.valueOf("//fundingtree/funder/id/text()");
680
		final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname/text()");
681
		final String funderName = ftree.valueOf("//fundingtree/funder/name/text()");
682
		final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction/text()");
683

    
684
		return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
685
				+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
686
	}
687

    
688
	private void fillContextMap(final String xmlTree) {
689

    
690
		Document fundingPath;
691
		try {
692
			fundingPath = new SAXReader().read(new StringReader(xmlTree));
693
		} catch (final DocumentException e) {
694
			throw new RuntimeException(e);
695
		}
696
		try {
697
			final Node funder = fundingPath.selectSingleNode("//funder");
698

    
699
			if (funder != null) {
700

    
701
				final String funderShortName = funder.valueOf("./shortname");
702
				contextes.add(funderShortName);
703

    
704
				contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
705
				final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
706
				if (level0 != null) {
707
					final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
708
					contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
709
					final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
710
					if (level1 == null) {
711
						contextes.add(level0Id);
712
					} else {
713
						final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
714
						contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
715
						final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
716
						if (level2 == null) {
717
							contextes.add(level1Id);
718
						} else {
719
							final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
720
							contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
721
							contextes.add(level2Id);
722
						}
723
					}
724
				}
725
			}
726
		} catch (final NullPointerException e) {
727
			throw new IllegalArgumentException("malformed funding path: " + xmlTree, e);
728
		}
729
	}
730

    
731
	private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
732
		StringBuilder sb = new StringBuilder();
733
		sb.append("<");
734
		sb.append(name);
735
		if (q != null) {
736
			sb.append(getAttributes(q));
737
		}
738
		if (dataInfo != null) {
739
			sb = dataInfoAsAttributes(sb, dataInfo);
740
		}
741
		if ((value == null) || value.isEmpty()) {
742
			sb.append("/>");
743
			return sb.toString();
744
			// return "<" + name + getAttributes(q) + "/>";
745
		}
746

    
747
		sb.append(">");
748
		// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
749
		sb.append(escapeXml(value));
750
		sb.append("</");
751
		sb.append(name);
752
		sb.append(">");
753

    
754
		return sb.toString();
755
		// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
756
	}
757

    
758
	private String getAttributes(final Qualifier q) {
759
		if (q == null) return "";
760

    
761
		final StringBuilder sb = new StringBuilder();
762
		for (final Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
763
			// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
764
			sb.append(" ");
765
			sb.append(e.getKey().getName());
766
			sb.append("=\"");
767
			sb.append(escapeXml(e.getValue().toString()));
768
			sb.append("\"");
769
		}
770
		return sb.toString();
771
	}
772

    
773

    
774
	private Map<FieldDescriptor, Object> filterFields(final GeneratedMessage fields, final Predicate<String> acceptFilter) {
775
		if(acceptFilter == null) return fields.getAllFields();
776
		final Map<FieldDescriptor, Object> res = Maps.newHashMap();
777
		for(Entry<FieldDescriptor, Object> e : fields.getAllFields().entrySet()) {
778
			if (acceptFilter.apply(e.getKey().getName())) {
779
				res.put(e.getKey(), e.getValue());
780
			}
781
		}
782
		return res;
783
	}
784

    
785

    
786

    
787
	private List<String> countersAsXml() {
788
		final List<String> out = Lists.newArrayList();
789
		for (final Entry<String, Integer> e : counters.entrySet()) {
790
			out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
791
		}
792
		return out;
793
	}
794

    
795
	private void incrementCounter(final String type) {
796
		if (!counters.containsKey(type)) {
797
			counters.put(type, 1);
798
		} else {
799
			counters.put(type, counters.get(type) + 1);
800
		}
801
	}
802

    
803
	@Override
804
	public String toString() {
805
		final StringBuilder sb = new StringBuilder();
806
		sb.append("################################################\n");
807
		sb.append("ID: ").append(key).append("\n");
808
		if (mainEntity != null) {
809
			sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
810
		}
811
		if (relations != null) {
812
			sb.append("\nRELATIONS:\n");
813
			for (final OafDecoder decoder : relations) {
814
				sb.append(decoder.getOafRel().toString() + "\n");
815
			}
816
		}
817
		if (children != null) {
818
			sb.append("\nCHILDREN:\n");
819
			for (final OafDecoder decoder : children) {
820
				sb.append(decoder.getOafRel().toString() + "\n");
821
			}
822
		}
823
		return sb.toString();
824
	}
825

    
826
}
(8-8/8)