1
|
package eu.dnetlib.data.mapreduce.util;
|
2
|
|
3
|
import java.io.StringReader;
|
4
|
import java.io.StringWriter;
|
5
|
import java.util.List;
|
6
|
import java.util.Map;
|
7
|
import java.util.Map.Entry;
|
8
|
import java.util.Set;
|
9
|
import javax.xml.transform.*;
|
10
|
import javax.xml.transform.dom.DOMSource;
|
11
|
import javax.xml.transform.stream.StreamResult;
|
12
|
|
13
|
import com.google.common.base.Joiner;
|
14
|
import com.google.common.base.Predicate;
|
15
|
import com.google.common.base.Splitter;
|
16
|
import com.google.common.collect.Iterables;
|
17
|
import com.google.common.collect.Lists;
|
18
|
import com.google.common.collect.Maps;
|
19
|
import com.google.common.collect.Sets;
|
20
|
import com.google.protobuf.Descriptors.EnumValueDescriptor;
|
21
|
import com.google.protobuf.Descriptors.FieldDescriptor;
|
22
|
import com.google.protobuf.GeneratedMessage;
|
23
|
import com.mycila.xmltool.XMLDoc;
|
24
|
import com.mycila.xmltool.XMLTag;
|
25
|
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
|
26
|
import eu.dnetlib.data.proto.FieldTypeProtos.*;
|
27
|
import eu.dnetlib.data.proto.OafProtos.OafEntity;
|
28
|
import eu.dnetlib.data.proto.OafProtos.OafRel;
|
29
|
import eu.dnetlib.data.proto.ProjectProtos.Project;
|
30
|
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
|
31
|
import eu.dnetlib.data.proto.ResultProtos.Result;
|
32
|
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
|
33
|
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
|
34
|
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
|
35
|
import eu.dnetlib.data.proto.TypeProtos;
|
36
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
37
|
import org.apache.commons.lang3.StringUtils;
|
38
|
import org.dom4j.Document;
|
39
|
import org.dom4j.DocumentException;
|
40
|
import org.dom4j.Element;
|
41
|
import org.dom4j.Node;
|
42
|
import org.dom4j.io.SAXReader;
|
43
|
|
44
|
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
|
45
|
|
46
|
public class XmlRecordFactory {
|
47
|
|
48
|
// private static final Log log = LogFactory.getLog(XmlRecordFactory.class); // NOPMD by marko on 11/24/08 5:02 PM
|
49
|
|
50
|
private final Map<String, Integer> relCounters = Maps.newHashMap();
|
51
|
protected Set<String> specialDatasourceTypes;
|
52
|
protected TemplateFactory templateFactory = new TemplateFactory();
|
53
|
protected OafDecoder mainEntity = null;
|
54
|
protected String key = null;
|
55
|
protected List<OafDecoder> relations = Lists.newLinkedList();
|
56
|
protected List<OafDecoder> children = Lists.newLinkedList();
|
57
|
protected EntityConfigTable entityConfigTable;
|
58
|
protected ContextMapper contextMapper;
|
59
|
protected RelClasses relClasses;
|
60
|
protected String schemaLocation;
|
61
|
protected boolean entityDefaults;
|
62
|
protected boolean relDefaults;
|
63
|
protected boolean childDefaults;
|
64
|
protected Set<String> contextes = Sets.newHashSet();
|
65
|
protected List<String> extraInfo = Lists.newArrayList();
|
66
|
protected Map<String, Integer> counters = Maps.newHashMap();
|
67
|
protected Transformer transformer;
|
68
|
|
69
|
protected static Predicate<String> instanceFilter = new Predicate<String>() {
|
70
|
final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "license", "accessright", "collectedfrom", "dateofacceptance", "distributionlocation");
|
71
|
@Override
|
72
|
public boolean apply(final String s) {
|
73
|
return instanceFieldFilter.contains(s);
|
74
|
}
|
75
|
};
|
76
|
|
77
|
public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
|
78
|
final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults, final Set<String> otherDatasourceTypesUForUI)
|
79
|
throws TransformerConfigurationException, TransformerFactoryConfigurationError {
|
80
|
this.entityConfigTable = entityConfigTable;
|
81
|
this.contextMapper = contextMapper;
|
82
|
this.relClasses = relClasses;
|
83
|
this.schemaLocation = schemaLocation;
|
84
|
this.entityDefaults = entityDefaults;
|
85
|
this.relDefaults = relDefaults;
|
86
|
this.childDefaults = childDefeaults;
|
87
|
this.specialDatasourceTypes = otherDatasourceTypesUForUI;
|
88
|
|
89
|
transformer = TransformerFactory.newInstance().newTransformer();
|
90
|
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
|
91
|
}
|
92
|
|
93
|
public static String removePrefix(final String s) {
|
94
|
if (s.contains("|")) return StringUtils.substringAfter(s, "|");
|
95
|
return s;
|
96
|
}
|
97
|
|
98
|
public static String escapeXml(final String value) {
|
99
|
return value.replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">").replaceAll("\"", """).replaceAll("'", "'");
|
100
|
}
|
101
|
|
102
|
public Map<String, Integer> getRelCounters() {
|
103
|
return relCounters;
|
104
|
}
|
105
|
|
106
|
public RelClasses getRelClasses() {
|
107
|
return relClasses;
|
108
|
}
|
109
|
|
110
|
public String getId() {
|
111
|
return key;
|
112
|
}
|
113
|
|
114
|
public boolean isValid() {
|
115
|
return mainEntity != null;
|
116
|
}
|
117
|
|
118
|
public void setMainEntity(final OafDecoder mainEntity) {
|
119
|
this.mainEntity = mainEntity;
|
120
|
this.key = mainEntity.decodeEntity().getId();
|
121
|
}
|
122
|
|
123
|
public void addRelation(final Type type, final OafDecoder rel) {
|
124
|
addRelOrChild(type, relations, rel);
|
125
|
}
|
126
|
|
127
|
public void addChild(final Type type, final OafDecoder child) {
|
128
|
addRelOrChild(type, children, child);
|
129
|
}
|
130
|
|
131
|
private void addRelOrChild(final Type type, final List<OafDecoder> list, final OafDecoder decoder) {
|
132
|
|
133
|
final OafRel oafRel = decoder.getOafRel();
|
134
|
final String rd = oafRel.getRelType().toString() + "_" + oafRel.getSubRelType() + "_" + relClasses.getInverse(oafRel.getRelClass());
|
135
|
final LinkDescriptor ld = entityConfigTable.getDescriptor(type, new RelDescriptor(rd));
|
136
|
|
137
|
if (getRelCounters().get(rd) == null) {
|
138
|
getRelCounters().put(rd, 0);
|
139
|
}
|
140
|
|
141
|
if (ld == null) {
|
142
|
list.add(decoder);
|
143
|
return;
|
144
|
}
|
145
|
|
146
|
if (ld.getMax() < 0) {
|
147
|
list.add(decoder);
|
148
|
return;
|
149
|
}
|
150
|
|
151
|
if (getRelCounters().get(rd) < ld.getMax()) {
|
152
|
getRelCounters().put(rd, getRelCounters().get(rd) + 1);
|
153
|
list.add(decoder);
|
154
|
}
|
155
|
}
|
156
|
|
157
|
public String build() {
|
158
|
try {
|
159
|
final OafEntityDecoder entity = mainEntity.decodeEntity();
|
160
|
// log.info("building");
|
161
|
// log.info("main: " + mainEntity);
|
162
|
// log.info("rel: " + relations);
|
163
|
// log.info("chi: " + children);
|
164
|
// log.info("=============");
|
165
|
|
166
|
final Predicate<String> filter = entityConfigTable.getFilter(entity.getType());
|
167
|
final List<String> metadata = decodeType(entity, filter, entityDefaults, false);
|
168
|
|
169
|
// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
|
170
|
final List<String> rels = listRelations();
|
171
|
metadata.addAll(buildContexts(entity.getType()));
|
172
|
metadata.add(parseDataInfo(mainEntity));
|
173
|
|
174
|
final String body = templateFactory.buildBody(entity.getType(), metadata, rels, listChildren(), extraInfo);
|
175
|
|
176
|
return templateFactory
|
177
|
.buildRecord(key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
|
178
|
} catch (final Throwable e) {
|
179
|
throw new RuntimeException(String.format("error building record '%s'", this.key), e);
|
180
|
}
|
181
|
}
|
182
|
|
183
|
private String parseDataInfo(final OafDecoder decoder) {
|
184
|
final DataInfo dataInfo = decoder.getOaf().getDataInfo();
|
185
|
|
186
|
final StringBuilder sb = new StringBuilder();
|
187
|
sb.append("<datainfo>");
|
188
|
sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
|
189
|
sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
|
190
|
sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
|
191
|
sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
|
192
|
sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
|
193
|
sb.append("</datainfo>");
|
194
|
|
195
|
return sb.toString();
|
196
|
}
|
197
|
|
198
|
private List<String> decodeType(final OafEntityDecoder decoder, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
|
199
|
|
200
|
final List<String> metadata = Lists.newArrayList();
|
201
|
metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
|
202
|
metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
|
203
|
|
204
|
if ((decoder.getEntity() instanceof Result) && !expandingRel) {
|
205
|
metadata.add(asXmlElement("bestaccessright", "", getBestAccessright(), null));
|
206
|
|
207
|
metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
|
208
|
}
|
209
|
if ((decoder.getEntity() instanceof Project) && !expandingRel) {
|
210
|
metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
|
211
|
}
|
212
|
|
213
|
return metadata;
|
214
|
}
|
215
|
|
216
|
private Qualifier getBestAccessright() {
|
217
|
Qualifier bestAccessRight = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
|
218
|
final LicenseComparator lc = new LicenseComparator();
|
219
|
for (final Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
|
220
|
if (lc.compare(bestAccessRight, instance.getAccessright()) > 0) {
|
221
|
bestAccessRight = instance.getAccessright();
|
222
|
}
|
223
|
}
|
224
|
return bestAccessRight;
|
225
|
}
|
226
|
|
227
|
public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
|
228
|
return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
|
229
|
}
|
230
|
|
231
|
private List<String> listRelations() {
|
232
|
|
233
|
final List<String> rels = Lists.newArrayList();
|
234
|
|
235
|
for (final OafDecoder decoder : this.relations) {
|
236
|
|
237
|
final OafRel rel = decoder.getOafRel();
|
238
|
final OafEntity cachedTarget = rel.getCachedTarget();
|
239
|
final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
|
240
|
|
241
|
// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
|
242
|
if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
|
243
|
|
244
|
final List<String> metadata = Lists.newArrayList();
|
245
|
final TypeProtos.Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
|
246
|
//final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
|
247
|
metadata.addAll(listFields(relDecoder.getSubRel(), entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
|
248
|
|
249
|
String semanticclass = "";
|
250
|
String semanticscheme = "";
|
251
|
|
252
|
final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
|
253
|
|
254
|
if ((cachedTarget != null) && cachedTarget.isInitialized()) {
|
255
|
|
256
|
//final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
|
257
|
final OafEntityDecoder d = OafEntityDecoder.decode(cachedTarget);
|
258
|
metadata.addAll(decodeType(d, entityConfigTable.getIncludeFilter(targetType, relDescriptor), relDefaults, true));
|
259
|
if (d.getType().equals(Type.result)) {
|
260
|
for(Instance i : cachedTarget.getResult().getInstanceList()) {
|
261
|
final List<String> fields = listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true);
|
262
|
metadata.addAll(fields);
|
263
|
}
|
264
|
}
|
265
|
}
|
266
|
|
267
|
final RelMetadata relMetadata = relDecoder.getRelMetadata();
|
268
|
// debug
|
269
|
if (relMetadata == null) {
|
270
|
// System.err.println(this);
|
271
|
semanticclass = semanticscheme = "UNKNOWN";
|
272
|
} else {
|
273
|
semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
|
274
|
semanticscheme = relMetadata.getSemantics().getSchemename();
|
275
|
}
|
276
|
|
277
|
final String rd = relDescriptor.getSubRelType().toString();
|
278
|
incrementCounter(rd);
|
279
|
|
280
|
final DataInfo info = decoder.getOaf().getDataInfo();
|
281
|
if (info.getInferred()) {
|
282
|
incrementCounter(rd + "_inferred");
|
283
|
} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "sysimport:crosswalk")) {
|
284
|
incrementCounter(rd + "_collected");
|
285
|
} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "user:")) {
|
286
|
incrementCounter(rd + "_claimed");
|
287
|
}
|
288
|
|
289
|
final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
|
290
|
|
291
|
final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
|
292
|
|
293
|
rels.add(templateFactory.getRel(targetType, relId, Sets.newHashSet(metadata), semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
|
294
|
info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
|
295
|
}
|
296
|
}
|
297
|
return rels;
|
298
|
}
|
299
|
|
300
|
// //////////////////////////////////
|
301
|
|
302
|
private List<String> listChildren() {
|
303
|
|
304
|
final List<String> children = Lists.newArrayList();
|
305
|
for (final OafDecoder decoder : this.children) {
|
306
|
final OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
|
307
|
addChildren(children, cachedTarget, decoder.getRelDescriptor());
|
308
|
}
|
309
|
final OafEntityDecoder entity = mainEntity.decodeEntity();
|
310
|
if (entity.getType().equals(Type.result)) {
|
311
|
for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
|
312
|
children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, instanceFilter, false, false),
|
313
|
listMap(instance.getUrlList(), identifier -> templateFactory.getWebResource(identifier))));
|
314
|
}
|
315
|
for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
|
316
|
// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
|
317
|
final List<String> fields = listFields(er, null, false, false);
|
318
|
children.add(templateFactory.getChild("externalreference", null, fields));
|
319
|
}
|
320
|
}
|
321
|
|
322
|
return children;
|
323
|
}
|
324
|
|
325
|
private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
|
326
|
final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
|
327
|
incrementCounter(relDescriptor.getSubRelType().toString());
|
328
|
final Predicate<String> filter = entityConfigTable.getIncludeFilter(target.getType(), relDescriptor);
|
329
|
children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filter, childDefaults, false)));
|
330
|
}
|
331
|
|
332
|
private List<String> listFields(final GeneratedMessage fields, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
|
333
|
|
334
|
final List<String> metadata = Lists.newArrayList();
|
335
|
|
336
|
if (fields != null) {
|
337
|
|
338
|
final Set<String> seen = Sets.newHashSet();
|
339
|
|
340
|
final Map<FieldDescriptor, Object> filtered = filterFields(fields, filter);
|
341
|
for (final Entry<FieldDescriptor, Object> e : filtered.entrySet()) {
|
342
|
|
343
|
final String name = e.getKey().getName();
|
344
|
seen.add(name);
|
345
|
addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
|
346
|
}
|
347
|
|
348
|
if (defaults) {
|
349
|
final Iterable<FieldDescriptor> unseen =
|
350
|
Iterables.filter(fields.getDescriptorForType().getFields(), fd -> !seen.contains(fd.getName()) && filter.apply(fd.getName()));
|
351
|
for(FieldDescriptor fd : unseen){
|
352
|
addFieldValue(metadata, fd, getDefault(fd), expandingRel);
|
353
|
}
|
354
|
}
|
355
|
}
|
356
|
return metadata;
|
357
|
}
|
358
|
|
359
|
private Object getDefault(final FieldDescriptor fd) {
|
360
|
switch (fd.getType()) {
|
361
|
case BOOL:
|
362
|
return false;
|
363
|
case BYTES:
|
364
|
return "".getBytes();
|
365
|
case MESSAGE: {
|
366
|
if (Qualifier.getDescriptor().equals(fd.getMessageType())) return defaultQualifier();
|
367
|
if (StructuredProperty.getDescriptor().equals(fd.getMessageType()))
|
368
|
return StructuredProperty.newBuilder().setValue("").setQualifier(defaultQualifier()).build();
|
369
|
if (KeyValue.getDescriptor().equals(fd.getMessageType())) return KeyValue.newBuilder().setKey("").setValue("").build();
|
370
|
if (StringField.getDescriptor().equals(fd.getMessageType())) return StringField.newBuilder().setValue("").build();
|
371
|
if (BoolField.getDescriptor().equals(fd.getMessageType())) return BoolField.newBuilder().buildPartial();
|
372
|
return null;
|
373
|
}
|
374
|
case SFIXED32:
|
375
|
case SFIXED64:
|
376
|
case SINT32:
|
377
|
case SINT64:
|
378
|
case INT32:
|
379
|
case INT64:
|
380
|
case DOUBLE:
|
381
|
case FIXED32:
|
382
|
case FIXED64:
|
383
|
case FLOAT:
|
384
|
return 0;
|
385
|
case STRING:
|
386
|
return "";
|
387
|
default:
|
388
|
return null;
|
389
|
}
|
390
|
}
|
391
|
|
392
|
private Qualifier defaultQualifier() {
|
393
|
return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
|
394
|
}
|
395
|
|
396
|
@SuppressWarnings("unchecked")
|
397
|
private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
|
398
|
if ("dateofcollection".equals(fd.getName()) ||
|
399
|
"dateoftransformation".equals(fd.getName()) ||
|
400
|
"id".equals(fd.getName()) ||
|
401
|
(value == null)) return;
|
402
|
|
403
|
if (fd.getName().equals("datasourcetype")) {
|
404
|
final String classid = ((Qualifier) value).getClassid();
|
405
|
|
406
|
final Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
|
407
|
if (specialDatasourceTypes.contains(classid)) {
|
408
|
q.setClassid("other").setClassname("other");
|
409
|
}
|
410
|
metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
|
411
|
}
|
412
|
|
413
|
if (fd.isRepeated() && (value instanceof List<?>)) {
|
414
|
for (final Object o : (List<Object>) value) {
|
415
|
guessType(metadata, fd, o, expandingRel);
|
416
|
}
|
417
|
} else {
|
418
|
guessType(metadata, fd, value, expandingRel);
|
419
|
}
|
420
|
}
|
421
|
|
422
|
private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
|
423
|
|
424
|
if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
|
425
|
|
426
|
if(Author.getDescriptor().equals(fd.getMessageType())) {
|
427
|
|
428
|
final Author a = (Author) o;
|
429
|
|
430
|
final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
|
431
|
if (a.hasName()) {
|
432
|
sb.append(" name=\"" + escapeXml(a.getName()) + "\"");
|
433
|
}
|
434
|
if (a.hasSurname()) {
|
435
|
sb.append(" surname=\"" + escapeXml(a.getSurname()) + "\"");
|
436
|
}
|
437
|
if (a.getPidCount() > 0) {
|
438
|
a.getPidList().stream()
|
439
|
.filter(kv -> StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()))
|
440
|
.forEach(kv -> {
|
441
|
String pidType = escapeXml(kv.getKey())
|
442
|
.replaceAll("\\W", "");
|
443
|
String pidValue = escapeXml(kv.getValue());
|
444
|
sb.append(String.format(" %s=\"%s\"", pidType, pidValue));
|
445
|
});
|
446
|
}
|
447
|
|
448
|
sb.append(">" + escapeXml(a.getFullname()) + "</creator>");
|
449
|
|
450
|
metadata.add(sb.toString());
|
451
|
}
|
452
|
|
453
|
if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
|
454
|
final Qualifier qualifier = (Qualifier) o;
|
455
|
metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
|
456
|
}
|
457
|
|
458
|
if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
|
459
|
final StructuredProperty sp = (StructuredProperty) o;
|
460
|
metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
|
461
|
|
462
|
if (!expandingRel && fd.getName().equals("pid")) {
|
463
|
if (sp.getQualifier().getClassid().equalsIgnoreCase("doi")) {
|
464
|
incrementCounter("doi");
|
465
|
}
|
466
|
}
|
467
|
}
|
468
|
|
469
|
if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
|
470
|
final KeyValue kv = (KeyValue) o;
|
471
|
metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
|
472
|
}
|
473
|
|
474
|
if (StringField.getDescriptor().equals(fd.getMessageType())) {
|
475
|
final String fieldName = fd.getName();
|
476
|
|
477
|
if (fieldName.equals("fundingtree")) {
|
478
|
final String xmlTree = o instanceof StringField ? ((StringField) o).getValue() : o.toString();
|
479
|
|
480
|
if (expandingRel) {
|
481
|
metadata.add(getRelFundingTree(xmlTree));
|
482
|
fillContextMap(xmlTree);
|
483
|
} else {
|
484
|
metadata.add(xmlTree);
|
485
|
}
|
486
|
} else {
|
487
|
final StringField sf = (StringField) o;
|
488
|
final StringBuilder sb = new StringBuilder("<" + fd.getName());
|
489
|
if (sf.hasDataInfo()) {
|
490
|
final DataInfo dataInfo = sf.getDataInfo();
|
491
|
dataInfoAsAttributes(sb, dataInfo);
|
492
|
}
|
493
|
sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
|
494
|
metadata.add(sb.toString());
|
495
|
}
|
496
|
}
|
497
|
|
498
|
if (BoolField.getDescriptor().equals(fd.getMessageType())) {
|
499
|
final BoolField bf = (BoolField) o;
|
500
|
final StringBuilder sb = new StringBuilder("<" + fd.getName());
|
501
|
if (bf.hasDataInfo()) {
|
502
|
final DataInfo dataInfo = bf.getDataInfo();
|
503
|
dataInfoAsAttributes(sb, dataInfo);
|
504
|
}
|
505
|
|
506
|
sb.append(">" + (bf.hasValue() ? bf.getValue() : "") + "</" + fd.getName() + ">");
|
507
|
metadata.add(sb.toString());
|
508
|
}
|
509
|
|
510
|
if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
|
511
|
final Journal j = (Journal) o;
|
512
|
metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
|
513
|
+ escapeXml(j.getIssnLinking()) + "\" " + "ep=\"" + escapeXml(j.getEp()) + "\" " + "iss=\"" + escapeXml(j.getIss()) + "\" " + "sp=\""
|
514
|
+ escapeXml(j.getSp()) + "\" " + "vol=\"" + escapeXml(j.getVol()) + "\">" + escapeXml(j.getName()) + "</journal>");
|
515
|
}
|
516
|
|
517
|
if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
|
518
|
final String contextid = ((Context) o).getId();
|
519
|
contextes.add(contextid);
|
520
|
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
|
521
|
if(contextid.equalsIgnoreCase("dh-ch::subcommunity::2")){
|
522
|
contextes.add("clarin");
|
523
|
}
|
524
|
|
525
|
}
|
526
|
|
527
|
if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
|
528
|
|
529
|
final ExtraInfo e = (ExtraInfo) o;
|
530
|
final StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
|
531
|
|
532
|
sb.append("name=\"" + e.getName() + "\" ");
|
533
|
sb.append("typology=\"" + e.getTypology() + "\" ");
|
534
|
sb.append("provenance=\"" + e.getProvenance() + "\" ");
|
535
|
sb.append("trust=\"" + e.getTrust() + "\"");
|
536
|
sb.append(">");
|
537
|
sb.append(e.getValue());
|
538
|
sb.append("</" + fd.getName() + ">");
|
539
|
|
540
|
extraInfo.add(sb.toString());
|
541
|
}
|
542
|
|
543
|
} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
|
544
|
if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) return;
|
545
|
metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
|
546
|
} else {
|
547
|
if(fd.getType().equals(FieldDescriptor.Type.FLOAT)){
|
548
|
metadata.add(asXmlElement(fd.getName(), String.format ("%.2f", ((Float) o)), null, null));
|
549
|
} else {
|
550
|
metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
|
551
|
}
|
552
|
}
|
553
|
}
|
554
|
|
555
|
private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
|
556
|
sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
|
557
|
sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
|
558
|
sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
|
559
|
sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
|
560
|
return sb;
|
561
|
}
|
562
|
|
563
|
private List<String> buildContexts(final Type type) {
|
564
|
final List<String> res = Lists.newArrayList();
|
565
|
|
566
|
if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(Type.result)) {
|
567
|
|
568
|
XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
|
569
|
|
570
|
for (final String context : contextes) {
|
571
|
|
572
|
String id = "";
|
573
|
for (final String token : Splitter.on("::").split(context)) {
|
574
|
id += token;
|
575
|
|
576
|
final ContextDef def = contextMapper.get(id);
|
577
|
|
578
|
if (def == null) {
|
579
|
continue;
|
580
|
// throw new IllegalStateException(String.format("cannot find context for id '%s'", id));
|
581
|
}
|
582
|
|
583
|
if (def.getName().equals("context")) {
|
584
|
final String xpath = "//context/@id='" + def.getId() + "'";
|
585
|
if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
|
586
|
document = addContextDef(document.gotoRoot(), def);
|
587
|
}
|
588
|
}
|
589
|
|
590
|
if (def.getName().equals("category")) {
|
591
|
final String rootId = StringUtils.substringBefore(def.getId(), "::");
|
592
|
document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
|
593
|
}
|
594
|
|
595
|
if (def.getName().equals("concept")) {
|
596
|
document = addContextDef(document, def).gotoParent();
|
597
|
}
|
598
|
id += "::";
|
599
|
}
|
600
|
}
|
601
|
|
602
|
for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
|
603
|
try {
|
604
|
res.add(asStringElement(x));
|
605
|
} catch (final TransformerException e) {
|
606
|
throw new RuntimeException(e);
|
607
|
}
|
608
|
}
|
609
|
}
|
610
|
|
611
|
return res;
|
612
|
}
|
613
|
|
614
|
private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
|
615
|
tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
|
616
|
if ((def.getType() != null) && !def.getType().isEmpty()) {
|
617
|
tag.addAttribute("type", def.getType());
|
618
|
}
|
619
|
return tag;
|
620
|
}
|
621
|
|
622
|
private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
|
623
|
final StringWriter buffer = new StringWriter();
|
624
|
transformer.transform(new DOMSource(element), new StreamResult(buffer));
|
625
|
return buffer.toString();
|
626
|
}
|
627
|
|
628
|
@SuppressWarnings("unchecked")
|
629
|
private String getRelFundingTree(final String xmlTree) {
|
630
|
String funding = "<funding>";
|
631
|
try {
|
632
|
final Document ftree = new SAXReader().read(new StringReader(xmlTree));
|
633
|
funding = "<funding>";
|
634
|
// String _id = "";
|
635
|
|
636
|
funding += getFunderElement(ftree);
|
637
|
|
638
|
for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
|
639
|
final Element e = (Element) o;
|
640
|
final String _id = e.valueOf("./id");
|
641
|
funding += "<" + e.getName() + " name=\"" + escapeXml(e.valueOf("./name")) + "\">" + escapeXml(_id) + "</" + e.getName() + ">";
|
642
|
// _id += "::";
|
643
|
}
|
644
|
} catch (final DocumentException e) {
|
645
|
throw new IllegalArgumentException("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
|
646
|
} finally {
|
647
|
funding += "</funding>";
|
648
|
}
|
649
|
return funding;
|
650
|
}
|
651
|
|
652
|
private String getFunderElement(final Document ftree) {
|
653
|
final String funderId = ftree.valueOf("//fundingtree/funder/id/text()");
|
654
|
final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname/text()");
|
655
|
final String funderName = ftree.valueOf("//fundingtree/funder/name/text()");
|
656
|
final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction/text()");
|
657
|
|
658
|
return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
|
659
|
+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
|
660
|
}
|
661
|
|
662
|
private void fillContextMap(final String xmlTree) {
|
663
|
|
664
|
Document fundingPath;
|
665
|
try {
|
666
|
fundingPath = new SAXReader().read(new StringReader(xmlTree));
|
667
|
} catch (final DocumentException e) {
|
668
|
throw new RuntimeException(e);
|
669
|
}
|
670
|
try {
|
671
|
final Node funder = fundingPath.selectSingleNode("//funder");
|
672
|
|
673
|
if (funder != null) {
|
674
|
|
675
|
final String funderShortName = funder.valueOf("./shortname");
|
676
|
contextes.add(funderShortName);
|
677
|
|
678
|
contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
|
679
|
final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
|
680
|
if (level0 != null) {
|
681
|
final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
|
682
|
contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
|
683
|
final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
|
684
|
if (level1 == null) {
|
685
|
contextes.add(level0Id);
|
686
|
} else {
|
687
|
final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
|
688
|
contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
|
689
|
final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
|
690
|
if (level2 == null) {
|
691
|
contextes.add(level1Id);
|
692
|
} else {
|
693
|
final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
|
694
|
contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
|
695
|
contextes.add(level2Id);
|
696
|
}
|
697
|
}
|
698
|
}
|
699
|
}
|
700
|
} catch (final NullPointerException e) {
|
701
|
throw new IllegalArgumentException("malformed funding path: " + xmlTree, e);
|
702
|
}
|
703
|
}
|
704
|
|
705
|
private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
|
706
|
StringBuilder sb = new StringBuilder();
|
707
|
sb.append("<");
|
708
|
sb.append(name);
|
709
|
if (q != null) {
|
710
|
sb.append(getAttributes(q));
|
711
|
}
|
712
|
if (dataInfo != null) {
|
713
|
sb = dataInfoAsAttributes(sb, dataInfo);
|
714
|
}
|
715
|
if ((value == null) || value.isEmpty()) {
|
716
|
sb.append("/>");
|
717
|
return sb.toString();
|
718
|
// return "<" + name + getAttributes(q) + "/>";
|
719
|
}
|
720
|
|
721
|
sb.append(">");
|
722
|
// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
|
723
|
sb.append(escapeXml(value));
|
724
|
sb.append("</");
|
725
|
sb.append(name);
|
726
|
sb.append(">");
|
727
|
|
728
|
return sb.toString();
|
729
|
// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
|
730
|
}
|
731
|
|
732
|
private String getAttributes(final Qualifier q) {
|
733
|
if (q == null) return "";
|
734
|
|
735
|
final StringBuilder sb = new StringBuilder();
|
736
|
for (final Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
|
737
|
// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
|
738
|
sb.append(" ");
|
739
|
sb.append(e.getKey().getName());
|
740
|
sb.append("=\"");
|
741
|
sb.append(escapeXml(e.getValue().toString()));
|
742
|
sb.append("\"");
|
743
|
}
|
744
|
return sb.toString();
|
745
|
}
|
746
|
|
747
|
|
748
|
private Map<FieldDescriptor, Object> filterFields(final GeneratedMessage fields, final Predicate<String> acceptFilter) {
|
749
|
if(acceptFilter == null) return fields.getAllFields();
|
750
|
final Map<FieldDescriptor, Object> res = Maps.newHashMap();
|
751
|
for(Entry<FieldDescriptor, Object> e : fields.getAllFields().entrySet()) {
|
752
|
if (acceptFilter.apply(e.getKey().getName())) {
|
753
|
res.put(e.getKey(), e.getValue());
|
754
|
}
|
755
|
}
|
756
|
return res;
|
757
|
}
|
758
|
|
759
|
|
760
|
|
761
|
private List<String> countersAsXml() {
|
762
|
final List<String> out = Lists.newArrayList();
|
763
|
for (final Entry<String, Integer> e : counters.entrySet()) {
|
764
|
out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
|
765
|
}
|
766
|
return out;
|
767
|
}
|
768
|
|
769
|
private void incrementCounter(final String type) {
|
770
|
if (!counters.containsKey(type)) {
|
771
|
counters.put(type, 1);
|
772
|
} else {
|
773
|
counters.put(type, counters.get(type) + 1);
|
774
|
}
|
775
|
}
|
776
|
|
777
|
@Override
|
778
|
public String toString() {
|
779
|
final StringBuilder sb = new StringBuilder();
|
780
|
sb.append("################################################\n");
|
781
|
sb.append("ID: ").append(key).append("\n");
|
782
|
if (mainEntity != null) {
|
783
|
sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
|
784
|
}
|
785
|
if (relations != null) {
|
786
|
sb.append("\nRELATIONS:\n");
|
787
|
for (final OafDecoder decoder : relations) {
|
788
|
sb.append(decoder.getOafRel().toString() + "\n");
|
789
|
}
|
790
|
}
|
791
|
if (children != null) {
|
792
|
sb.append("\nCHILDREN:\n");
|
793
|
for (final OafDecoder decoder : children) {
|
794
|
sb.append(decoder.getOafRel().toString() + "\n");
|
795
|
}
|
796
|
}
|
797
|
return sb.toString();
|
798
|
}
|
799
|
|
800
|
}
|