Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import com.google.common.base.Predicate;
4
import com.google.common.base.Predicates;
5
import com.google.common.base.Splitter;
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.Descriptor;
9
import com.google.protobuf.Descriptors.FieldDescriptor;
10
import com.google.protobuf.InvalidProtocolBufferException;
11
import com.google.protobuf.Message;
12
import com.google.protobuf.Message.Builder;
13
import com.google.protobuf.ProtocolMessageEnum;
14
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
15
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
16
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
17
import eu.dnetlib.data.proto.FieldTypeProtos.*;
18
import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance.OriginDescription;
19
import eu.dnetlib.data.proto.KindProtos.Kind;
20
import eu.dnetlib.data.proto.TypeProtos.Type;
21
import eu.dnetlib.miscutils.collections.Pair;
22
import org.apache.commons.codec.binary.Base64;
23
import org.apache.commons.codec.binary.Hex;
24
import org.apache.commons.lang3.StringUtils;
25
import org.apache.commons.lang3.math.NumberUtils;
26
import org.w3c.dom.NamedNodeMap;
27
import org.w3c.dom.Node;
28
import org.w3c.dom.NodeList;
29

    
30
import java.nio.charset.Charset;
31
import java.security.MessageDigest;
32
import java.util.Iterator;
33
import java.util.List;
34
import java.util.Map;
35

    
36
public abstract class AbstractDNetXsltFunctions {
37

    
38
	public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
39
	private static final int MAX_NSPREFIX_LEN = 12;
40

    
41

    
42
	public static Predicate<String> urlFilter = s -> s.trim().matches(URL_REGEX);
43

    
44
	// Builder for Entities
45
	protected static DNGF getOaf(final DNGFEntity.Builder entity, final DataInfo.Builder info) {
46
		return _getOaf(DNGF.newBuilder(), info).setKind(Kind.entity).setEntity(entity).build();
47
	}
48

    
49
	// Builder for Rels
50
	protected static DNGF getOaf(final DNGFRel.Builder rel, final DataInfo.Builder info) {
51
		return _getOaf(DNGF.newBuilder(), info).setKind(Kind.relation).setRel(rel).build();
52
	}
53

    
54
	private static DNGF.Builder _getOaf(final DNGF.Builder oaf, final DataInfo.Builder info) {
55
		if (info != null) {
56
			return oaf.setDataInfo(ensureDataInfo(info));
57
		} else return oaf;
58
	}
59

    
60
	protected static DataInfo.Builder ensureDataInfo(final DataInfo.Builder info) {
61
		if (info.isInitialized()) return info;
62
		return getDataInfo(null, "UNKNOWN", "0.9", false, false);
63
	}
64

    
65
	protected static KeyValue getKV(final String id, final String name) {
66
		return KeyValue.newBuilder().setKey(id).setValue(name).build();
67
	}
68

    
69
	protected static DNGFRel.Builder getRel(
70
			final String sourceId,
71
			final Type sourceType,
72
			final String targetId,
73
			final Type targetType,
74
			final Qualifier relType,
75
			final boolean isChild) {
76
		return DNGFRel.newBuilder().setSource(sourceId).setTarget(targetId).setRelType(relType)
77
				.setChild(isChild);
78
	}
79

    
80
	protected static DNGFEntity.Builder getEntity(final Type type,
81
			final String id,
82
			final KeyValue collectedFrom,
83
			final List<String> originalIds,
84
			final String dateOfCollection,
85
			final String dateOfTransformation,
86
			final List<StructuredProperty> pids) {
87
		final DNGFEntity.Builder builder = DNGFEntity.newBuilder().setType(type).setId(id);
88
		if (collectedFrom != null) builder.addCollectedfrom(collectedFrom);
89
		builder.setDateoftransformation(StringUtils.isBlank(dateOfTransformation) ? "" : dateOfTransformation);
90
		builder.setDateofcollection(StringUtils.isBlank(dateOfCollection) ? "" : dateOfCollection);
91

    
92
		if ((originalIds != null) && !originalIds.isEmpty()) {
93
			builder.addAllOriginalId(Iterables.filter(originalIds, getPredicateNotBlankString()));
94
		}
95

    
96
		if ((pids != null) && !pids.isEmpty()) {
97
			builder.addAllPid(Iterables.filter(pids, Predicates.notNull()));
98
		}
99

    
100
		return builder;
101
	}
102

    
103
    public static Predicate<String> getPredicateNotBlankString() {
104
        return s -> StringUtils.isNotBlank(s);
105
	}
106

    
107
	public static DataInfo.Builder getDataInfo(final NodeList about,
108
			final String provenanceaction,
109
			final String trust,
110
			final boolean deletedbyinference,
111
			final boolean inferred) {
112

    
113
		final DataInfo.Builder dataInfoBuilder = DataInfo.newBuilder();
114
		dataInfoBuilder.setInferred(Boolean.valueOf(inferred));
115
		dataInfoBuilder.setDeletedbyinference(Boolean.valueOf(deletedbyinference));
116
		dataInfoBuilder.setTrust(trust);
117
		dataInfoBuilder.setProvenanceaction(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());
118

    
119
		// checking instanceof because when receiving an empty <oaf:datainfo> we don't want to parse it.
120
		if (((about != null) && (about.getLength() > 0)) /* && (dataInfo instanceof org.w3c.dom.Element) */) {
121

    
122
			final org.w3c.dom.Element dataInfoElement = getDirectChild((org.w3c.dom.Element) about.item(0), "datainfo");
123
			if (dataInfoElement != null) {
124
				org.w3c.dom.Element elem = getDirectChild(dataInfoElement, "inferred");
125
				dataInfoBuilder.setInferred(Boolean.valueOf(getStringValue(elem, String.valueOf(inferred))));
126

    
127
				elem = getDirectChild(dataInfoElement, "deletedbyinference");
128
				dataInfoBuilder.setDeletedbyinference(Boolean.valueOf(getStringValue(elem, String.valueOf(deletedbyinference))));
129

    
130
				elem = getDirectChild(dataInfoElement, "trust");
131
				dataInfoBuilder.setTrust(getStringValue(elem, trust));
132

    
133
				elem = getDirectChild(dataInfoElement, "inferenceprovenance");
134
				dataInfoBuilder.setInferenceprovenance(getStringValue(elem));
135

    
136
				elem = getDirectChild(dataInfoElement, "provenanceaction");
137
				final Qualifier.Builder pBuilder = Qualifier.newBuilder();
138
				if (elem != null && elem.hasAttributes()) {
139
					final NamedNodeMap attributes = elem.getAttributes();
140
					pBuilder.setClassid(getAttributeValue(attributes, "classid"));
141
					pBuilder.setClassname(getAttributeValue(attributes, "classname"));
142
					pBuilder.setSchemeid(getAttributeValue(attributes, "schemeid"));
143
					pBuilder.setSchemename(getAttributeValue(attributes, "schemename"));
144
				} else {
145
					pBuilder.mergeFrom(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());
146
				}
147
				dataInfoBuilder.setProvenanceaction(pBuilder);
148
			}
149
		}
150

    
151
		return dataInfoBuilder;
152
	}
153

    
154
	protected static OAIProvenance getOAIProvenance(final NodeList about) {
155

    
156
		OAIProvenance.Builder oaiProv = OAIProvenance.newBuilder();
157

    
158
		if (((about != null) && (about.getLength() > 0))) {
159

    
160
			final org.w3c.dom.Element provenance = getDirectChild((org.w3c.dom.Element) about.item(0), "provenance");
161

    
162
			if (provenance != null) {
163
				final org.w3c.dom.Element origDesc = getDirectChild(provenance, "originDescription");
164
				oaiProv.setOriginDescription(buildOriginDescription(origDesc, OriginDescription.newBuilder()));
165
			}
166
		}
167

    
168
		return oaiProv.build();
169
	}
170

    
171
	private static OriginDescription buildOriginDescription(final org.w3c.dom.Element origDesc, final OriginDescription.Builder od) {
172
		od.setHarvestDate(origDesc.getAttribute("harvestDate")).setAltered(Boolean.valueOf(origDesc.getAttribute("altered")));
173

    
174
		org.w3c.dom.Element elem = getDirectChild(origDesc, "baseURL");
175
		od.setBaseURL(getStringValue(elem));
176

    
177
		elem = getDirectChild(origDesc, "identifier");
178
		od.setIdentifier(getStringValue(elem));
179

    
180
		elem = getDirectChild(origDesc, "datestamp");
181
		od.setDatestamp(getStringValue(elem));
182

    
183
		elem = getDirectChild(origDesc, "metadataNamespace");
184
		od.setMetadataNamespace(getStringValue(elem));
185

    
186
		elem = getDirectChild(origDesc, "originDescription");
187

    
188
		if (elem != null) {
189

    
190
			od.setOriginDescription(buildOriginDescription(elem, OriginDescription.newBuilder()));
191
		}
192

    
193
		return od.build();
194
	}
195

    
196
	private static String getStringValue(final org.w3c.dom.Element elem, final String defaultValue) {
197
		return (elem != null && elem.getTextContent() != null) ? elem.getTextContent() : defaultValue;
198
	}
199

    
200
	private static String getStringValue(final org.w3c.dom.Element elem) {
201
		return getStringValue(elem, "");
202
	}
203

    
204
	protected static String getAttributeValue(final NamedNodeMap attributes, final String name) {
205
		final Node attr = attributes.getNamedItem(name);
206
		if (attr == null) return "";
207
		final String value = attr.getNodeValue();
208
		return value != null ? value : "";
209
	}
210

    
211
	protected static org.w3c.dom.Element getDirectChild(final org.w3c.dom.Element parent, final String name) {
212
		for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
213
			if ((child instanceof org.w3c.dom.Element) && name.equals(child.getLocalName())) return (org.w3c.dom.Element) child;
214
		}
215
		return null;
216
	}
217

    
218
	protected static Qualifier.Builder getSimpleQualifier(final String classname, final String schemename) {
219
		return getQualifier(classname, classname, schemename, schemename);
220
	}
221

    
222
	protected static Qualifier.Builder getSimpleQualifier(final ProtocolMessageEnum classname, final String schemename) {
223
		return getQualifier(classname.toString(), classname.toString(), schemename, schemename);
224
	}
225

    
226
	protected static Qualifier.Builder getQualifier(final String classid, final String classname, final String schemeid, final String schemename) {
227
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemeid).setSchemename(schemename);
228
	}
229

    
230
	protected static Qualifier.Builder setQualifier(final Qualifier.Builder qualifier, final List<String> fields) {
231
		if ((fields == null) || fields.isEmpty() || fields.get(0).isEmpty()) return null;
232

    
233
		if ((fields != null) && !fields.isEmpty() && (fields.get(0) != null)) {
234
			qualifier.setClassid(fields.get(0));
235
			qualifier.setClassname(fields.get(0));
236
		}
237
		return qualifier;
238
	}
239

    
240
	protected static void addStructuredProps(final Builder builder,
241
			final FieldDescriptor fd,
242
			final List<String> values,
243
			final String classid,
244
			final String schemeid) {
245
		if (values != null) {
246
			for (final String s : values) {
247
				addField(builder, fd, getStructuredProperty(s, classid, classid, schemeid, schemeid));
248
			}
249
		}
250
	}
251

    
252
	protected static List<StructuredProperty> parsePids(final NodeList nodelist) {
253

    
254
		final List<StructuredProperty> pids = Lists.newArrayList();
255

    
256
		for (int i = 0; i < nodelist.getLength(); i++) {
257
			final Node node = nodelist.item(i);
258
			Node pidType = null;
259
			if (node.getNodeType() == Node.ELEMENT_NODE) {
260
				if (node.getLocalName().equalsIgnoreCase("identifier")) {
261
					pidType = node.getAttributes().getNamedItem("identifierType");
262
				}
263
				//this is to handle dataset pids
264
				if (node.getLocalName().equalsIgnoreCase("alternateIdentifier")) {
265
					pidType = node.getAttributes().getNamedItem("alternateIdentifierType");
266
				}
267

    
268
				for (int j = 0; j < node.getChildNodes().getLength(); j++) {
269
					final Node child = node.getChildNodes().item(j);
270

    
271
					if ((child.getNodeType() == Node.TEXT_NODE) && (pidType != null) && (pidType.getNodeValue() != null) && !pidType.getNodeValue().isEmpty()
272
							&& !pidType.getNodeValue().equalsIgnoreCase("url")) {
273

    
274
						final String type = pidType.getNodeValue().toLowerCase();
275

    
276
						final String value = child.getTextContent();
277

    
278
						pids.add(getStructuredProperty(value, type, type, "dnet:pid_types", "dnet:pid_types"));
279
						break;
280
					}
281
				}
282
			}
283
		}
284
		return pids;
285
	}
286

    
287
	@SuppressWarnings("unchecked")
288
	protected static void addField(final Builder builder, final FieldDescriptor descriptor, Object value) {
289

    
290
		if (value == null) return;
291

    
292
		if (value instanceof List<?>) {
293
			for (final Object o : (List<Object>) value) {
294
				addField(builder, descriptor, o);
295
			}
296
		} else {
297
			Object fieldValue = value;
298
			switch (descriptor.getType()) {
299
			case BOOL:
300
				fieldValue = Boolean.valueOf(value.toString());
301
				break;
302
			case BYTES:
303
				fieldValue = value.toString().getBytes(Charset.forName("UTF-8"));
304
				break;
305
			case DOUBLE:
306
				fieldValue = Double.valueOf(value.toString());
307
				break;
308
			case FLOAT:
309
				fieldValue = Float.valueOf(value.toString());
310
				break;
311
			case INT32:
312
			case INT64:
313
			case SINT32:
314
			case SINT64:
315
				fieldValue = Integer.valueOf(value.toString());
316
				break;
317
			case MESSAGE:
318
				final Builder q = builder.newBuilderForField(descriptor);
319

    
320
				if (value instanceof Builder) {
321
					value = ((Builder) value).build();
322
					final byte[] b = ((Message) value).toByteArray();
323
					try {
324
						q.mergeFrom(b);
325
					} catch (final InvalidProtocolBufferException e) {
326
						throw new IllegalArgumentException("Unable to merge value: " + value + " with builder: " + q.getDescriptorForType().getName());
327
					}
328
				} else if (Qualifier.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
329
					if (value instanceof Qualifier) {
330
						q.mergeFrom((Qualifier) value);
331
					} else {
332
						parseMessage(q, Qualifier.getDescriptor(), value.toString(), "@@@");
333
					}
334
				} else if (StructuredProperty.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
335
					if (value instanceof StructuredProperty) {
336
						q.mergeFrom((StructuredProperty) value);
337
					} else {
338
						parseMessage(q, StructuredProperty.getDescriptor(), value.toString(), "###");
339
					}
340
				} else if (KeyValue.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
341
					if (value instanceof KeyValue) {
342
						q.mergeFrom((KeyValue) value);
343
					} else {
344
						parseMessage(q, KeyValue.getDescriptor(), value.toString(), "&&&");
345
					}
346
				} else if (StringField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
347
					if (value instanceof StringField) {
348
						q.mergeFrom((StringField) value);
349
					} else {
350
						q.setField(StringField.getDescriptor().findFieldByName("value"), value);
351
					}
352
				} else if (BoolField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
353
					if (value instanceof BoolField) {
354
						q.mergeFrom((BoolField) value);
355
					} else if (value instanceof String) {
356
						q.setField(BoolField.getDescriptor().findFieldByName("value"), Boolean.valueOf((String) value));
357
					} else {
358
						q.setField(BoolField.getDescriptor().findFieldByName("value"), value);
359
					}
360
				} else if (IntField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
361
					if (value instanceof IntField) {
362
						q.mergeFrom((IntField) value);
363
					} else if (value instanceof String) {
364
						q.setField(IntField.getDescriptor().findFieldByName("value"), NumberUtils.toInt((String) value));
365
					} else {
366
						q.setField(IntField.getDescriptor().findFieldByName("value"), value);
367
					}
368
				}
369

    
370
				fieldValue = q.buildPartial();
371
				break;
372
			default:
373
				break;
374
			}
375

    
376
			doAddField(builder, descriptor, fieldValue);
377
		}
378

    
379
	}
380

    
381
	protected static void doAddField(final Builder builder, final FieldDescriptor fd, final Object value) {
382
		if (value != null) {
383
			if (fd.isRepeated()) {
384
				builder.addRepeatedField(fd, value);
385
			} else if (fd.isOptional() || fd.isRequired()) {
386
				builder.setField(fd, value);
387
			}
388
		}
389
	}
390

    
391
	protected static void parseMessage(final Builder builder, final Descriptor descriptor, final String value, final String split) {
392

    
393
		Iterable<Pair> iterablePair = () -> {
394

    
395
			final Iterator<FieldDescriptor> fields = descriptor.getFields().iterator();
396
			final Iterator<String> values = Lists.newArrayList(Splitter.on(split).trimResults().split(value)).iterator();
397

    
398
			return new Iterator<Pair>() {
399
				@Override
400
				public boolean hasNext() {
401
					return fields.hasNext() && values.hasNext();
402
				}
403

    
404
				@Override
405
				public Pair next() {
406
					final FieldDescriptor field = fields.next();
407
					final String value1 = values.next();
408
					return new Pair(field, value1);
409
				}
410

    
411
				@Override
412
				public void remove() {
413
					throw new UnsupportedOperationException("cannot remove");
414
				}
415
			};
416
		};
417

    
418
//		final IterablePair<FieldDescriptor, String> iterablePair =
419
//				new IterablePair<FieldDescriptor, String>(descriptor.getFields(), Lists.newArrayList(Splitter
420
//						.on(split).trimResults().split(value)));
421

    
422
		for (final Pair<FieldDescriptor, String> p : iterablePair) {
423
			addField(builder, p.getKey(), p.getValue());
424
		}
425
	}
426

    
427
	protected static String base64(final byte[] data) {
428
		final byte[] bytes = Base64.encodeBase64(data);
429
		return new String(bytes);
430
	}
431

    
432
	public static String replace(final String s, final String regex, final String replacement) {
433
		return s.replaceAll(regex, replacement);
434
	}
435

    
436
	public static String trim(final String s) {
437
		return s.trim();
438
	}
439

    
440
	protected static String removePrefix(final Type type, final String s) {
441
		return removePrefix(type.toString(), s);
442
	}
443

    
444
	private static String removePrefix(final String prefix, final String s) {
445
		return StringUtils.removeStart("" + s, prefix + "|");
446
	}
447

    
448
	protected static Qualifier.Builder getDefaultQualifier(final String scheme) {
449
		final Qualifier.Builder qualifier = Qualifier.newBuilder().setSchemeid(scheme).setSchemename(scheme);
450
		return qualifier;
451
	}
452

    
453
	protected static StructuredProperty getStructuredProperty(final String value,
454
			final String classid,
455
			final String classname,
456
			final String schemeid,
457
			final String schemename) {
458
		if ((value == null) || value.isEmpty()) return null;
459
		return StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classid, classname, schemeid, schemename)).build();
460
	}
461

    
462
	protected static StringField.Builder sf(final String s) {
463
		return StringField.newBuilder().setValue(s);
464
	}
465

    
466
	public static String generateNsPrefix(final String prefix, final String externalId) {
467
		return StringUtils.substring(prefix + StringUtils.leftPad(externalId, MAX_NSPREFIX_LEN - prefix.length(), "_"), 0, MAX_NSPREFIX_LEN);
468
	}
469

    
470
	public static String md5(final String s) {
471
		try {
472
			final MessageDigest md = MessageDigest.getInstance("MD5");
473
			md.update(s.getBytes("UTF-8"));
474
			return new String(Hex.encodeHex(md.digest()));
475
		} catch (final Exception e) {
476
			System.err.println("Error creating id");
477
			return null;
478
		}
479
	}
480

    
481
	public static String oafId(final String entityType, final String prefix, final String id) {
482
		if (id.isEmpty() || prefix.isEmpty()) return "";
483
		return oafSimpleId(entityType, prefix + "::" + md5(id));
484
	}
485

    
486
	public static String oafSimpleId(final String entityType, final String id) {
487
		return (Type.valueOf(entityType).getNumber() + "|" + id).replaceAll("\\s|\\n", "");
488
	}
489

    
490
	public static String oafSplitId(final String entityType, final String fullId) {
491
		return oafId(entityType, StringUtils.substringBefore(fullId, "::"), StringUtils.substringAfter(fullId, "::"));
492
	}
493

    
494
	/**
495
	 * Utility method, allows to perform param based map lookups in xsl
496
	 *
497
	 * @param map
498
	 * @param key
499
	 * @return value associated to the key.
500
	 */
501
	public static Object lookupValue(final Map<String, Object> map, final String key) {
502
		return map.get(key);
503
	}
504

    
505
	/**
506
	 * Utility method, allows to perform param based map lookups in xsl
507
	 *
508
	 * @param map
509
	 * @param key
510
	 * @return value associated to the key.
511
	 */
512
	public static int mustMerge(final Map<String, Object> map, final String key) {
513
		final Object val = lookupValue(map, key);
514
		return (val != null) && (val instanceof String) && val.equals("true") ? 1 : 0;
515
	}
516

    
517
}
(1-1/9)