1
|
package eu.dnetlib.data.transform.xml;
|
2
|
|
3
|
import java.util.List;
|
4
|
|
5
|
import com.google.common.base.Predicates;
|
6
|
import com.google.common.collect.Iterables;
|
7
|
import com.google.common.collect.Lists;
|
8
|
import com.google.protobuf.Descriptors.Descriptor;
|
9
|
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
|
10
|
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
|
11
|
import eu.dnetlib.data.proto.DliProtos;
|
12
|
import eu.dnetlib.data.proto.FieldTypeProtos;
|
13
|
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
|
14
|
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
|
15
|
import eu.dnetlib.data.proto.PublicationProtos.Publication;
|
16
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
17
|
import org.apache.commons.lang3.StringUtils;
|
18
|
import org.apache.commons.lang3.exception.ExceptionUtils;
|
19
|
import org.w3c.dom.NamedNodeMap;
|
20
|
import org.w3c.dom.Node;
|
21
|
import org.w3c.dom.NodeList;
|
22
|
|
23
|
/**
|
24
|
* Created by claudio on 24/10/2016.
|
25
|
*/
|
26
|
public class PmfToHbaseXsltFunctions extends DliToHbaseXsltFunctions {
|
27
|
|
28
|
public static String createEntity(
|
29
|
final String pidType,
|
30
|
final String pid,
|
31
|
final String namespacePrefix,
|
32
|
final NodeList about) {
|
33
|
return DliToHbaseXsltFunctions.createEntity(pidType, pid, namespacePrefix, about);
|
34
|
}
|
35
|
|
36
|
public static String createEntityId(
|
37
|
final String pidType,
|
38
|
final String pid,
|
39
|
final String namespacePrefix,
|
40
|
final String type
|
41
|
) {
|
42
|
return DliToHbaseXsltFunctions.createEntityId(pidType, pid, namespacePrefix, type);
|
43
|
}
|
44
|
|
45
|
public static String pmfPublication(
|
46
|
final String resultId,
|
47
|
final String provenance,
|
48
|
final String trust,
|
49
|
final NodeList about,
|
50
|
final String originalId,
|
51
|
final String dateOfCollection,
|
52
|
final String dateOfTransformation,
|
53
|
final NodeList metadataNodes) {
|
54
|
try {
|
55
|
final ValueMap values = ValueMap.parseNodeList(metadataNodes);
|
56
|
final Descriptor mDesc = Publication.Metadata.getDescriptor();
|
57
|
|
58
|
final DNGFEntity.Builder entity = DliToHbaseXsltFunctions.parseAbout(about, null).addOriginalId(originalId);
|
59
|
addResolvedFrom(entity, about);
|
60
|
|
61
|
final Publication.Metadata.Builder metadata = buildMetadata(values, mDesc);
|
62
|
final Publication.Builder publication = buildPublication(metadata, values, mDesc, "", "");
|
63
|
final List<StructuredProperty> pids = Lists.newArrayList();
|
64
|
pids.addAll(parsePids(metadataNodes));
|
65
|
entity.setType(Type.publication).setId(resultId);
|
66
|
entity.setDateoftransformation(StringUtils.isBlank(dateOfTransformation) ? "" : dateOfTransformation);
|
67
|
entity.setDateofcollection(StringUtils.isBlank(dateOfCollection) ? "" : dateOfCollection);
|
68
|
pids.stream().map(DliToHbaseXsltFunctions::fixPid).forEach(pid -> entity.addExtension(DliProtos.typedIdentifier, pid));
|
69
|
entity.setDateofcollection(dateOfCollection)
|
70
|
.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about));
|
71
|
entity.setPublication(publication);
|
72
|
final DNGF oaf = getOaf(entity, getDataInfo(about, provenance, trust, false, false));
|
73
|
return base64(oaf.toByteArray());
|
74
|
} catch (final Throwable e) {
|
75
|
System.err.println(ExceptionUtils.getStackTrace(e));
|
76
|
throw new IllegalArgumentException(e);
|
77
|
}
|
78
|
}
|
79
|
|
80
|
public static List<StructuredProperty> parsePids(final NodeList nodelist) {
|
81
|
|
82
|
final List<StructuredProperty> pids = Lists.newArrayList();
|
83
|
|
84
|
for (int i = 0; i < nodelist.getLength(); i++) {
|
85
|
final Node node = nodelist.item(i);
|
86
|
Node pidType = null;
|
87
|
if (node.getNodeType() == Node.ELEMENT_NODE) {
|
88
|
if (node.getLocalName().equalsIgnoreCase("pid")) {
|
89
|
pidType = node.getAttributes().getNamedItem("type");
|
90
|
|
91
|
pids.add(getStructuredProperty(node.getTextContent(), pidType.getTextContent(), pidType.getTextContent(), "dnet:pid_types",
|
92
|
"dnet:pid_types"));
|
93
|
}
|
94
|
}
|
95
|
}
|
96
|
return pids;
|
97
|
}
|
98
|
|
99
|
public static String dliRel(
|
100
|
final String source,
|
101
|
final String target,
|
102
|
final String relTypeId,
|
103
|
final String relTypeScheme,
|
104
|
final String provenanceAction,
|
105
|
final String trust,
|
106
|
final NodeList about) {
|
107
|
|
108
|
|
109
|
return rel(source, target, relTypeId, relTypeScheme, provenanceAction, trust, about, null);
|
110
|
}
|
111
|
|
112
|
|
113
|
|
114
|
|
115
|
public static void addResolvedFrom(DNGFEntity.Builder entity, NodeList about) {
|
116
|
List<KeyValue.Builder> resolvedFrom = DliToHbaseXsltFunctions.getDatasourceProvenance(about, "resolvedFrom", null);
|
117
|
if (resolvedFrom != null) {
|
118
|
resolvedFrom.forEach(it -> entity.addExtension(DliProtos.resolvedfrom, it.build()));
|
119
|
}
|
120
|
}
|
121
|
}
|