1
|
package eu.dnetlib.data.transform.xml;
|
2
|
|
3
|
|
4
|
import java.util.ArrayList;
|
5
|
import java.util.List;
|
6
|
|
7
|
import eu.dnetlib.data.proto.*;
|
8
|
import eu.dnetlib.dli.resolver.model.DLIResolvedObject;
|
9
|
import org.apache.commons.lang3.StringUtils;
|
10
|
import org.w3c.dom.Node;
|
11
|
import org.w3c.dom.NodeList;
|
12
|
|
13
|
/**
|
14
|
* Created by sandro on 11/3/16.
|
15
|
*/
|
16
|
|
17
|
public class DliToHbaseXsltFunctions extends OafToHbaseXsltFunctions {
|
18
|
|
19
|
public static String getCompletionStatus(NodeList about) {
|
20
|
Node dataInfoNode = getDataInfo(about);
|
21
|
if (dataInfoNode != null && dataInfoNode.getChildNodes() != null && dataInfoNode.getChildNodes().getLength() > 0) {
|
22
|
for (int i = 0; i < dataInfoNode.getChildNodes().getLength(); i++) {
|
23
|
final Node currentNode = dataInfoNode.getChildNodes().item(i);
|
24
|
if ("completionStatus".equals(currentNode.getLocalName())) {
|
25
|
return currentNode.getTextContent();
|
26
|
}
|
27
|
}
|
28
|
}
|
29
|
return null;
|
30
|
}
|
31
|
|
32
|
public static Node getDataInfo(final NodeList about) {
|
33
|
if (about.getLength() > 0) {
|
34
|
final NodeList aboutChildren = about.item(0).getChildNodes();
|
35
|
for (int i = 0; i < aboutChildren.getLength(); i++) {
|
36
|
final Node currentNode = aboutChildren.item(i);
|
37
|
if ("datainfo".equals(currentNode.getLocalName())) {
|
38
|
return currentNode;
|
39
|
}
|
40
|
}
|
41
|
}
|
42
|
return null;
|
43
|
}
|
44
|
|
45
|
|
46
|
public static String createDNetEntityIdentifier(
|
47
|
final String pidType,
|
48
|
final String pid,
|
49
|
final String namespacePrefix,
|
50
|
final String typeString) {
|
51
|
|
52
|
final int typeNumber = TypeProtos.Type.valueOf(typeString).getNumber();
|
53
|
return createEntityId(pidType, pid, namespacePrefix, "" + typeNumber);
|
54
|
}
|
55
|
|
56
|
|
57
|
public static String createSimpleEntity(
|
58
|
final String pidType,
|
59
|
final String pid,
|
60
|
final String namespacePrefix,
|
61
|
final NodeList about,
|
62
|
final String type,
|
63
|
final String completionStatus) {
|
64
|
try {
|
65
|
|
66
|
|
67
|
final int typeNumber = TypeProtos.Type.valueOf(type).getNumber();
|
68
|
final String entityId = pidType.equals("dnet") ? String.format("%d|%s", typeNumber, pid) : createDNetEntityIdentifier(pidType, pid, namespacePrefix, type);
|
69
|
|
70
|
final DNGFProtos.DNGFEntity.Builder entity = parseAbout(about, completionStatus);
|
71
|
FieldTypeProtos.StructuredProperty.Builder identifier = FieldTypeProtos.StructuredProperty.newBuilder();
|
72
|
identifier.setQualifier(getSimpleQualifier(pidType, "dnet:pid_types"));
|
73
|
identifier.setValue(pid.trim());
|
74
|
entity.addExtension(DliProtos.typedIdentifier, fixPid(identifier.build()));
|
75
|
entity.setId(entityId);
|
76
|
entity.setType(TypeProtos.Type.valueOf(type));
|
77
|
final DNGFProtos.DNGF oaf = DNGFProtos.DNGF.newBuilder().setEntity(entity).setKind(KindProtos.Kind.entity).build();
|
78
|
|
79
|
return base64(oaf.toByteArray());
|
80
|
} catch (Throwable e) {
|
81
|
throw new RuntimeException(e);
|
82
|
}
|
83
|
}
|
84
|
|
85
|
public static String createEntity(
|
86
|
final String pidType,
|
87
|
final String pid,
|
88
|
final String namespacePrefix,
|
89
|
final NodeList about,
|
90
|
final String completionStatus) {
|
91
|
return createSimpleEntity(pidType, pid, namespacePrefix, about, "" + TypeProtos.Type.unknown, completionStatus);
|
92
|
}
|
93
|
|
94
|
public static String createEntity(
|
95
|
final String pidType,
|
96
|
final String pid,
|
97
|
final String namespacePrefix,
|
98
|
final NodeList about) {
|
99
|
return createSimpleEntity(pidType, pid, namespacePrefix, about, "" + TypeProtos.Type.unknown, null);
|
100
|
}
|
101
|
|
102
|
public static String createEntityId(
|
103
|
final String pidType,
|
104
|
final String pid,
|
105
|
final String namespacePrefix,
|
106
|
final String type
|
107
|
) {
|
108
|
final DLIResolvedObject obj = new DLIResolvedObject();
|
109
|
obj.setPid(pid);
|
110
|
obj.setPidType(pidType);
|
111
|
final String entityId = String.format("%s|%s::%s", type, namespacePrefix, obj.getIdentifier());
|
112
|
return entityId;
|
113
|
}
|
114
|
|
115
|
public static DNGFProtos.DNGFEntity.Builder parseAbout(final NodeList about, final String completionStatusOverride) {
|
116
|
final DNGFProtos.DNGFEntity.Builder entity = DNGFProtos.DNGFEntity.newBuilder();
|
117
|
|
118
|
final String completionStatus = getCompletionStatus(about);
|
119
|
if (StringUtils.isNoneBlank(completionStatusOverride)) {
|
120
|
entity.setExtension(DliProtos.completionStatus, completionStatusOverride);
|
121
|
} else if (completionStatus != null) {
|
122
|
entity.setExtension(DliProtos.completionStatus, completionStatus);
|
123
|
}
|
124
|
|
125
|
List<FieldTypeProtos.KeyValue.Builder> collectedFrom = getDatasourceProvenance(about, "collectedFrom", completionStatusOverride);
|
126
|
|
127
|
if (collectedFrom != null) {
|
128
|
collectedFrom.forEach(entity::addCollectedfrom);
|
129
|
}
|
130
|
return entity;
|
131
|
}
|
132
|
|
133
|
public static String createRel(
|
134
|
final String source,
|
135
|
final String target,
|
136
|
final String relationSemantic,
|
137
|
final String relType,
|
138
|
final String provenanceAction,
|
139
|
final String trust,
|
140
|
final NodeList about) {
|
141
|
|
142
|
try {
|
143
|
DNGFProtos.DNGFRel.Builder rel = relProto(source.trim(), target.trim(), relationSemantic, relType);
|
144
|
List<FieldTypeProtos.KeyValue.Builder> collectedFrom = getDatasourceProvenance(about, "collectedFrom", null);
|
145
|
if (collectedFrom == null || collectedFrom.isEmpty()) {
|
146
|
throw new IllegalStateException(String.format("missing provenance information in rel source '%s', target '%s'", source, target));
|
147
|
}
|
148
|
collectedFrom.forEach(rel::addCollectedfrom);
|
149
|
final DNGFProtos.DNGF pmf = getOaf(rel, getDataInfo(about, provenanceAction, trust, false, false));
|
150
|
return base64(pmf.toByteArray());
|
151
|
|
152
|
} catch (Throwable e) {
|
153
|
e.printStackTrace(System.err);
|
154
|
throw new RuntimeException(e);
|
155
|
}
|
156
|
|
157
|
}
|
158
|
|
159
|
public static List<FieldTypeProtos.KeyValue.Builder> getDatasourceProvenance(NodeList about, final String nodeName, final String completionStatus) {
|
160
|
Node dataInfoNode = getDataInfo(about);
|
161
|
if (dataInfoNode != null && dataInfoNode.getChildNodes() != null && dataInfoNode.getChildNodes().getLength() > 0) {
|
162
|
List<FieldTypeProtos.KeyValue.Builder> result = new ArrayList<>();
|
163
|
|
164
|
for (int i = 0; i < dataInfoNode.getChildNodes().getLength(); i++) {
|
165
|
final Node currentNode = dataInfoNode.getChildNodes().item(i);
|
166
|
if (nodeName.equals(currentNode.getLocalName())) {
|
167
|
FieldTypeProtos.KeyValue.Builder currentItem = FieldTypeProtos.KeyValue.newBuilder();
|
168
|
final Node idNode = currentNode.getAttributes().getNamedItem("id");
|
169
|
final Node nameNode = currentNode.getAttributes().getNamedItem("name");
|
170
|
final Node completionStatusNode = currentNode.getAttributes().getNamedItem("completionStatus");
|
171
|
if (idNode != null) {
|
172
|
currentItem.setKey(idNode.getTextContent());
|
173
|
}
|
174
|
if (nameNode != null) {
|
175
|
currentItem.setValue(nameNode.getTextContent());
|
176
|
}
|
177
|
if (completionStatusNode != null) {
|
178
|
if (StringUtils.isBlank(completionStatus)) {
|
179
|
currentItem.setExtension(DliFieldTypeProtos.completionStatus, completionStatusNode.getTextContent());
|
180
|
} else {
|
181
|
currentItem.setExtension(DliFieldTypeProtos.completionStatus, completionStatus);
|
182
|
}
|
183
|
}
|
184
|
|
185
|
result.add(currentItem);
|
186
|
}
|
187
|
}
|
188
|
return result;
|
189
|
}
|
190
|
return null;
|
191
|
}
|
192
|
|
193
|
|
194
|
public static FieldTypeProtos.StructuredProperty fixPid(final FieldTypeProtos.StructuredProperty inputPid) {
|
195
|
if (inputPid == null) {
|
196
|
return null;
|
197
|
}
|
198
|
String pid = inputPid.getValue();
|
199
|
String pidType = inputPid.getQualifier().getClassid();
|
200
|
|
201
|
if (StringUtils.isBlank(pid) || StringUtils.isBlank(pidType)) {
|
202
|
return null;
|
203
|
}
|
204
|
pidType = pidType.trim().toLowerCase();
|
205
|
pid = pid.trim().toLowerCase();
|
206
|
|
207
|
if ("doi".equals(pidType)) {
|
208
|
pid = pid.replace("http://dx.doi.org/", "").replace("http://doi.org/", "");
|
209
|
}
|
210
|
return getStructuredProperty(pid, pidType, pidType, inputPid.getQualifier().getSchemeid(), inputPid.getQualifier().getSchemename());
|
211
|
}
|
212
|
}
|