1
|
package eu.dnetlib.data.transform.xml;
|
2
|
|
3
|
|
4
|
import java.util.ArrayList;
|
5
|
import java.util.List;
|
6
|
|
7
|
import eu.dnetlib.data.proto.*;
|
8
|
import eu.dnetlib.data.proto.DliFieldTypeProtos;
|
9
|
import eu.dnetlib.data.proto.DliProtos;
|
10
|
import eu.dnetlib.dli.resolver.model.DLIResolvedObject;
|
11
|
import org.apache.commons.lang3.StringUtils;
|
12
|
import org.w3c.dom.Node;
|
13
|
import org.w3c.dom.NodeList;
|
14
|
|
15
|
/**
|
16
|
* Created by sandro on 11/3/16.
|
17
|
*/
|
18
|
|
19
|
public class DliToHbaseXsltFunctions extends OafToHbaseXsltFunctions {
|
20
|
|
21
|
public static String getCompletionStatus(NodeList about) {
|
22
|
Node dataInfoNode = getDataInfo(about);
|
23
|
if (dataInfoNode != null && dataInfoNode.getChildNodes() != null && dataInfoNode.getChildNodes().getLength() > 0) {
|
24
|
for (int i = 0; i < dataInfoNode.getChildNodes().getLength(); i++) {
|
25
|
final Node currentNode = dataInfoNode.getChildNodes().item(i);
|
26
|
if ("completionStatus".equals(currentNode.getLocalName())) {
|
27
|
return currentNode.getTextContent();
|
28
|
}
|
29
|
}
|
30
|
}
|
31
|
return null;
|
32
|
}
|
33
|
|
34
|
public static Node getDataInfo(final NodeList about) {
|
35
|
if (about.getLength() > 0) {
|
36
|
final NodeList aboutChildren = about.item(0).getChildNodes();
|
37
|
for (int i = 0; i < aboutChildren.getLength(); i++) {
|
38
|
final Node currentNode = aboutChildren.item(i);
|
39
|
if ("datainfo".equals(currentNode.getLocalName())) {
|
40
|
return currentNode;
|
41
|
}
|
42
|
}
|
43
|
}
|
44
|
return null;
|
45
|
}
|
46
|
|
47
|
|
48
|
public static String createDNetEntityIdentifier(
|
49
|
final String pidType,
|
50
|
final String pid,
|
51
|
final String namespacePrefix,
|
52
|
final String typeString) {
|
53
|
|
54
|
final int typeNumber = TypeProtos.Type.valueOf(typeString).getNumber();
|
55
|
return createEntityId(pidType, pid, namespacePrefix, "" + typeNumber);
|
56
|
}
|
57
|
|
58
|
|
59
|
public static String createSimpleEntity(
|
60
|
final String pidType,
|
61
|
final String pid,
|
62
|
final String namespacePrefix,
|
63
|
final NodeList about,
|
64
|
final String type,
|
65
|
final String completionStatus) {
|
66
|
try {
|
67
|
|
68
|
|
69
|
final int typeNumber = TypeProtos.Type.valueOf(type).getNumber();
|
70
|
final String entityId = pidType.equals("dnet") ? String.format("%d|%s", typeNumber, pid) : createDNetEntityIdentifier(pidType, pid, namespacePrefix, type);
|
71
|
|
72
|
final DNGFProtos.DNGFEntity.Builder entity = parseAbout(about, completionStatus);
|
73
|
FieldTypeProtos.StructuredProperty.Builder identifier = FieldTypeProtos.StructuredProperty.newBuilder();
|
74
|
identifier.setQualifier(getSimpleQualifier(pidType, "dnet:pid_types"));
|
75
|
identifier.setValue(pid.trim());
|
76
|
entity.addExtension(DliProtos.typedIdentifier, fixPid(identifier.build()));
|
77
|
entity.setId(entityId);
|
78
|
entity.setType(TypeProtos.Type.valueOf(type));
|
79
|
final DNGFProtos.DNGF oaf = DNGFProtos.DNGF.newBuilder().setEntity(entity).setKind(KindProtos.Kind.entity).build();
|
80
|
|
81
|
return base64(oaf.toByteArray());
|
82
|
} catch (Throwable e) {
|
83
|
throw new RuntimeException(e);
|
84
|
}
|
85
|
}
|
86
|
|
87
|
public static String createEntity(
|
88
|
final String pidType,
|
89
|
final String pid,
|
90
|
final String namespacePrefix,
|
91
|
final NodeList about,
|
92
|
final String completionStatus) {
|
93
|
return createSimpleEntity(pidType, pid, namespacePrefix, about, "" + TypeProtos.Type.unknown, completionStatus);
|
94
|
}
|
95
|
|
96
|
public static String createEntity(
|
97
|
final String pidType,
|
98
|
final String pid,
|
99
|
final String namespacePrefix,
|
100
|
final NodeList about) {
|
101
|
return createSimpleEntity(pidType, pid, namespacePrefix, about, "" + TypeProtos.Type.unknown, null);
|
102
|
}
|
103
|
|
104
|
public static String createEntityId(
|
105
|
final String pidType,
|
106
|
final String pid,
|
107
|
final String namespacePrefix,
|
108
|
final String type
|
109
|
) {
|
110
|
final DLIResolvedObject obj = new DLIResolvedObject();
|
111
|
obj.setPid(pid);
|
112
|
obj.setPidType(pidType);
|
113
|
final String entityId = String.format("%s|%s::%s", type, namespacePrefix, obj.getIdentifier());
|
114
|
return entityId;
|
115
|
}
|
116
|
|
117
|
public static DNGFProtos.DNGFEntity.Builder parseAbout(final NodeList about, final String completionStatusOverride) {
|
118
|
final DNGFProtos.DNGFEntity.Builder entity = DNGFProtos.DNGFEntity.newBuilder();
|
119
|
|
120
|
final String completionStatus = getCompletionStatus(about);
|
121
|
if (StringUtils.isNoneBlank(completionStatusOverride)) {
|
122
|
entity.setExtension(DliProtos.completionStatus, completionStatusOverride);
|
123
|
} else if (completionStatus != null) {
|
124
|
entity.setExtension(DliProtos.completionStatus, completionStatus);
|
125
|
}
|
126
|
|
127
|
List<FieldTypeProtos.KeyValue.Builder> collectedFrom = getDatasourceProvenance(about, "collectedFrom", completionStatusOverride);
|
128
|
|
129
|
if (collectedFrom != null) {
|
130
|
collectedFrom.forEach(entity::addCollectedfrom);
|
131
|
}
|
132
|
return entity;
|
133
|
}
|
134
|
|
135
|
public static String createRel(
|
136
|
final String source,
|
137
|
final String target,
|
138
|
final String relationSemantic,
|
139
|
final String relType,
|
140
|
final String provenanceAction,
|
141
|
final String trust,
|
142
|
final NodeList about) {
|
143
|
|
144
|
try {
|
145
|
DNGFProtos.DNGFRel.Builder rel = relProto(source.trim(), target.trim(), relationSemantic, relType);
|
146
|
List<FieldTypeProtos.KeyValue.Builder> collectedFrom = getDatasourceProvenance(about, "collectedFrom", null);
|
147
|
if (collectedFrom == null || collectedFrom.isEmpty()) {
|
148
|
throw new IllegalStateException(String.format("missing provenance information in rel source '%s', target '%s'", source, target));
|
149
|
}
|
150
|
collectedFrom.forEach(rel::addCollectedfrom);
|
151
|
final DNGFProtos.DNGF pmf = getOaf(rel, getDataInfo(about, provenanceAction, trust, false, false));
|
152
|
return base64(pmf.toByteArray());
|
153
|
|
154
|
} catch (Throwable e) {
|
155
|
e.printStackTrace(System.err);
|
156
|
throw new RuntimeException(e);
|
157
|
}
|
158
|
|
159
|
}
|
160
|
|
161
|
public static List<FieldTypeProtos.KeyValue.Builder> getDatasourceProvenance(NodeList about, final String nodeName, final String completionStatus) {
|
162
|
Node dataInfoNode = getDataInfo(about);
|
163
|
if (dataInfoNode != null && dataInfoNode.getChildNodes() != null && dataInfoNode.getChildNodes().getLength() > 0) {
|
164
|
List<FieldTypeProtos.KeyValue.Builder> result = new ArrayList<>();
|
165
|
|
166
|
for (int i = 0; i < dataInfoNode.getChildNodes().getLength(); i++) {
|
167
|
final Node currentNode = dataInfoNode.getChildNodes().item(i);
|
168
|
if (nodeName.equals(currentNode.getLocalName())) {
|
169
|
FieldTypeProtos.KeyValue.Builder currentItem = FieldTypeProtos.KeyValue.newBuilder();
|
170
|
final Node idNode = currentNode.getAttributes().getNamedItem("id");
|
171
|
final Node nameNode = currentNode.getAttributes().getNamedItem("name");
|
172
|
final Node completionStatusNode = currentNode.getAttributes().getNamedItem("completionStatus");
|
173
|
if (idNode != null) {
|
174
|
currentItem.setKey(idNode.getTextContent());
|
175
|
}
|
176
|
if (nameNode != null) {
|
177
|
currentItem.setValue(nameNode.getTextContent());
|
178
|
}
|
179
|
if (completionStatusNode != null) {
|
180
|
if (StringUtils.isBlank(completionStatus)) {
|
181
|
currentItem.setExtension(DliFieldTypeProtos.completionStatus, completionStatusNode.getTextContent());
|
182
|
} else {
|
183
|
currentItem.setExtension(DliFieldTypeProtos.completionStatus, completionStatus);
|
184
|
}
|
185
|
}
|
186
|
|
187
|
result.add(currentItem);
|
188
|
}
|
189
|
}
|
190
|
return result;
|
191
|
}
|
192
|
return null;
|
193
|
}
|
194
|
|
195
|
|
196
|
public static FieldTypeProtos.StructuredProperty fixPid(final FieldTypeProtos.StructuredProperty inputPid) {
|
197
|
if (inputPid == null) {
|
198
|
return null;
|
199
|
}
|
200
|
String pid = inputPid.getValue();
|
201
|
String pidType = inputPid.getQualifier().getClassid();
|
202
|
|
203
|
if (StringUtils.isBlank(pid) || StringUtils.isBlank(pidType)) {
|
204
|
return null;
|
205
|
}
|
206
|
pidType = pidType.trim().toLowerCase();
|
207
|
pid = pid.trim().toLowerCase();
|
208
|
|
209
|
if ("doi".equals(pidType)) {
|
210
|
pid = pid.replace("http://dx.doi.org/", "").replace("http://doi.org/", "");
|
211
|
}
|
212
|
return getStructuredProperty(pid, pidType, pidType, inputPid.getQualifier().getSchemeid(), inputPid.getQualifier().getSchemename());
|
213
|
}
|
214
|
}
|