1
|
package eu.dnetlib.data.transform.xml;
|
2
|
|
3
|
import java.util.List;
|
4
|
|
5
|
import com.google.common.collect.Lists;
|
6
|
import com.google.protobuf.Descriptors.FieldDescriptor;
|
7
|
import com.google.protobuf.Message.Builder;
|
8
|
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
|
9
|
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization;
|
10
|
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
|
11
|
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
|
12
|
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
|
13
|
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
|
14
|
import eu.dnetlib.data.proto.OafProtos.Oaf;
|
15
|
import eu.dnetlib.data.proto.OafProtos.OafEntity;
|
16
|
import eu.dnetlib.data.proto.OafProtos.OafRel;
|
17
|
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
|
18
|
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization;
|
19
|
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
|
20
|
import eu.dnetlib.data.proto.ProjectProtos.Project;
|
21
|
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
|
22
|
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
|
23
|
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
|
24
|
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;
|
25
|
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation;
|
26
|
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject;
|
27
|
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
|
28
|
import eu.dnetlib.data.proto.ResultProtos.Result;
|
29
|
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
|
30
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
31
|
import org.apache.commons.lang.StringUtils;
|
32
|
import org.w3c.dom.Node;
|
33
|
import org.w3c.dom.NodeList;
|
34
|
|
35
|
public class DbmfToHbaseXsltFunctions extends CommonDNetXsltFunctions {
|
36
|
|
37
|
public static String oafEntity(final String type,
|
38
|
final String id,
|
39
|
final String collectedFromId,
|
40
|
final String collectedFromName,
|
41
|
final NodeList identities,
|
42
|
final String dateOfCollection,
|
43
|
final String dateOfTransformation,
|
44
|
final NodeList nodeList) {
|
45
|
|
46
|
final String entityId = OafRowKeyDecoder.decode(id).getKey();
|
47
|
List<String> ids = Lists.newArrayList();
|
48
|
for(int i = 0; i < identities.getLength(); i++){
|
49
|
Node n = identities.item(i);
|
50
|
String s = n.getTextContent();
|
51
|
ids.add(s);
|
52
|
}
|
53
|
switch (Type.valueOf(type)) {
|
54
|
case datasource:
|
55
|
return serializeOafEntity(nodeList, Type.datasource, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
|
56
|
dateOfTransformation, Datasource.newBuilder());
|
57
|
case organization:
|
58
|
return serializeOafEntity(nodeList, Type.organization, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
|
59
|
dateOfTransformation, Organization.newBuilder());
|
60
|
case project:
|
61
|
return serializeOafEntity(nodeList, Type.project, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
|
62
|
dateOfTransformation, Project.newBuilder());
|
63
|
case result:
|
64
|
return serializeOafEntity(nodeList, Type.result, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
|
65
|
dateOfTransformation ,Result.newBuilder());
|
66
|
default:
|
67
|
throw new IllegalArgumentException("Invalid entity type: " + type);
|
68
|
}
|
69
|
}
|
70
|
|
71
|
public static String oafRel(final String relationType,
|
72
|
final String source,
|
73
|
final String target,
|
74
|
final NodeList nodeList,
|
75
|
final String relClass,
|
76
|
final String relScheme) {
|
77
|
return oafRel(relationType, source, target, nodeList, relClass, relScheme, null, null);
|
78
|
}
|
79
|
|
80
|
public static String oafRel(final String relationType,
|
81
|
final String source,
|
82
|
final String target,
|
83
|
final NodeList nodeList,
|
84
|
final String relClass,
|
85
|
final String relScheme,
|
86
|
final String collectedFromId,
|
87
|
final String collectedFromName) {
|
88
|
|
89
|
final String eSource = OafRowKeyDecoder.decode(source).getKey();
|
90
|
final String eTarget = OafRowKeyDecoder.decode(target).getKey();
|
91
|
final RelType relType = RelType.valueOf(relationType);
|
92
|
|
93
|
switch (relType) {
|
94
|
case datasourceOrganization:
|
95
|
Provision.Builder provision = Provision.newBuilder().setRelMetadata(
|
96
|
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Provision.RelName.valueOf(relClass).toString(), relScheme)));
|
97
|
DatasourceOrganization.Builder dorg = DatasourceOrganization.newBuilder().setProvision(provision);
|
98
|
|
99
|
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.provision, relClass, collectedFromId, collectedFromName, false, dorg, provision);
|
100
|
case projectOrganization:
|
101
|
Participation.Builder participant = Participation.newBuilder().setRelMetadata(
|
102
|
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Participation.RelName.valueOf(relClass).toString(), relScheme)));
|
103
|
ProjectOrganization.Builder projectOrganization = ProjectOrganization.newBuilder().setParticipation(participant);
|
104
|
|
105
|
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.participation, relClass, collectedFromId, collectedFromName, false, projectOrganization, participant);
|
106
|
case resultProject:
|
107
|
Outcome.Builder outcome = Outcome.newBuilder().setRelMetadata(
|
108
|
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Outcome.RelName.valueOf(relClass).toString(), relScheme)));
|
109
|
ResultProject.Builder resultProject = ResultProject.newBuilder().setOutcome(outcome);
|
110
|
|
111
|
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.outcome, relClass, collectedFromId, collectedFromName, false, resultProject, outcome);
|
112
|
case resultOrganization:
|
113
|
Affiliation.Builder affiliation = Affiliation.newBuilder().setRelMetadata(
|
114
|
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Affiliation.RelName.valueOf(relClass).toString(), relScheme)));
|
115
|
ResultOrganization.Builder resultOrganization = ResultOrganization.newBuilder().setAffiliation(affiliation);
|
116
|
|
117
|
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.affiliation, relClass, collectedFromId, collectedFromName, false, resultOrganization, affiliation);
|
118
|
default:
|
119
|
throw new IllegalArgumentException("unhandled relType: " + relationType);
|
120
|
}
|
121
|
}
|
122
|
|
123
|
// ////////////////////////////////////////////////////////
|
124
|
|
125
|
protected static String serializeOafEntity(final NodeList nodelist,
|
126
|
final Type type,
|
127
|
final String id,
|
128
|
final KeyValue collectedFrom,
|
129
|
final List<String> identities,
|
130
|
final String dateOfCollection,
|
131
|
final String dateOfTransformation,
|
132
|
final Builder entity) {
|
133
|
try {
|
134
|
final FieldDescriptor md = entity.getDescriptorForType().findFieldByName("metadata");
|
135
|
|
136
|
final OafEntity.Builder parent = getEntity(type, id, collectedFrom, identities, dateOfCollection, dateOfTransformation, null);
|
137
|
final Builder metadata = entity.newBuilderForField(md);
|
138
|
final DataInfo.Builder dataInfo = DataInfo.newBuilder();
|
139
|
|
140
|
if (type.equals(Type.result)) {
|
141
|
final Instance.Builder instance = Instance.newBuilder();
|
142
|
parseNodelist(nodelist, instance);
|
143
|
FieldDescriptor instanceDescriptor = Result.getDescriptor().findFieldByName(Instance.getDescriptor().getName());
|
144
|
if (instanceDescriptor != null) {
|
145
|
entity.setField(instanceDescriptor, instance);
|
146
|
}
|
147
|
}
|
148
|
parseNodelist(nodelist, parent, entity, metadata, dataInfo);
|
149
|
|
150
|
final FieldDescriptor entityDescriptor = OafEntity.getDescriptor().findFieldByName(type.toString());
|
151
|
|
152
|
final Oaf build = getOaf(parent.setField(entityDescriptor, entity.setField(md, metadata.build()).build()), dataInfo);
|
153
|
|
154
|
return base64(build.toByteArray());
|
155
|
} catch (Exception e) {
|
156
|
e.printStackTrace(System.err);
|
157
|
throw new RuntimeException(e);
|
158
|
}
|
159
|
}
|
160
|
|
161
|
protected static String serializeOafRel(final NodeList nodeList,
|
162
|
final String sourceId,
|
163
|
final String targetId,
|
164
|
final RelType relType,
|
165
|
final SubRelType subRelType,
|
166
|
final String relClass,
|
167
|
final String collectedFromId,
|
168
|
final String collectedFromName,
|
169
|
final boolean isChild,
|
170
|
final Builder rel,
|
171
|
final Builder subRel) {
|
172
|
try {
|
173
|
|
174
|
final DataInfo.Builder dataInfo = DataInfo.newBuilder();
|
175
|
|
176
|
parseNodelist(nodeList, rel, subRel, dataInfo);
|
177
|
|
178
|
OafRel.Builder builder = getRel(sourceId, targetId, relType, subRelType, relClass, collectedFromId, collectedFromName, isChild);
|
179
|
|
180
|
FieldDescriptor subRelDescriptor = rel.getDescriptorForType().findFieldByName(subRelType.toString());
|
181
|
rel.setField(subRelDescriptor, subRel.build());
|
182
|
|
183
|
FieldDescriptor relDescriptor = OafRel.getDescriptor().findFieldByName(relType.toString());
|
184
|
builder.setField(relDescriptor, rel.build());
|
185
|
|
186
|
Oaf build = getOaf(builder, dataInfo);
|
187
|
return base64(build.toByteArray());
|
188
|
} catch (Exception e) {
|
189
|
e.printStackTrace(System.err);
|
190
|
throw new RuntimeException(e);
|
191
|
}
|
192
|
}
|
193
|
|
194
|
private static void parseNodelist(final NodeList nodeList, final Builder... builders) {
|
195
|
|
196
|
for (int i = 0; i < nodeList.getLength(); i++) {
|
197
|
|
198
|
final Node fieldNode = nodeList.item(i);
|
199
|
final Node attr = fieldNode.getAttributes().getNamedItem("name");
|
200
|
|
201
|
final String fieldName = attr.getNodeValue();
|
202
|
final NodeList children = fieldNode.getChildNodes();
|
203
|
|
204
|
for (int j = 0; j < children.getLength(); j++) {
|
205
|
|
206
|
final Node child = children.item(j);
|
207
|
final String childName = child.getLocalName();
|
208
|
if ("ITEM".equals(childName) || StringUtils.isBlank(childName)) {
|
209
|
for (Builder builder : builders) {
|
210
|
FieldDescriptor desc = builder.getDescriptorForType().findFieldByName(fieldName);
|
211
|
if (desc != null) {
|
212
|
String text = getText((StringUtils.isBlank(childName)) ? fieldNode : child);
|
213
|
if (!StringUtils.isBlank(text)) {
|
214
|
addField(builder, desc, text);
|
215
|
}
|
216
|
}
|
217
|
}
|
218
|
}
|
219
|
}
|
220
|
}
|
221
|
}
|
222
|
|
223
|
private static String getText(final Node node) {
|
224
|
StringBuffer result = new StringBuffer();
|
225
|
if (!node.hasChildNodes()) { return ""; }
|
226
|
|
227
|
NodeList list = node.getChildNodes();
|
228
|
for (int i = 0; i < list.getLength(); i++) {
|
229
|
Node subnode = list.item(i);
|
230
|
if (subnode.getNodeType() == Node.TEXT_NODE) {
|
231
|
result.append(subnode.getNodeValue());
|
232
|
} else if (subnode.getNodeType() == Node.CDATA_SECTION_NODE) {
|
233
|
result.append(subnode.getNodeValue());
|
234
|
} else if (subnode.getNodeType() == Node.ENTITY_REFERENCE_NODE) {
|
235
|
result.append(getText(subnode));
|
236
|
}
|
237
|
}
|
238
|
return result.toString().trim();
|
239
|
}
|
240
|
|
241
|
}
|