Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.List;
4

    
5
import com.google.common.collect.Lists;
6
import com.google.protobuf.Descriptors.FieldDescriptor;
7
import com.google.protobuf.Message.Builder;
8
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
9
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization;
10
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
11
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
12
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
13
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
14
import eu.dnetlib.data.proto.OafProtos.Oaf;
15
import eu.dnetlib.data.proto.OafProtos.OafEntity;
16
import eu.dnetlib.data.proto.OafProtos.OafRel;
17
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
18
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization;
19
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
20
import eu.dnetlib.data.proto.ProjectProtos.Project;
21
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
22
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
23
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
24
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;
25
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation;
26
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject;
27
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
28
import eu.dnetlib.data.proto.ResultProtos.Result;
29
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
30
import eu.dnetlib.data.proto.TypeProtos.Type;
31
import org.apache.commons.lang.StringUtils;
32
import org.w3c.dom.Node;
33
import org.w3c.dom.NodeList;
34

    
35
public class DbmfToHbaseXsltFunctions extends CommonDNetXsltFunctions {
36

    
37
	public static String oafEntity(final String type,
38
			final String id,
39
			final String collectedFromId,
40
			final String collectedFromName,
41
			final NodeList identities,
42
			final String dateOfCollection,
43
			final String dateOfTransformation,
44
			final NodeList nodeList) {
45

    
46
		final String entityId = OafRowKeyDecoder.decode(id).getKey();
47
		List<String> ids = Lists.newArrayList();
48
		for(int i = 0; i < identities.getLength(); i++){
49
			Node n = identities.item(i);
50
			String s = n.getTextContent();
51
			ids.add(s);
52
		}
53
		switch (Type.valueOf(type)) {
54
		case datasource:
55
			return serializeOafEntity(nodeList, Type.datasource, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
56
					dateOfTransformation, Datasource.newBuilder());
57
		case organization:
58
			return serializeOafEntity(nodeList, Type.organization, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
59
					dateOfTransformation, Organization.newBuilder());
60
		case project:
61
			return serializeOafEntity(nodeList, Type.project, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
62
					dateOfTransformation, Project.newBuilder());
63
		case result:
64
			return serializeOafEntity(nodeList, Type.result, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
65
					dateOfTransformation ,Result.newBuilder());
66
		default:
67
			throw new IllegalArgumentException("Invalid entity type: " + type);
68
		}
69
	}
70

    
71
	public static String oafRel(final String relationType,
72
			final String source,
73
			final String target,
74
			final NodeList nodeList,
75
			final String relClass,
76
			final String relScheme) {
77
		return oafRel(relationType, source, target, nodeList, relClass, relScheme, null, null);
78
	}
79

    
80
	public static String oafRel(final String relationType,
81
			final String source,
82
			final String target,
83
			final NodeList nodeList,
84
			final String relClass,
85
			final String relScheme,
86
			final String collectedFromId,
87
			final String collectedFromName) {
88

    
89
		final String eSource = OafRowKeyDecoder.decode(source).getKey();
90
		final String eTarget = OafRowKeyDecoder.decode(target).getKey();
91
		final RelType relType = RelType.valueOf(relationType);
92

    
93
		switch (relType) {
94
		case datasourceOrganization:
95
			Provision.Builder provision = Provision.newBuilder().setRelMetadata(
96
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Provision.RelName.valueOf(relClass).toString(), relScheme)));
97
			DatasourceOrganization.Builder dorg = DatasourceOrganization.newBuilder().setProvision(provision);
98

    
99
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.provision, relClass, collectedFromId, collectedFromName, false, dorg, provision);
100
		case projectOrganization:
101
			Participation.Builder participant = Participation.newBuilder().setRelMetadata(
102
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Participation.RelName.valueOf(relClass).toString(), relScheme)));
103
			ProjectOrganization.Builder projectOrganization = ProjectOrganization.newBuilder().setParticipation(participant);
104

    
105
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.participation, relClass, collectedFromId, collectedFromName, false, projectOrganization, participant);
106
		case resultProject:
107
			Outcome.Builder outcome = Outcome.newBuilder().setRelMetadata(
108
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Outcome.RelName.valueOf(relClass).toString(), relScheme)));
109
			ResultProject.Builder resultProject = ResultProject.newBuilder().setOutcome(outcome);
110

    
111
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.outcome, relClass, collectedFromId, collectedFromName, false, resultProject, outcome);
112
		case resultOrganization:
113
			Affiliation.Builder affiliation = Affiliation.newBuilder().setRelMetadata(
114
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Affiliation.RelName.valueOf(relClass).toString(), relScheme)));
115
			ResultOrganization.Builder resultOrganization = ResultOrganization.newBuilder().setAffiliation(affiliation);
116

    
117
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.affiliation, relClass, collectedFromId, collectedFromName, false, resultOrganization, affiliation);
118
		default:
119
			throw new IllegalArgumentException("unhandled relType: " + relationType);
120
		}
121
	}
122

    
123
	// ////////////////////////////////////////////////////////
124

    
125
	protected static String serializeOafEntity(final NodeList nodelist,
126
			final Type type,
127
			final String id,
128
			final KeyValue collectedFrom,
129
			final List<String> identities,
130
			final String dateOfCollection,
131
			final String dateOfTransformation,
132
			final Builder entity) {
133
		try {
134
			final FieldDescriptor md = entity.getDescriptorForType().findFieldByName("metadata");
135

    
136
			final OafEntity.Builder parent = getEntity(type, id, collectedFrom, identities, dateOfCollection, dateOfTransformation, null);
137
			final Builder metadata = entity.newBuilderForField(md);
138
			final DataInfo.Builder dataInfo = DataInfo.newBuilder();
139

    
140
			if (type.equals(Type.result)) {
141
				final Instance.Builder instance = Instance.newBuilder();
142
				parseNodelist(nodelist, instance);
143
				FieldDescriptor instanceDescriptor = Result.getDescriptor().findFieldByName(Instance.getDescriptor().getName());
144
				if (instanceDescriptor != null) {
145
					entity.setField(instanceDescriptor, instance);
146
				}
147
			}
148
			parseNodelist(nodelist, parent, entity, metadata, dataInfo);
149

    
150
			final FieldDescriptor entityDescriptor = OafEntity.getDescriptor().findFieldByName(type.toString());
151

    
152
			final Oaf build = getOaf(parent.setField(entityDescriptor, entity.setField(md, metadata.build()).build()), dataInfo);
153

    
154
			return base64(build.toByteArray());
155
		} catch (Exception e) {
156
			e.printStackTrace(System.err);
157
			throw new RuntimeException(e);
158
		}
159
	}
160

    
161
	protected static String serializeOafRel(final NodeList nodeList,
162
			final String sourceId,
163
			final String targetId,
164
			final RelType relType,
165
			final SubRelType subRelType,
166
			final String relClass,
167
			final String collectedFromId,
168
			final String collectedFromName,
169
			final boolean isChild,
170
			final Builder rel,
171
			final Builder subRel) {
172
		try {
173

    
174
			final DataInfo.Builder dataInfo = DataInfo.newBuilder();
175

    
176
			parseNodelist(nodeList, rel, subRel, dataInfo);
177

    
178
			OafRel.Builder builder = getRel(sourceId, targetId, relType, subRelType, relClass, collectedFromId, collectedFromName, isChild);
179

    
180
			FieldDescriptor subRelDescriptor = rel.getDescriptorForType().findFieldByName(subRelType.toString());
181
			rel.setField(subRelDescriptor, subRel.build());
182

    
183
			FieldDescriptor relDescriptor = OafRel.getDescriptor().findFieldByName(relType.toString());
184
			builder.setField(relDescriptor, rel.build());
185

    
186
			Oaf build = getOaf(builder, dataInfo);
187
			return base64(build.toByteArray());
188
		} catch (Exception e) {
189
			e.printStackTrace(System.err);
190
			throw new RuntimeException(e);
191
		}
192
	}
193

    
194
	private static void parseNodelist(final NodeList nodeList, final Builder... builders) {
195
		
196
		for (int i = 0; i < nodeList.getLength(); i++) {
197

    
198
			final Node fieldNode = nodeList.item(i);
199
			final Node attr = fieldNode.getAttributes().getNamedItem("name");
200

    
201
			final String fieldName = attr.getNodeValue();
202
			final NodeList children = fieldNode.getChildNodes();
203

    
204
			for (int j = 0; j < children.getLength(); j++) {
205

    
206
				final Node child = children.item(j);
207
				final String childName = child.getLocalName();
208
				if ("ITEM".equals(childName) || StringUtils.isBlank(childName)) {
209
					for (Builder builder : builders) {
210
						FieldDescriptor desc = builder.getDescriptorForType().findFieldByName(fieldName);
211
						if (desc != null) {
212
							String text = getText((StringUtils.isBlank(childName)) ? fieldNode : child);
213
							if (!StringUtils.isBlank(text)) {
214
								addField(builder, desc, text);
215
							}
216
						}
217
					}
218
				}
219
			}
220
		}
221
	}
222

    
223
	private static String getText(final Node node) {
224
		StringBuffer result = new StringBuffer();
225
		if (!node.hasChildNodes()) { return ""; }
226

    
227
		NodeList list = node.getChildNodes();
228
		for (int i = 0; i < list.getLength(); i++) {
229
			Node subnode = list.item(i);
230
			if (subnode.getNodeType() == Node.TEXT_NODE) {
231
				result.append(subnode.getNodeValue());
232
			} else if (subnode.getNodeType() == Node.CDATA_SECTION_NODE) {
233
				result.append(subnode.getNodeValue());
234
			} else if (subnode.getNodeType() == Node.ENTITY_REFERENCE_NODE) {
235
				result.append(getText(subnode));
236
			}
237
		}
238
		return result.toString().trim();
239
	}
240

    
241
}
(3-3/10)