Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.List;
4

    
5
import com.google.common.collect.Lists;
6
import com.google.protobuf.Descriptors.FieldDescriptor;
7
import com.google.protobuf.Message.Builder;
8
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
9
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization;
10
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
11
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
12
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
13
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
14
import eu.dnetlib.data.proto.OafProtos.Oaf;
15
import eu.dnetlib.data.proto.OafProtos.OafEntity;
16
import eu.dnetlib.data.proto.OafProtos.OafRel;
17
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
18
import eu.dnetlib.data.proto.PersonProtos.Person;
19
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization;
20
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
21
import eu.dnetlib.data.proto.ProjectProtos.Project;
22
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
23
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
24
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
25
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;
26
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation;
27
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject;
28
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
29
import eu.dnetlib.data.proto.ResultProtos.Result;
30
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
31
import eu.dnetlib.data.proto.TypeProtos.Type;
32
import org.apache.commons.lang.StringUtils;
33
import org.w3c.dom.Node;
34
import org.w3c.dom.NodeList;
35

    
36
public class DbmfToHbaseXsltFunctions extends CommonDNetXsltFunctions {
37

    
38
	public static String oafEntity(final String type,
39
			final String id,
40
			final String collectedFromId,
41
			final String collectedFromName,
42
			final NodeList identities,
43
			final String dateOfCollection,
44
			final String dateOfTransformation,
45
			final NodeList nodeList) {
46

    
47
		final String entityId = OafRowKeyDecoder.decode(id).getKey();
48
		List<String> ids = Lists.newArrayList();
49
		for(int i = 0; i < identities.getLength(); i++){
50
			Node n = identities.item(i);
51
			String s = n.getTextContent();
52
			ids.add(s);
53
		}
54
		switch (Type.valueOf(type)) {
55
		case datasource:
56
			return serializeOafEntity(nodeList, Type.datasource, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
57
					dateOfTransformation, Datasource.newBuilder());
58
		case organization:
59
			return serializeOafEntity(nodeList, Type.organization, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
60
					dateOfTransformation, Organization.newBuilder());
61
		case person:
62
			return serializeOafEntity(nodeList, Type.person, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
63
					dateOfTransformation, Person.newBuilder());
64
		case project:
65
			return serializeOafEntity(nodeList, Type.project, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
66
					dateOfTransformation, Project.newBuilder());
67
		case result:
68
			return serializeOafEntity(nodeList, Type.result, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
69
					dateOfTransformation ,Result.newBuilder());
70
		default:
71
			throw new IllegalArgumentException("Invalid entity type: " + type);
72
		}
73
	}
74

    
75
	public static String oafRel(final String relationType,
76
			final String source,
77
			final String target,
78
			final NodeList nodeList,
79
			final String relClass,
80
			final String relScheme) {
81
		return oafRel(relationType, source, target, nodeList, relClass, relScheme, null, null);
82
	}
83

    
84
	public static String oafRel(final String relationType,
85
			final String source,
86
			final String target,
87
			final NodeList nodeList,
88
			final String relClass,
89
			final String relScheme,
90
			final String collectedFromId,
91
			final String collectedFromName) {
92

    
93
		final String eSource = OafRowKeyDecoder.decode(source).getKey();
94
		final String eTarget = OafRowKeyDecoder.decode(target).getKey();
95
		final RelType relType = RelType.valueOf(relationType);
96

    
97
		switch (relType) {
98
		case datasourceOrganization:
99
			Provision.Builder provision = Provision.newBuilder().setRelMetadata(
100
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Provision.RelName.valueOf(relClass).toString(), relScheme)));
101
			DatasourceOrganization.Builder dorg = DatasourceOrganization.newBuilder().setProvision(provision);
102

    
103
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.provision, relClass, collectedFromId, collectedFromName, false, dorg, provision);
104
		case projectOrganization:
105
			Participation.Builder participant = Participation.newBuilder().setRelMetadata(
106
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Participation.RelName.valueOf(relClass).toString(), relScheme)));
107
			ProjectOrganization.Builder projectOrganization = ProjectOrganization.newBuilder().setParticipation(participant);
108

    
109
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.participation, relClass, collectedFromId, collectedFromName, false, projectOrganization, participant);
110
		case resultProject:
111
			Outcome.Builder outcome = Outcome.newBuilder().setRelMetadata(
112
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Outcome.RelName.valueOf(relClass).toString(), relScheme)));
113
			ResultProject.Builder resultProject = ResultProject.newBuilder().setOutcome(outcome);
114

    
115
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.outcome, relClass, collectedFromId, collectedFromName, false, resultProject, outcome);
116
		case resultOrganization:
117
			Affiliation.Builder affiliation = Affiliation.newBuilder().setRelMetadata(
118
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Affiliation.RelName.valueOf(relClass).toString(), relScheme)));
119
			ResultOrganization.Builder resultOrganization = ResultOrganization.newBuilder().setAffiliation(affiliation);
120

    
121
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.affiliation, relClass, collectedFromId, collectedFromName, false, resultOrganization, affiliation);
122
		default:
123
			throw new IllegalArgumentException("unhandled relType: " + relationType);
124
		}
125
	}
126

    
127
	// ////////////////////////////////////////////////////////
128

    
129
	protected static String serializeOafEntity(final NodeList nodelist,
130
			final Type type,
131
			final String id,
132
			final KeyValue collectedFrom,
133
			final List<String> identities,
134
			final String dateOfCollection,
135
			final String dateOfTransformation,
136
			final Builder entity) {
137
		try {
138
			final FieldDescriptor md = entity.getDescriptorForType().findFieldByName("metadata");
139

    
140
			final OafEntity.Builder parent = getEntity(type, id, collectedFrom, identities, dateOfCollection, dateOfTransformation, null);
141
			final Builder metadata = entity.newBuilderForField(md);
142
			final DataInfo.Builder dataInfo = DataInfo.newBuilder();
143

    
144
			if (type.equals(Type.result)) {
145
				final Instance.Builder instance = Instance.newBuilder();
146
				parseNodelist(nodelist, instance);
147
				FieldDescriptor instanceDescriptor = Result.getDescriptor().findFieldByName(Instance.getDescriptor().getName());
148
				if (instanceDescriptor != null) {
149
					entity.setField(instanceDescriptor, instance);
150
				}
151
			}
152
			parseNodelist(nodelist, parent, entity, metadata, dataInfo);
153

    
154
			final FieldDescriptor entityDescriptor = OafEntity.getDescriptor().findFieldByName(type.toString());
155

    
156
			final Oaf build = getOaf(parent.setField(entityDescriptor, entity.setField(md, metadata.build()).build()), dataInfo);
157

    
158
			return base64(build.toByteArray());
159
		} catch (Exception e) {
160
			e.printStackTrace(System.err);
161
			throw new RuntimeException(e);
162
		}
163
	}
164

    
165
	protected static String serializeOafRel(final NodeList nodeList,
166
			final String sourceId,
167
			final String targetId,
168
			final RelType relType,
169
			final SubRelType subRelType,
170
			final String relClass,
171
			final String collectedFromId,
172
			final String collectedFromName,
173
			final boolean isChild,
174
			final Builder rel,
175
			final Builder subRel) {
176
		try {
177

    
178
			final DataInfo.Builder dataInfo = DataInfo.newBuilder();
179

    
180
			parseNodelist(nodeList, rel, subRel, dataInfo);
181

    
182
			OafRel.Builder builder = getRel(sourceId, targetId, relType, subRelType, relClass, collectedFromId, collectedFromName, isChild);
183

    
184
			FieldDescriptor subRelDescriptor = rel.getDescriptorForType().findFieldByName(subRelType.toString());
185
			rel.setField(subRelDescriptor, subRel.build());
186

    
187
			FieldDescriptor relDescriptor = OafRel.getDescriptor().findFieldByName(relType.toString());
188
			builder.setField(relDescriptor, rel.build());
189

    
190
			Oaf build = getOaf(builder, dataInfo);
191
			return base64(build.toByteArray());
192
		} catch (Exception e) {
193
			e.printStackTrace(System.err);
194
			throw new RuntimeException(e);
195
		}
196
	}
197

    
198
	private static void parseNodelist(final NodeList nodeList, final Builder... builders) {
199
		
200
		for (int i = 0; i < nodeList.getLength(); i++) {
201

    
202
			final Node fieldNode = nodeList.item(i);
203
			final Node attr = fieldNode.getAttributes().getNamedItem("name");
204

    
205
			final String fieldName = attr.getNodeValue();
206
			final NodeList children = fieldNode.getChildNodes();
207

    
208
			for (int j = 0; j < children.getLength(); j++) {
209

    
210
				final Node child = children.item(j);
211
				final String childName = child.getLocalName();
212
				if ("ITEM".equals(childName) || StringUtils.isBlank(childName)) {
213
					for (Builder builder : builders) {
214
						FieldDescriptor desc = builder.getDescriptorForType().findFieldByName(fieldName);
215
						if (desc != null) {
216
							String text = getText((StringUtils.isBlank(childName)) ? fieldNode : child);
217
							if (!StringUtils.isBlank(text)) {
218
								addField(builder, desc, text);
219
							}
220
						}
221
					}
222
				}
223
			}
224
		}
225
	}
226

    
227
	private static String getText(final Node node) {
228
		StringBuffer result = new StringBuffer();
229
		if (!node.hasChildNodes()) { return ""; }
230

    
231
		NodeList list = node.getChildNodes();
232
		for (int i = 0; i < list.getLength(); i++) {
233
			Node subnode = list.item(i);
234
			if (subnode.getNodeType() == Node.TEXT_NODE) {
235
				result.append(subnode.getNodeValue());
236
			} else if (subnode.getNodeType() == Node.CDATA_SECTION_NODE) {
237
				result.append(subnode.getNodeValue());
238
			} else if (subnode.getNodeType() == Node.ENTITY_REFERENCE_NODE) {
239
				result.append(getText(subnode));
240
			}
241
		}
242
		return result.toString().trim();
243
	}
244

    
245
}
(3-3/10)