Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.util.List;
4

    
5
import com.google.common.collect.Lists;
6
import com.google.protobuf.Descriptors.FieldDescriptor;
7
import com.google.protobuf.Message.Builder;
8
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
9
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization;
10
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
11
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
12
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
13
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
14
import eu.dnetlib.data.proto.OafProtos.Oaf;
15
import eu.dnetlib.data.proto.OafProtos.OafEntity;
16
import eu.dnetlib.data.proto.OafProtos.OafRel;
17
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
18
import eu.dnetlib.data.proto.PersonProtos.Person;
19
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult;
20
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship;
21
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization;
22
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
23
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson;
24
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson.ContactPerson;
25
import eu.dnetlib.data.proto.ProjectProtos.Project;
26
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
27
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
28
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
29
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;
30
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation;
31
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject;
32
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
33
import eu.dnetlib.data.proto.ResultProtos.Result;
34
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
35
import eu.dnetlib.data.proto.TypeProtos.Type;
36
import org.apache.commons.lang.StringUtils;
37
import org.w3c.dom.Node;
38
import org.w3c.dom.NodeList;
39

    
40
public class DbmfToHbaseXsltFunctions extends CommonDNetXsltFunctions {
41

    
42
	public static String oafEntity(final String type,
43
			final String id,
44
			final String collectedFromId,
45
			final String collectedFromName,
46
			final NodeList identities,
47
			final String dateOfCollection,
48
			final String dateOfTransformation,
49
			final NodeList nodeList) {
50

    
51
		final String entityId = OafRowKeyDecoder.decode(id).getKey();
52
		List<String> ids = Lists.newArrayList();
53
		for(int i = 0; i < identities.getLength(); i++){
54
			Node n = identities.item(i);
55
			String s = n.getTextContent();
56
			ids.add(s);
57
		}
58
		switch (Type.valueOf(type)) {
59
		case datasource:
60
			return serializeOafEntity(nodeList, Type.datasource, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
61
					dateOfTransformation, Datasource.newBuilder());
62
		case organization:
63
			return serializeOafEntity(nodeList, Type.organization, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
64
					dateOfTransformation, Organization.newBuilder());
65
		case person:
66
			return serializeOafEntity(nodeList, Type.person, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
67
					dateOfTransformation, Person.newBuilder());
68
		case project:
69
			return serializeOafEntity(nodeList, Type.project, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
70
					dateOfTransformation, Project.newBuilder());
71
		case result:
72
			return serializeOafEntity(nodeList, Type.result, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection,
73
					dateOfTransformation ,Result.newBuilder());
74
		default:
75
			throw new IllegalArgumentException("Invalid entity type: " + type);
76
		}
77
	}
78

    
79
	public static String oafRel(final String relationType,
80
			final String source,
81
			final String target,
82
			final NodeList nodeList,
83
			final String relClass,
84
			final String relScheme) {
85
		return oafRel(relationType, source, target, nodeList, relClass, relScheme, null, null);
86
	}
87

    
88
	public static String oafRel(final String relationType,
89
			final String source,
90
			final String target,
91
			final NodeList nodeList,
92
			final String relClass,
93
			final String relScheme,
94
			final String collectedFromId,
95
			final String collectedFromName) {
96

    
97
		final String eSource = OafRowKeyDecoder.decode(source).getKey();
98
		final String eTarget = OafRowKeyDecoder.decode(target).getKey();
99
		final RelType relType = RelType.valueOf(relationType);
100

    
101
		switch (relType) {
102
		case datasourceOrganization:
103
			Provision.Builder provision = Provision.newBuilder().setRelMetadata(
104
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Provision.RelName.valueOf(relClass).toString(), relScheme)));
105
			DatasourceOrganization.Builder dorg = DatasourceOrganization.newBuilder().setProvision(provision);
106

    
107
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.provision, relClass, collectedFromId, collectedFromName, false, dorg, provision);
108
		case personResult:
109
			Authorship.Builder auth = Authorship.newBuilder().setRelMetadata(
110
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Authorship.RelName.valueOf(relClass).toString(), relScheme)));
111
			PersonResult.Builder personResult = PersonResult.newBuilder().setAuthorship(auth);
112

    
113
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.authorship, relClass, collectedFromId, collectedFromName, false, personResult, auth);
114
		case projectPerson:
115
			ContactPerson.Builder contact = ContactPerson.newBuilder().setRelMetadata(
116
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(ContactPerson.RelName.valueOf(relClass).toString(), relScheme)));
117
			ProjectPerson.Builder projectPerson = ProjectPerson.newBuilder().setContactPerson(contact);
118

    
119
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.contactPerson, relClass, collectedFromId, collectedFromName, false, projectPerson, contact);
120
		case projectOrganization:
121
			Participation.Builder participant = Participation.newBuilder().setRelMetadata(
122
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Participation.RelName.valueOf(relClass).toString(), relScheme)));
123
			ProjectOrganization.Builder projectOrganization = ProjectOrganization.newBuilder().setParticipation(participant);
124

    
125
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.participation, relClass, collectedFromId, collectedFromName, false, projectOrganization, participant);
126
		case resultProject:
127
			Outcome.Builder outcome = Outcome.newBuilder().setRelMetadata(
128
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Outcome.RelName.valueOf(relClass).toString(), relScheme)));
129
			ResultProject.Builder resultProject = ResultProject.newBuilder().setOutcome(outcome);
130

    
131
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.outcome, relClass, collectedFromId, collectedFromName, false, resultProject, outcome);
132
		case resultOrganization:
133
			Affiliation.Builder affiliation = Affiliation.newBuilder().setRelMetadata(
134
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Affiliation.RelName.valueOf(relClass).toString(), relScheme)));
135
			ResultOrganization.Builder resultOrganization = ResultOrganization.newBuilder().setAffiliation(affiliation);
136

    
137
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.affiliation, relClass, collectedFromId, collectedFromName, false, resultOrganization, affiliation);
138
		default:
139
			throw new IllegalArgumentException("unhandled relType: " + relationType);
140
		}
141
	}
142

    
143
	// ////////////////////////////////////////////////////////
144

    
145
	protected static String serializeOafEntity(final NodeList nodelist,
146
			final Type type,
147
			final String id,
148
			final KeyValue collectedFrom,
149
			final List<String> identities,
150
			final String dateOfCollection,
151
			final String dateOfTransformation,
152
			final Builder entity) {
153
		try {
154
			final FieldDescriptor md = entity.getDescriptorForType().findFieldByName("metadata");
155

    
156
			final OafEntity.Builder parent = getEntity(type, id, collectedFrom, identities, dateOfCollection, dateOfTransformation, null);
157
			final Builder metadata = entity.newBuilderForField(md);
158
			final DataInfo.Builder dataInfo = DataInfo.newBuilder();
159

    
160
			if (type.equals(Type.result)) {
161
				final Instance.Builder instance = Instance.newBuilder();
162
				parseNodelist(nodelist, instance);
163
				FieldDescriptor instanceDescriptor = Result.getDescriptor().findFieldByName(Instance.getDescriptor().getName());
164
				if (instanceDescriptor != null) {
165
					entity.setField(instanceDescriptor, instance);
166
				}
167
			}
168
			parseNodelist(nodelist, parent, entity, metadata, dataInfo);
169

    
170
			final FieldDescriptor entityDescriptor = OafEntity.getDescriptor().findFieldByName(type.toString());
171

    
172
			final Oaf build = getOaf(parent.setField(entityDescriptor, entity.setField(md, metadata.build()).build()), dataInfo);
173

    
174
			return base64(build.toByteArray());
175
		} catch (Exception e) {
176
			e.printStackTrace(System.err);
177
			throw new RuntimeException(e);
178
		}
179
	}
180

    
181
	protected static String serializeOafRel(final NodeList nodeList,
182
			final String sourceId,
183
			final String targetId,
184
			final RelType relType,
185
			final SubRelType subRelType,
186
			final String relClass,
187
			final String collectedFromId,
188
			final String collectedFromName,
189
			final boolean isChild,
190
			final Builder rel,
191
			final Builder subRel) {
192
		try {
193

    
194
			final DataInfo.Builder dataInfo = DataInfo.newBuilder();
195

    
196
			parseNodelist(nodeList, rel, subRel, dataInfo);
197

    
198
			OafRel.Builder builder = getRel(sourceId, targetId, relType, subRelType, relClass, collectedFromId, collectedFromName, isChild);
199

    
200
			FieldDescriptor subRelDescriptor = rel.getDescriptorForType().findFieldByName(subRelType.toString());
201
			rel.setField(subRelDescriptor, subRel.build());
202

    
203
			FieldDescriptor relDescriptor = OafRel.getDescriptor().findFieldByName(relType.toString());
204
			builder.setField(relDescriptor, rel.build());
205

    
206
			Oaf build = getOaf(builder, dataInfo);
207
			return base64(build.toByteArray());
208
		} catch (Exception e) {
209
			e.printStackTrace(System.err);
210
			throw new RuntimeException(e);
211
		}
212
	}
213

    
214
	private static void parseNodelist(final NodeList nodeList, final Builder... builders) {
215
		
216
		for (int i = 0; i < nodeList.getLength(); i++) {
217

    
218
			final Node fieldNode = nodeList.item(i);
219
			final Node attr = fieldNode.getAttributes().getNamedItem("name");
220

    
221
			final String fieldName = attr.getNodeValue();
222
			final NodeList children = fieldNode.getChildNodes();
223

    
224
			for (int j = 0; j < children.getLength(); j++) {
225

    
226
				final Node child = children.item(j);
227
				final String childName = child.getLocalName();
228
				if ("ITEM".equals(childName) || StringUtils.isBlank(childName)) {
229
					for (Builder builder : builders) {
230
						FieldDescriptor desc = builder.getDescriptorForType().findFieldByName(fieldName);
231
						if (desc != null) {
232
							String text = getText((StringUtils.isBlank(childName)) ? fieldNode : child);
233
							if (!StringUtils.isBlank(text)) {
234
								addField(builder, desc, text);
235
							}
236
						}
237
					}
238
				}
239
			}
240
		}
241
	}
242

    
243
	private static String getText(final Node node) {
244
		StringBuffer result = new StringBuffer();
245
		if (!node.hasChildNodes()) { return ""; }
246

    
247
		NodeList list = node.getChildNodes();
248
		for (int i = 0; i < list.getLength(); i++) {
249
			Node subnode = list.item(i);
250
			if (subnode.getNodeType() == Node.TEXT_NODE) {
251
				result.append(subnode.getNodeValue());
252
			} else if (subnode.getNodeType() == Node.CDATA_SECTION_NODE) {
253
				result.append(subnode.getNodeValue());
254
			} else if (subnode.getNodeType() == Node.ENTITY_REFERENCE_NODE) {
255
				result.append(getText(subnode));
256
			}
257
		}
258
		return result.toString().trim();
259
	}
260

    
261
}
(3-3/10)