Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import com.google.common.collect.Lists;
4
import com.google.protobuf.Descriptors.FieldDescriptor;
5
import com.google.protobuf.Message.Builder;
6
import eu.dnetlib.data.mapreduce.util.DNGFRowKeyDecoder;
7
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
8
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
9
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
10
import eu.dnetlib.data.proto.DatasetProtos.Dataset;
11
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
12
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
13
import eu.dnetlib.data.proto.FieldTypeProtos.Instance;
14
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
15
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
16
import eu.dnetlib.data.proto.PersonProtos.Person;
17
import eu.dnetlib.data.proto.ProjectProtos.Project;
18
import eu.dnetlib.data.proto.PublicationProtos.Publication;
19
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
20
import eu.dnetlib.data.proto.RelProtos.*;
21
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
22
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
23
import eu.dnetlib.data.proto.SubRelProtos.*;
24
import eu.dnetlib.data.proto.TypeProtos.Type;
25
import org.apache.commons.lang3.StringUtils;
26
import org.w3c.dom.Node;
27
import org.w3c.dom.NodeList;
28

    
29
public class DbmfToHbaseXsltFunctions extends CommonDNetXsltFunctions {
30

    
31
	public static String oafEntity(final String type,
32
			final String id,
33
			final String collectedFromId,
34
			final String collectedFromName,
35
			final String originalId,
36
			final String dateOfCollection,
37
			final String dateOfTransformation,
38
			final NodeList nodeList) {
39

    
40
		final String entityId = DNGFRowKeyDecoder.decode(id).getKey();
41
		switch (Type.valueOf(type)) {
42
		case datasource:
43
			return serializeOafEntity(nodeList, Type.datasource, entityId, getKV(collectedFromId, collectedFromName), originalId, dateOfCollection,
44
					dateOfTransformation, Datasource.newBuilder());
45
		case organization:
46
			return serializeOafEntity(nodeList, Type.organization, entityId, getKV(collectedFromId, collectedFromName), originalId, dateOfCollection,
47
					dateOfTransformation, Organization.newBuilder());
48
		case person:
49
			return serializeOafEntity(nodeList, Type.person, entityId, getKV(collectedFromId, collectedFromName), originalId, dateOfCollection,
50
					dateOfTransformation, Person.newBuilder());
51
		case project:
52
			return serializeOafEntity(nodeList, Type.project, entityId, getKV(collectedFromId, collectedFromName), originalId, dateOfCollection,
53
					dateOfTransformation, Project.newBuilder());
54
		case publication:
55
			return serializeOafEntity(nodeList, Type.publication, entityId, getKV(collectedFromId, collectedFromName), originalId, dateOfCollection,
56
					dateOfTransformation, Publication.newBuilder());
57
		case dataset:
58
			return serializeOafEntity(nodeList, Type.dataset, entityId, getKV(collectedFromId, collectedFromName), originalId, dateOfCollection,
59
					dateOfTransformation, Dataset.newBuilder());
60
		default:
61
			throw new IllegalArgumentException("Invalid entity type: " + type);
62
		}
63
	}
64

    
65
	public static String oafRel(final String relationType,
66
			final String source,
67
			final String target,
68
			final NodeList nodeList,
69
			final String relClass,
70
			final String relScheme) {
71

    
72
		final String eSource = DNGFRowKeyDecoder.decode(source).getKey();
73
		final String eTarget = DNGFRowKeyDecoder.decode(target).getKey();
74
		final RelType relType = RelType.valueOf(relationType);
75

    
76
		switch (relType) {
77
		case datasourceOrganization:
78
			Provision.Builder provision = Provision.newBuilder().setRelMetadata(
79
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Provision.RelName.valueOf(relClass).toString(), relScheme)));
80
			DatasourceOrganization.Builder dorg = DatasourceOrganization.newBuilder().setProvision(provision);
81

    
82
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.provision, relClass, false, dorg, provision);
83
		case personPublication:
84
			Authorship.Builder authP = Authorship.newBuilder().setRelMetadata(
85
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Authorship.RelName.valueOf(relClass).toString(), relScheme)));
86
			PersonPublication.Builder personPublication = PersonPublication.newBuilder().setAuthorship(authP);
87

    
88
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.authorship, relClass, false, personPublication, authP);
89
		case personDataset:
90
			Authorship.Builder authD = Authorship.newBuilder().setRelMetadata(
91
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Authorship.RelName.valueOf(relClass).toString(), relScheme)));
92
			PersonDataset.Builder personDataset = PersonDataset.newBuilder().setAuthorship(authD);
93

    
94
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.authorship, relClass, false, personDataset, authD);
95
		case projectPerson:
96
			ContactPerson.Builder contact = ContactPerson.newBuilder().setRelMetadata(
97
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(ContactPerson.RelName.valueOf(relClass).toString(), relScheme)));
98
			ProjectPerson.Builder projectPerson = ProjectPerson.newBuilder().setContactPerson(contact);
99

    
100
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.contactPerson, relClass, false, projectPerson, contact);
101
		case projectOrganization:
102
			Participation.Builder participant = Participation.newBuilder().setRelMetadata(
103
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Participation.RelName.valueOf(relClass).toString(), relScheme)));
104
			ProjectOrganization.Builder projectOrganization = ProjectOrganization.newBuilder().setParticipation(participant);
105

    
106
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.participation, relClass, false, projectOrganization, participant);
107
		case publicationProject:
108
			Outcome.Builder outcomeP = Outcome.newBuilder().setRelMetadata(
109
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Outcome.RelName.valueOf(relClass).toString(), relScheme)));
110
			PublicationProject.Builder publicationProject = PublicationProject.newBuilder().setOutcome(outcomeP);
111

    
112
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.outcome, relClass, false, publicationProject, outcomeP);
113
		case datasetProject:
114
			Outcome.Builder outcome = Outcome.newBuilder().setRelMetadata(
115
					RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Outcome.RelName.valueOf(relClass).toString(), relScheme)));
116
			DatasetProject.Builder datasetProject = DatasetProject.newBuilder().setOutcome(outcome);
117

    
118
			return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.outcome, relClass, false, datasetProject, outcome);
119
		default:
120
			throw new IllegalArgumentException("unhandled relType: " + relationType);
121
		}
122
	}
123

    
124
	// ////////////////////////////////////////////////////////
125

    
126
	protected static String serializeOafEntity(final NodeList nodelist,
127
			final Type type,
128
			final String id,
129
			final KeyValue collectedFrom,
130
			final String originalId,
131
			final String dateOfCollection,
132
			final String dateOfTransformation,
133
			final Builder entity) {
134
		try {
135
			final FieldDescriptor md = entity.getDescriptorForType().findFieldByName("metadata");
136

    
137
			final DNGFEntity.Builder parent = getEntity(type, id, collectedFrom, Lists.newArrayList(originalId), dateOfCollection, dateOfTransformation, null);
138
			final Builder metadata = entity.newBuilderForField(md);
139
			final DataInfo.Builder dataInfo = DataInfo.newBuilder();
140

    
141
			if (type.equals(Type.publication)) {
142
				final Instance.Builder instance = Instance.newBuilder();
143
				parseNodelist(nodelist, instance);
144
				FieldDescriptor instanceDescriptor = Publication.getDescriptor().findFieldByName(Instance.getDescriptor().getName());
145
				if (instanceDescriptor != null) {
146
					entity.setField(instanceDescriptor, instance);
147
				}
148
			}
149
			if (type.equals(Type.dataset)) {
150
				final Instance.Builder instance = Instance.newBuilder();
151
				parseNodelist(nodelist, instance);
152
				FieldDescriptor instanceDescriptor = Dataset.getDescriptor().findFieldByName(Instance.getDescriptor().getName());
153
				if (instanceDescriptor != null) {
154
					entity.setField(instanceDescriptor, instance);
155
				}
156
			}
157
			parseNodelist(nodelist, parent, entity, metadata, dataInfo);
158

    
159
			final FieldDescriptor entityDescriptor = DNGFEntity.getDescriptor().findFieldByName(type.toString());
160

    
161
			final DNGF build = getOaf(parent.setField(entityDescriptor, entity.setField(md, metadata.build()).build()), dataInfo);
162

    
163
			return base64(build.toByteArray());
164
		} catch (Exception e) {
165
			e.printStackTrace(System.err);
166
			throw new RuntimeException(e);
167
		}
168
	}
169

    
170
	protected static String serializeOafRel(final NodeList nodeList,
171
			final String sourceId,
172
			final String targetId,
173
			final RelType relType,
174
			final SubRelType subRelType,
175
			final String relClass,
176
			final boolean isChild,
177
			final Builder rel,
178
			final Builder subRel) {
179
		try {
180

    
181
			final DataInfo.Builder dataInfo = DataInfo.newBuilder();
182

    
183
			parseNodelist(nodeList, rel, subRel, dataInfo);
184

    
185
			DNGFRel.Builder builder = getRel(sourceId, targetId, relType, subRelType, relClass, isChild);
186

    
187
			FieldDescriptor subRelDescriptor = rel.getDescriptorForType().findFieldByName(subRelType.toString());
188
			rel.setField(subRelDescriptor, subRel.build());
189

    
190
			FieldDescriptor relDescriptor = DNGFRel.getDescriptor().findFieldByName(relType.toString());
191
			builder.setField(relDescriptor, rel.build());
192

    
193
			DNGF build = getOaf(builder, dataInfo);
194
			return base64(build.toByteArray());
195
		} catch (Exception e) {
196
			e.printStackTrace(System.err);
197
			throw new RuntimeException(e);
198
		}
199
	}
200

    
201
	private static void parseNodelist(final NodeList nodeList, final Builder... builders) {
202
		
203
		for (int i = 0; i < nodeList.getLength(); i++) {
204

    
205
			final Node fieldNode = nodeList.item(i);
206
			final Node attr = fieldNode.getAttributes().getNamedItem("name");
207

    
208
			final String fieldName = attr.getNodeValue();
209
			final NodeList chilidren = fieldNode.getChildNodes();
210

    
211
			for (int j = 0; j < chilidren.getLength(); j++) {
212

    
213
				final Node child = chilidren.item(j);
214
				final String childName = child.getLocalName();
215
				if ("ITEM".equals(childName) || StringUtils.isBlank(childName)) {
216
					for (Builder builder : builders) {
217
						FieldDescriptor desc = builder.getDescriptorForType().findFieldByName(fieldName);
218
						if (desc != null) {
219
							String text = getText((StringUtils.isBlank(childName)) ? fieldNode : child);
220
							if (!StringUtils.isBlank(text)) {
221
								addField(builder, desc, text);
222
							}
223
						}
224
					}
225
				}
226
			}
227
		}
228
	}
229

    
230
	private static String getText(final Node node) {
231
		StringBuffer result = new StringBuffer();
232
		if (!node.hasChildNodes()) { return ""; }
233

    
234
		NodeList list = node.getChildNodes();
235
		for (int i = 0; i < list.getLength(); i++) {
236
			Node subnode = list.item(i);
237
			if (subnode.getNodeType() == Node.TEXT_NODE) {
238
				result.append(subnode.getNodeValue());
239
			} else if (subnode.getNodeType() == Node.CDATA_SECTION_NODE) {
240
				result.append(subnode.getNodeValue());
241
			} else if (subnode.getNodeType() == Node.ENTITY_REFERENCE_NODE) {
242
				result.append(getText(subnode));
243
			}
244
		}
245
		return result.toString().trim();
246
	}
247

    
248
}
(3-3/9)