Revision 44352
Added by Sandro La Bruzzo over 7 years ago
PmfToHbaseXsltFunctions.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.transform.xml; |
2 | 2 |
|
3 |
import java.util.ArrayList; |
|
4 |
import java.util.List; |
|
5 |
|
|
6 |
import com.google.common.base.Predicates; |
|
7 |
import com.google.common.collect.Iterables; |
|
8 |
import com.google.common.collect.Lists; |
|
3 | 9 |
import com.google.protobuf.Descriptors.Descriptor; |
10 |
import com.google.protobuf.Message; |
|
4 | 11 |
import eu.dnetlib.data.mapreduce.util.DNGFRowKeyDecoder; |
5 | 12 |
import eu.dnetlib.data.proto.DNGFProtos.DNGF; |
6 | 13 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity; |
7 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity.Builder; |
|
14 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel; |
|
15 |
import eu.dnetlib.data.proto.DliFieldTypeProtos; |
|
16 |
import eu.dnetlib.data.proto.DliProtos; |
|
17 |
import eu.dnetlib.data.proto.DliRels; |
|
18 |
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; |
|
19 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
8 | 20 |
import eu.dnetlib.data.proto.PublicationProtos.Publication; |
21 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
|
22 |
import eu.dnetlib.data.proto.RelProtos.PublicationPublication; |
|
23 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
24 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
25 |
import eu.dnetlib.data.proto.SubRelProtos.IsRelatedTo; |
|
26 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
27 |
import eu.dnetlib.resolver.model.ResolvedObject; |
|
28 |
import org.apache.commons.lang3.StringUtils; |
|
29 |
import org.w3c.dom.Node; |
|
9 | 30 |
import org.w3c.dom.NodeList; |
10 | 31 |
|
11 | 32 |
/** |
... | ... | |
13 | 34 |
*/ |
14 | 35 |
public class PmfToHbaseXsltFunctions extends OafToHbaseXsltFunctions { |
15 | 36 |
|
37 |
//./@relatedIdentifierType, ., $namespaceprefix, $about) |
|
38 |
public static String createEntity( |
|
39 |
final String pidType, |
|
40 |
final String pid, |
|
41 |
final String namespacePrefix, |
|
42 |
final NodeList about) { |
|
43 |
try { |
|
44 |
|
|
45 |
final ResolvedObject obj = new ResolvedObject(); |
|
46 |
obj.setPid(pid); |
|
47 |
obj.setPidType(pidType); |
|
48 |
final String entityId = String.format("70|%s::%s", namespacePrefix, obj.getIdentifier()); |
|
49 |
|
|
50 |
final DNGFEntity.Builder entity = parseAbout(about); |
|
51 |
StructuredProperty.Builder identifier = StructuredProperty.newBuilder(); |
|
52 |
identifier.setQualifier(getSimpleQualifier(pidType, pidType)); |
|
53 |
identifier.setValue(pid); |
|
54 |
entity.addExtension(DliProtos.typedIdentifier, identifier.build()); |
|
55 |
|
|
56 |
entity.setId(entityId); |
|
57 |
entity.setType(Type.unknown); |
|
58 |
|
|
59 |
final DNGF oaf = getOaf(entity, getDataInfo(about, null, null, false, false)); |
|
60 |
return base64(oaf.toByteArray()); |
|
61 |
} catch (Throwable e) { |
|
62 |
e.printStackTrace(); |
|
63 |
return null; |
|
64 |
} |
|
65 |
} |
|
66 |
|
|
67 |
|
|
68 |
|
|
69 |
|
|
16 | 70 |
//$publicationId, $provenance, $trust, $about, $originalid, $dateofcollection, $dateoftransformation, $metadata |
17 | 71 |
public static String pmfPublication( |
18 | 72 |
final String resultId, |
... | ... | |
29 | 83 |
final Descriptor mDesc = Publication.Metadata.getDescriptor(); |
30 | 84 |
|
31 | 85 |
final DNGFEntity.Builder entity = parseAbout(about); |
86 |
addResolvedFrom(entity, about); |
|
32 | 87 |
|
33 |
//final Publication.Metadata.Builder metadata = buildMetadata(values, mDesc); |
|
34 |
//final Publication.Builder result = buildPublication(metadata, values, mDesc, hostedbyId, hostedbyName); |
|
35 |
//final DNGFEntity.Builder entity = buildOafEntity(result, entityId, metadataNodes, getKV(collectedFromId, collectedFromName), originalId); |
|
88 |
final Publication.Metadata.Builder metadata = buildMetadata(values, mDesc); |
|
89 |
final Publication.Builder publication = buildPublication(metadata, values, mDesc, "", ""); |
|
90 |
final List<StructuredProperty> pids = Lists.newArrayList(); |
|
91 |
pids.addAll(parsePids(metadataNodes)); |
|
92 |
entity.setType(Type.publication).setId(resultId); |
|
93 |
entity.setDateoftransformation(StringUtils.isBlank(dateOfTransformation) ? "" : dateOfTransformation); |
|
94 |
entity.setDateofcollection(StringUtils.isBlank(dateOfCollection) ? "" : dateOfCollection); |
|
95 |
entity.addAllPid(Iterables.filter(pids, Predicates.notNull())); |
|
36 | 96 |
entity.setDateofcollection(dateOfCollection) |
37 | 97 |
.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about)); |
38 |
|
|
98 |
entity.setPublication(publication); |
|
39 | 99 |
final DNGF oaf = getOaf(entity, getDataInfo(about, provenance, trust, false, false)); |
40 | 100 |
return base64(oaf.toByteArray()); |
41 | 101 |
} catch (final Throwable e) { |
... | ... | |
44 | 104 |
return null; |
45 | 105 |
} |
46 | 106 |
|
47 |
private static Builder parseAbout(final NodeList about) { |
|
107 |
protected static List<StructuredProperty> parsePids(final NodeList nodelist) { |
|
108 |
|
|
109 |
final List<StructuredProperty> pids = Lists.newArrayList(); |
|
110 |
|
|
111 |
for (int i = 0; i < nodelist.getLength(); i++) { |
|
112 |
final Node node = nodelist.item(i); |
|
113 |
Node pidType = null; |
|
114 |
if (node.getNodeType() == Node.ELEMENT_NODE) { |
|
115 |
if (node.getLocalName().equalsIgnoreCase("pid")) { |
|
116 |
pidType = node.getAttributes().getNamedItem("type"); |
|
117 |
|
|
118 |
pids.add(getStructuredProperty(node.getTextContent(), pidType.getTextContent(), getClassName(pidType.getTextContent()), "dnet:pid_types", |
|
119 |
"dnet:pid_types")); |
|
120 |
} |
|
121 |
} |
|
122 |
} |
|
123 |
return pids; |
|
124 |
} |
|
125 |
|
|
126 |
//$publicationId, |
|
127 |
// $datasetId, 'publicationDataset', |
|
128 |
// 'publicationDataset', 'isRelatedTo', $provenance, $trust, $about |
|
129 |
|
|
130 |
public static String createRel( |
|
131 |
final String source, |
|
132 |
final String target, |
|
133 |
final String relType, |
|
134 |
final String subRelType, |
|
135 |
final String relationSemantic, |
|
136 |
final String provenanceAction, |
|
137 |
final String trust, |
|
138 |
final NodeList about) { |
|
139 |
|
|
140 |
try { |
|
141 |
final String eSource = DNGFRowKeyDecoder.decode(source).getKey(); |
|
142 |
final String eTarget = DNGFRowKeyDecoder.decode(target).getKey(); |
|
143 |
|
|
144 |
final RelType rType = RelType.valueOf(relType); |
|
145 |
final SubRelType srType = SubRelType.valueOf(subRelType); |
|
146 |
|
|
147 |
final RelMetadata.Builder metadata = RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relationSemantic, relationSemantic)); |
|
148 |
|
|
149 |
final Message.Builder subRel = getSubRelBuilder(metadata, srType, null); |
|
150 |
|
|
151 |
final DNGFRel.Builder rel = getDLIRelBuilder(rType, srType, getRel(eSource, eTarget, rType, srType, relType, false), subRel); |
|
152 |
|
|
153 |
final List<KeyValue.Builder> collectedFrom = getDatasourceProvenance(about, "collectedFrom"); |
|
154 |
|
|
155 |
if (collectedFrom != null) { |
|
156 |
collectedFrom.forEach(it -> rel.addCollectedfrom(it)); |
|
157 |
} |
|
158 |
|
|
159 |
final DNGF oaf = getOaf(rel, getDataInfo(about, provenanceAction, trust, false, false)); |
|
160 |
return base64(oaf.toByteArray()); |
|
161 |
} catch (Throwable e) { |
|
162 |
e.printStackTrace(System.err); |
|
163 |
throw new RuntimeException(e); |
|
164 |
} |
|
165 |
|
|
166 |
} |
|
167 |
|
|
168 |
private static DNGFRel.Builder getDLIRelBuilder(final RelType rType, final SubRelType subRelType, DNGFRel.Builder rel, final Message.Builder subRel) { |
|
169 |
|
|
170 |
if (rType == RelType.publicationPublication) { |
|
171 |
final PublicationPublication.Builder pp = PublicationPublication.newBuilder(); |
|
172 |
if (subRelType == SubRelType.isRelatedTo) { |
|
173 |
pp.setExtension(DliRels.isRelatedTo, (IsRelatedTo) subRel.build()); |
|
174 |
return rel.setPublicationPublication(pp); |
|
175 |
} |
|
176 |
} |
|
177 |
return getRelBuilder(rType, subRelType, rel, subRel); |
|
178 |
} |
|
179 |
|
|
180 |
private static DNGFEntity.Builder parseAbout(final NodeList about) { |
|
48 | 181 |
final DNGFEntity.Builder entity = DNGFEntity.newBuilder(); |
49 | 182 |
|
183 |
final String completionStatus = getCompletionStatus(about); |
|
184 |
if (completionStatus != null) { |
|
185 |
entity.setExtension(DliProtos.completionStatus, "complete"); |
|
186 |
} |
|
50 | 187 |
|
188 |
List<KeyValue.Builder> collectedFrom = getDatasourceProvenance(about, "collectedFrom"); |
|
189 |
|
|
190 |
if (collectedFrom != null) { |
|
191 |
collectedFrom.forEach(it -> entity.addCollectedfrom(it)); |
|
192 |
} |
|
193 |
|
|
194 |
|
|
51 | 195 |
return entity; |
52 | 196 |
} |
53 | 197 |
|
198 |
private static void addResolvedFrom(DNGFEntity.Builder entity, NodeList about) { |
|
199 |
List<KeyValue.Builder> resolvedFrom = getDatasourceProvenance(about, "resolvedFrom"); |
|
200 |
if (resolvedFrom != null) { |
|
201 |
resolvedFrom.forEach(it -> entity.addExtension(DliProtos.resolvedfrom, it.build())); |
|
202 |
} |
|
203 |
} |
|
204 |
|
|
205 |
private static List<KeyValue.Builder> getDatasourceProvenance(NodeList about, final String nodeName) { |
|
206 |
Node dataInfoNode = getDataInfo(about); |
|
207 |
if (dataInfoNode != null && dataInfoNode.getChildNodes() != null && dataInfoNode.getChildNodes().getLength() > 0) { |
|
208 |
List<KeyValue.Builder> result = new ArrayList<>(); |
|
209 |
|
|
210 |
for (int i = 0; i < dataInfoNode.getChildNodes().getLength(); i++) { |
|
211 |
final Node currentNode = dataInfoNode.getChildNodes().item(i); |
|
212 |
if (nodeName.equals(currentNode.getLocalName())) { |
|
213 |
KeyValue.Builder currentItem = KeyValue.newBuilder(); |
|
214 |
final Node idNode = currentNode.getAttributes().getNamedItem("id"); |
|
215 |
final Node nameNode = currentNode.getAttributes().getNamedItem("name"); |
|
216 |
final Node completionStatusNode = currentNode.getAttributes().getNamedItem("completionStatus"); |
|
217 |
if (idNode != null) { |
|
218 |
currentItem.setKey(idNode.getTextContent()); |
|
219 |
} |
|
220 |
if (nameNode != null) { |
|
221 |
currentItem.setValue(nameNode.getTextContent()); |
|
222 |
} |
|
223 |
if (completionStatusNode != null) { |
|
224 |
currentItem.setExtension(DliFieldTypeProtos.completionStatus, completionStatusNode.getTextContent()); |
|
225 |
} |
|
226 |
result.add(currentItem); |
|
227 |
} |
|
228 |
} |
|
229 |
return result; |
|
230 |
} |
|
231 |
return null; |
|
232 |
} |
|
233 |
|
|
234 |
private static String getCompletionStatus(NodeList about) { |
|
235 |
Node dataInfoNode = getDataInfo(about); |
|
236 |
if (dataInfoNode != null && dataInfoNode.getChildNodes() != null && dataInfoNode.getChildNodes().getLength() > 0) { |
|
237 |
for (int i = 0; i < dataInfoNode.getChildNodes().getLength(); i++) { |
|
238 |
final Node currentNode = dataInfoNode.getChildNodes().item(i); |
|
239 |
if ("completionStatus".equals(currentNode.getLocalName())) { |
|
240 |
return currentNode.getTextContent(); |
|
241 |
} |
|
242 |
} |
|
243 |
} |
|
244 |
return null; |
|
245 |
} |
|
246 |
|
|
247 |
private static Node getDataInfo(final NodeList about) { |
|
248 |
if (about.getLength() > 0) { |
|
249 |
final NodeList aboutChilds = about.item(0).getChildNodes(); |
|
250 |
for (int i = 0; i < aboutChilds.getLength(); i++) { |
|
251 |
final Node currentNode = aboutChilds.item(i); |
|
252 |
if ("datainfo".equals(currentNode.getLocalName())) { |
|
253 |
return currentNode; |
|
254 |
} |
|
255 |
} |
|
256 |
} |
|
257 |
return null; |
|
258 |
} |
|
259 |
|
|
260 |
|
|
261 |
|
|
54 | 262 |
} |
Also available in: Unified diff
fixed bug