Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3

    
4
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import eu.dnetlib.data.proto.*;
import eu.dnetlib.data.proto.DliFieldTypeProtos;
import eu.dnetlib.data.proto.DliProtos;
import eu.dnetlib.dli.resolver.model.DLIResolvedObject;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
14

    
15
/**
 * Created by sandro on 11/3/16.
 */
18

    
19
public class DliToHbaseXsltFunctions extends OafToHbaseXsltFunctions {
20

    
21
    public static String getCompletionStatus(NodeList about) {
22
        Node dataInfoNode = getDataInfo(about);
23
        if (dataInfoNode != null && dataInfoNode.getChildNodes() != null && dataInfoNode.getChildNodes().getLength() > 0) {
24
            for (int i = 0; i < dataInfoNode.getChildNodes().getLength(); i++) {
25
                final Node currentNode = dataInfoNode.getChildNodes().item(i);
26
                if ("completionStatus".equals(currentNode.getLocalName())) {
27
                    return currentNode.getTextContent();
28
                }
29
            }
30
        }
31
        return null;
32
    }
33

    
34
    public static Node getDataInfo(final NodeList about) {
35
        if (about.getLength() > 0) {
36
            final NodeList aboutChildren = about.item(0).getChildNodes();
37
            for (int i = 0; i < aboutChildren.getLength(); i++) {
38
                final Node currentNode = aboutChildren.item(i);
39
                if ("datainfo".equals(currentNode.getLocalName())) {
40
                    return currentNode;
41
                }
42
            }
43
        }
44
        return null;
45
    }
46

    
47

    
48
    public static String createDNetEntityIdentifier(
49
            final String pidType,
50
            final String pid,
51
            final String namespacePrefix,
52
            final String typeString) {
53

    
54
        final int typeNumber = TypeProtos.Type.valueOf(typeString).getNumber();
55
        return createEntityId(pidType, pid, namespacePrefix, "" + typeNumber);
56
    }
57

    
58

    
59
    public static String createSimpleEntity(
60
            final String pidType,
61
            final String pid,
62
            final String namespacePrefix,
63
            final NodeList about,
64
            final String type,
65
            final String completionStatus) {
66
        try {
67

    
68

    
69
            final int typeNumber = TypeProtos.Type.valueOf(type).getNumber();
70
            final String entityId = pidType.equals("dnet") ? String.format("%d|%s", typeNumber, pid) : createDNetEntityIdentifier(pidType, pid, namespacePrefix, type);
71

    
72
            final DNGFProtos.DNGFEntity.Builder entity = parseAbout(about, completionStatus);
73
            FieldTypeProtos.StructuredProperty.Builder identifier = FieldTypeProtos.StructuredProperty.newBuilder();
74
            identifier.setQualifier(getSimpleQualifier(pidType, "dnet:pid_types"));
75
            identifier.setValue(pid.trim());
76
            entity.addExtension(DliProtos.typedIdentifier, fixPid(identifier.build()));
77
            entity.setId(entityId);
78
            entity.setType(TypeProtos.Type.valueOf(type));
79
            final DNGFProtos.DNGF oaf = DNGFProtos.DNGF.newBuilder().setEntity(entity).setKind(KindProtos.Kind.entity).build();
80

    
81
            return base64(oaf.toByteArray());
82
        } catch (Throwable e) {
83
            throw new RuntimeException(e);
84
        }
85
    }
86

    
87
    public static String createEntity(
88
            final String pidType,
89
            final String pid,
90
            final String namespacePrefix,
91
            final NodeList about,
92
            final String completionStatus) {
93
        return createSimpleEntity(pidType, pid, namespacePrefix, about, "" + TypeProtos.Type.unknown, completionStatus);
94
    }
95

    
96
    public static String createEntity(
97
            final String pidType,
98
            final String pid,
99
            final String namespacePrefix,
100
            final NodeList about) {
101
        return createSimpleEntity(pidType, pid, namespacePrefix, about, "" + TypeProtos.Type.unknown, null);
102
    }
103

    
104
    public static String createEntityId(
105
            final String pidType,
106
            final String pid,
107
            final String namespacePrefix,
108
            final String type
109
    ) {
110
        final DLIResolvedObject obj = new DLIResolvedObject();
111
        obj.setPid(pid);
112
        obj.setPidType(pidType);
113
        final String entityId = String.format("%s|%s::%s", type, namespacePrefix, obj.getIdentifier());
114
        return entityId;
115
    }
116

    
117
    public static DNGFProtos.DNGFEntity.Builder parseAbout(final NodeList about, final String completionStatusOverride) {
118
        final DNGFProtos.DNGFEntity.Builder entity = DNGFProtos.DNGFEntity.newBuilder();
119

    
120
        final String completionStatus = getCompletionStatus(about);
121
        if (StringUtils.isNoneBlank(completionStatusOverride)) {
122
            entity.setExtension(DliProtos.completionStatus, completionStatusOverride);
123
        } else if (completionStatus != null) {
124
            entity.setExtension(DliProtos.completionStatus, completionStatus);
125
        }
126

    
127
        List<FieldTypeProtos.KeyValue.Builder> collectedFrom = getDatasourceProvenance(about, "collectedFrom", completionStatusOverride);
128

    
129
        if (collectedFrom != null) {
130
            collectedFrom.forEach(entity::addCollectedfrom);
131
        }
132
        return entity;
133
    }
134

    
135
    public static String createRel(
136
            final String source,
137
            final String target,
138
            final String relationSemantic,
139
            final String relType,
140
            final String provenanceAction,
141
            final String trust,
142
            final NodeList about) {
143

    
144
        try {
145
            DNGFProtos.DNGFRel.Builder rel = relProto(source.trim(), target.trim(), relationSemantic, relType);
146
            List<FieldTypeProtos.KeyValue.Builder> collectedFrom = getDatasourceProvenance(about, "collectedFrom", null);
147
            if (collectedFrom == null || collectedFrom.isEmpty()) {
148
                throw new IllegalStateException(String.format("missing provenance information in rel source '%s', target '%s'", source, target));
149
            }
150
            collectedFrom.forEach(rel::addCollectedfrom);
151
            final DNGFProtos.DNGF pmf = getOaf(rel, getDataInfo(about, provenanceAction, trust, false, false));
152
            return base64(pmf.toByteArray());
153

    
154
        } catch (Throwable e) {
155
            e.printStackTrace(System.err);
156
            throw new RuntimeException(e);
157
        }
158

    
159
    }
160

    
161
    public static List<FieldTypeProtos.KeyValue.Builder> getDatasourceProvenance(NodeList about, final String nodeName, final String completionStatus) {
162
        Node dataInfoNode = getDataInfo(about);
163
        if (dataInfoNode != null && dataInfoNode.getChildNodes() != null && dataInfoNode.getChildNodes().getLength() > 0) {
164
            List<FieldTypeProtos.KeyValue.Builder> result = new ArrayList<>();
165

    
166
            for (int i = 0; i < dataInfoNode.getChildNodes().getLength(); i++) {
167
                final Node currentNode = dataInfoNode.getChildNodes().item(i);
168
                if (nodeName.equals(currentNode.getLocalName())) {
169
                    FieldTypeProtos.KeyValue.Builder currentItem = FieldTypeProtos.KeyValue.newBuilder();
170
                    final Node idNode = currentNode.getAttributes().getNamedItem("id");
171
                    final Node nameNode = currentNode.getAttributes().getNamedItem("name");
172
                    final Node completionStatusNode = currentNode.getAttributes().getNamedItem("completionStatus");
173
                    if (idNode != null) {
174
                        currentItem.setKey(idNode.getTextContent());
175
                    }
176
                    if (nameNode != null) {
177
                        currentItem.setValue(nameNode.getTextContent());
178
                    }
179
                    if (completionStatusNode != null) {
180
                        if (StringUtils.isBlank(completionStatus)) {
181
                            currentItem.setExtension(DliFieldTypeProtos.completionStatus, completionStatusNode.getTextContent());
182
                        } else {
183
                            currentItem.setExtension(DliFieldTypeProtos.completionStatus, completionStatus);
184
                        }
185
                    }
186

    
187
                    result.add(currentItem);
188
                }
189
            }
190
            return result;
191
        }
192
        return null;
193
    }
194

    
195

    
196
    public static FieldTypeProtos.StructuredProperty fixPid(final FieldTypeProtos.StructuredProperty inputPid) {
197
        if (inputPid == null) {
198
            return null;
199
        }
200
        String pid = inputPid.getValue();
201
        String pidType = inputPid.getQualifier().getClassid();
202

    
203
        if (StringUtils.isBlank(pid) || StringUtils.isBlank(pidType)) {
204
            return null;
205
        }
206
        pidType = pidType.trim().toLowerCase();
207
        pid = pid.trim().toLowerCase();
208

    
209
        if ("doi".equals(pidType)) {
210
            pid = pid.replace("http://dx.doi.org/", "").replace("http://doi.org/", "");
211
        }
212
        return getStructuredProperty(pid, pidType, pidType, inputPid.getQualifier().getSchemeid(), inputPid.getQualifier().getSchemename());
213
    }
214
}
(1-1/3)