Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3

    
4
import java.util.ArrayList;
5
import java.util.List;
6

    
7
import eu.dnetlib.data.proto.*;
8
import eu.dnetlib.dli.resolver.model.DLIResolvedObject;
9
import org.apache.commons.lang3.StringUtils;
10
import org.w3c.dom.Node;
11
import org.w3c.dom.NodeList;
12

    
13
/**
14
 * Created by sandro on 11/3/16.
15
 */
16

    
17
public class DliToHbaseXsltFunctions extends OafToHbaseXsltFunctions {
18

    
19
    public static String getCompletionStatus(NodeList about) {
20
        Node dataInfoNode = getDataInfo(about);
21
        if (dataInfoNode != null && dataInfoNode.getChildNodes() != null && dataInfoNode.getChildNodes().getLength() > 0) {
22
            for (int i = 0; i < dataInfoNode.getChildNodes().getLength(); i++) {
23
                final Node currentNode = dataInfoNode.getChildNodes().item(i);
24
                if ("completionStatus".equals(currentNode.getLocalName())) {
25
                    return currentNode.getTextContent();
26
                }
27
            }
28
        }
29
        return null;
30
    }
31

    
32
    public static Node getDataInfo(final NodeList about) {
33
        if (about.getLength() > 0) {
34
            final NodeList aboutChildren = about.item(0).getChildNodes();
35
            for (int i = 0; i < aboutChildren.getLength(); i++) {
36
                final Node currentNode = aboutChildren.item(i);
37
                if ("datainfo".equals(currentNode.getLocalName())) {
38
                    return currentNode;
39
                }
40
            }
41
        }
42
        return null;
43
    }
44

    
45

    
46
    public static String createDNetEntityIdentifier(
47
            final String pidType,
48
            final String pid,
49
            final String namespacePrefix,
50
            final String typeString) {
51

    
52
        final int typeNumber = TypeProtos.Type.valueOf(typeString).getNumber();
53
        return createEntityId(pidType, pid, namespacePrefix, "" + typeNumber);
54
    }
55

    
56

    
57
    public static String createSimpleEntity(
58
            final String pidType,
59
            final String pid,
60
            final String namespacePrefix,
61
            final NodeList about,
62
            final String type,
63
            final String completionStatus) {
64
        try {
65

    
66

    
67
            final int typeNumber = TypeProtos.Type.valueOf(type).getNumber();
68
            final String entityId = pidType.equals("dnet") ? String.format("%d|%s", typeNumber, pid) : createDNetEntityIdentifier(pidType, pid, namespacePrefix, type);
69

    
70
            final DNGFProtos.DNGFEntity.Builder entity = parseAbout(about, completionStatus);
71
            FieldTypeProtos.StructuredProperty.Builder identifier = FieldTypeProtos.StructuredProperty.newBuilder();
72
            identifier.setQualifier(getSimpleQualifier(pidType, "dnet:pid_types"));
73
            identifier.setValue(pid.trim());
74
            entity.addExtension(DliProtos.typedIdentifier, fixPid(identifier.build()));
75
            entity.setId(entityId);
76
            entity.setType(TypeProtos.Type.valueOf(type));
77
            final DNGFProtos.DNGF oaf = DNGFProtos.DNGF.newBuilder().setEntity(entity).setKind(KindProtos.Kind.entity).build();
78

    
79
            return base64(oaf.toByteArray());
80
        } catch (Throwable e) {
81
            throw new RuntimeException(e);
82
        }
83
    }
84

    
85
    public static String createEntity(
86
            final String pidType,
87
            final String pid,
88
            final String namespacePrefix,
89
            final NodeList about,
90
            final String completionStatus) {
91
        return createSimpleEntity(pidType, pid, namespacePrefix, about, "" + TypeProtos.Type.unknown, completionStatus);
92
    }
93

    
94
    public static String createEntity(
95
            final String pidType,
96
            final String pid,
97
            final String namespacePrefix,
98
            final NodeList about) {
99
        return createSimpleEntity(pidType, pid, namespacePrefix, about, "" + TypeProtos.Type.unknown, null);
100
    }
101

    
102
    public static String createEntityId(
103
            final String pidType,
104
            final String pid,
105
            final String namespacePrefix,
106
            final String type
107
    ) {
108
        final DLIResolvedObject obj = new DLIResolvedObject();
109
        obj.setPid(pid);
110
        obj.setPidType(pidType);
111
        final String entityId = String.format("%s|%s::%s", type, namespacePrefix, obj.getIdentifier());
112
        return entityId;
113
    }
114

    
115
    public static DNGFProtos.DNGFEntity.Builder parseAbout(final NodeList about, final String completionStatusOverride) {
116
        final DNGFProtos.DNGFEntity.Builder entity = DNGFProtos.DNGFEntity.newBuilder();
117

    
118
        final String completionStatus = getCompletionStatus(about);
119
        if (StringUtils.isNoneBlank(completionStatusOverride)) {
120
            entity.setExtension(DliProtos.completionStatus, completionStatusOverride);
121
        } else if (completionStatus != null) {
122
            entity.setExtension(DliProtos.completionStatus, completionStatus);
123
        }
124

    
125
        List<FieldTypeProtos.KeyValue.Builder> collectedFrom = getDatasourceProvenance(about, "collectedFrom", completionStatusOverride);
126

    
127
        if (collectedFrom != null) {
128
            collectedFrom.forEach(entity::addCollectedfrom);
129
        }
130
        return entity;
131
    }
132

    
133
    public static String createRel(
134
            final String source,
135
            final String target,
136
            final String relationSemantic,
137
            final String relType,
138
            final String provenanceAction,
139
            final String trust,
140
            final NodeList about) {
141

    
142
        try {
143
            DNGFProtos.DNGFRel.Builder rel = relProto(source.trim(), target.trim(), relationSemantic, relType);
144
            List<FieldTypeProtos.KeyValue.Builder> collectedFrom = getDatasourceProvenance(about, "collectedFrom", null);
145
            if (collectedFrom == null || collectedFrom.isEmpty()) {
146
                throw new IllegalStateException(String.format("missing provenance information in rel source '%s', target '%s'", source, target));
147
            }
148
            collectedFrom.forEach(rel::addCollectedfrom);
149
            final DNGFProtos.DNGF pmf = getOaf(rel, getDataInfo(about, provenanceAction, trust, false, false));
150
            return base64(pmf.toByteArray());
151

    
152
        } catch (Throwable e) {
153
            e.printStackTrace(System.err);
154
            throw new RuntimeException(e);
155
        }
156

    
157
    }
158

    
159
    public static List<FieldTypeProtos.KeyValue.Builder> getDatasourceProvenance(NodeList about, final String nodeName, final String completionStatus) {
160
        Node dataInfoNode = getDataInfo(about);
161
        if (dataInfoNode != null && dataInfoNode.getChildNodes() != null && dataInfoNode.getChildNodes().getLength() > 0) {
162
            List<FieldTypeProtos.KeyValue.Builder> result = new ArrayList<>();
163

    
164
            for (int i = 0; i < dataInfoNode.getChildNodes().getLength(); i++) {
165
                final Node currentNode = dataInfoNode.getChildNodes().item(i);
166
                if (nodeName.equals(currentNode.getLocalName())) {
167
                    FieldTypeProtos.KeyValue.Builder currentItem = FieldTypeProtos.KeyValue.newBuilder();
168
                    final Node idNode = currentNode.getAttributes().getNamedItem("id");
169
                    final Node nameNode = currentNode.getAttributes().getNamedItem("name");
170
                    final Node completionStatusNode = currentNode.getAttributes().getNamedItem("completionStatus");
171
                    if (idNode != null) {
172
                        currentItem.setKey(idNode.getTextContent());
173
                    }
174
                    if (nameNode != null) {
175
                        currentItem.setValue(nameNode.getTextContent());
176
                    }
177
                    if (completionStatusNode != null) {
178
                        if (StringUtils.isBlank(completionStatus)) {
179
                            currentItem.setExtension(DliFieldTypeProtos.completionStatus, completionStatusNode.getTextContent());
180
                        } else {
181
                            currentItem.setExtension(DliFieldTypeProtos.completionStatus, completionStatus);
182
                        }
183
                    }
184

    
185
                    result.add(currentItem);
186
                }
187
            }
188
            return result;
189
        }
190
        return null;
191
    }
192

    
193

    
194
    public static FieldTypeProtos.StructuredProperty fixPid(final FieldTypeProtos.StructuredProperty inputPid) {
195
        if (inputPid == null) {
196
            return null;
197
        }
198
        String pid = inputPid.getValue();
199
        String pidType = inputPid.getQualifier().getClassid();
200

    
201
        if (StringUtils.isBlank(pid) || StringUtils.isBlank(pidType)) {
202
            return null;
203
        }
204
        pidType = pidType.trim().toLowerCase();
205
        pid = pid.trim().toLowerCase();
206

    
207
        if ("doi".equals(pidType)) {
208
            pid = pid.replace("http://dx.doi.org/", "").replace("http://doi.org/", "");
209
        }
210
        return getStructuredProperty(pid, pidType, pidType, inputPid.getQualifier().getSchemeid(), inputPid.getQualifier().getSchemename());
211
    }
212
}
(1-1/3)