Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import com.google.common.collect.Lists;
4
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
5
import eu.dnetlib.data.proto.*;
6
import eu.dnetlib.data.proto.DliProtos;
7
import org.apache.commons.lang3.StringUtils;
8
import org.w3c.dom.Node;
9
import org.w3c.dom.NodeList;
10

    
11
import java.util.List;
12

    
13

    
14
public class DmfToHbaseXsltFunctions extends DliToHbaseXsltFunctions {
15

    
16
    public static String dliDataset(
17
            final String resultId,
18
            final NodeList about,
19
            final NodeList titles,
20
            final NodeList subjects,
21
            final NodeList publisher,
22
            final NodeList descriptions,
23
            final NodeList dates,
24
            final NodeList dateaccepted,
25
            final NodeList resourceTypes,
26
            final NodeList formats,
27
            final NodeList sizes,
28
            final NodeList languages,
29
            final NodeList cobjcategory,
30
            final NodeList creators,
31
            final NodeList rights,
32
            final NodeList pidList,
33
            final String provenance,
34
            final NodeList aboutNode,
35
            final NodeList originalIds,
36
            final String dateOfCollection,
37
            final String dateOfTransformation,
38
            final String trust) {
39

    
40
//        final String entityId = DNGFRowKeyDecoder.decode(resultId).getKey();
41
        final DatasetProtos.Dataset.Builder dataset = DatasetProtos.Dataset.newBuilder();
42
        DatasetProtos.Dataset.Metadata.Builder metadataProto = DatasetProtos.Dataset.Metadata.newBuilder();
43

    
44
        // subject
45
        for (int i = 0; i < subjects.getLength(); i++) {
46
            Node currentNode = subjects.item(i);
47
            NodeList childNodes = currentNode.getChildNodes();
48
            if (childNodes.getLength() > 0) {
49
                String subjectValue = childNodes.item(0).getNodeValue();
50
                addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("subject"),
51
                        getStructuredProperty(subjectValue, "keyword", "keyword", "dnet:result_subject", "dnet:result_subject"));
52
            }
53
        }
54
        // title
55
        manageTitle(titles, metadataProto);
56

    
57
        // description
58
        for (int i = 0; i < descriptions.getLength(); i++) {
59
            Node currentNode = descriptions.item(i);
60
            if (currentNode != null && currentNode.hasChildNodes()) {
61
                String descriptionValue = currentNode.getChildNodes().item(0).getNodeValue();
62
                addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("description"), descriptionValue);
63
            }
64
        }
65

    
66
        // contributors
67
        for (int i = 0; i < creators.getLength(); i++) {
68
            Node currentNode = creators.item(i);
69
            if (currentNode != null && currentNode.hasChildNodes()) {
70
                String contributorValue = currentNode.getChildNodes().item(0).getNodeValue();
71

    
72

    
73
                addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("contributor"), contributorValue);
74
            }
75
        }
76

    
77

    
78
        // publisher
79
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("publisher"), getFirstItem(publisher));
80
        // dates
81
        manageDate(dates, metadataProto);
82

    
83
        // dateofacceptance
84
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted));
85

    
86
        // size
87
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("size"), getFirstItem(sizes));
88

    
89
        // format
90
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("format"), getFirstItem(formats));
91

    
92

    
93
        // language
94
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("language"),
95
                setQualifier(getDefaultQualifier("dnet:languages"), Lists.newArrayList(getFirstItem(languages))));
96

    
97

    
98
        dataset.setMetadata(metadataProto);
99

    
100
        DNGFProtos.DNGFEntity.Builder entity = DliToHbaseXsltFunctions.parseAbout(aboutNode, null);
101

    
102
        entity.setType(TypeProtos.Type.dataset).setId(resultId);
103

    
104
        // resultType
105
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("resulttype"), getSimpleQualifier(getResourceType(resourceTypes), "dnet:result_typologies"));
106

    
107
        entity.setDataset(dataset);
108

    
109
        List<FieldTypeProtos.StructuredProperty> pids = parsePids(pidList);
110

    
111
        //entity.addAllPid(Iterables.filter(pids, Predicates.notNull()));
112

    
113
        pids.stream().map(DliToHbaseXsltFunctions::fixPid).forEach(pid -> entity.addExtension(DliProtos.typedIdentifier, pid));
114

    
115
        PmfToHbaseXsltFunctions.addResolvedFrom(entity, about);
116

    
117
        final DNGFProtos.DNGF oaf = getOaf(entity, getDataInfo(aboutNode, provenance, trust, false, false));
118

    
119
        return base64(oaf.toByteArray());
120

    
121
    }
122

    
123

    
124
    private static String getResourceType(final NodeList resourceTypes) {
125
        if (resourceTypes.getLength() > 0) {
126
            String nodeValue = resourceTypes.item(0).getFirstChild().getNodeValue();
127
            return nodeValue;
128
        }
129
        return null;
130
    }
131

    
132
    public static List<FieldTypeProtos.StructuredProperty> parsePids(final NodeList nodelist) {
133

    
134
        final List<FieldTypeProtos.StructuredProperty> pids = Lists.newArrayList();
135

    
136
        for (int i = 0; i < nodelist.getLength(); i++) {
137
            final Node node = nodelist.item(i);
138
            Node pidType = null;
139
            if (node.getNodeType() == Node.ELEMENT_NODE) {
140
                if (node.getLocalName().equalsIgnoreCase("identifier")) {
141
                    pidType = node.getAttributes().getNamedItem("identifierType");
142
                    if (node.getTextContent() != null && StringUtils.isNotEmpty(node.getTextContent().trim()))
143
                        pids.add(getStructuredProperty(node.getTextContent(), pidType.getTextContent(), pidType.getTextContent(), "dnet:pid_types",
144
                                "dnet:pid_types"));
145
                }
146
            }
147
        }
148
        return pids;
149
    }
150

    
151

    
152
}
(2-2/3)