Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import com.google.common.collect.Lists;
4
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
5
import eu.dnetlib.data.proto.*;
6
import org.w3c.dom.Node;
7
import org.w3c.dom.NodeList;
8

    
9
import java.util.List;
10

    
11

    
12
public class DmfToHbaseXsltFunctions extends DliToHbaseXsltFunctions {
13

    
14
    public static String dliDataset(
15
            final String resultId,
16
            final NodeList about,
17
            final NodeList titles,
18
            final NodeList subjects,
19
            final NodeList publisher,
20
            final NodeList descriptions,
21
            final NodeList dates,
22
            final NodeList dateaccepted,
23
            final NodeList resourceTypes,
24
            final NodeList formats,
25
            final NodeList sizes,
26
            final NodeList languages,
27
            final NodeList cobjcategory,
28
            final NodeList creators,
29
            final NodeList rights,
30
            final NodeList pidList,
31
            final String provenance,
32
            final NodeList aboutNode,
33
            final NodeList originalIds,
34
            final String dateOfCollection,
35
            final String dateOfTransformation,
36
            final String trust) {
37

    
38
        final String entityId = DNGFRowKeyDecoder.decode(resultId).getKey();
39
        final DatasetProtos.Dataset.Builder dataset = DatasetProtos.Dataset.newBuilder();
40
        DatasetProtos.Dataset.Metadata.Builder metadataProto = DatasetProtos.Dataset.Metadata.newBuilder();
41

    
42
        // subject
43
        for (int i = 0; i < subjects.getLength(); i++) {
44
            Node currentNode = subjects.item(i);
45
            NodeList childNodes = currentNode.getChildNodes();
46
            if (childNodes.getLength() > 0) {
47
                String subjectValue = childNodes.item(0).getNodeValue();
48
                addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("subject"),
49
                        getStructuredProperty(subjectValue, "keyword", "keyword", "dnet:result_subject", "dnet:result_subject"));
50
            }
51
        }
52
        // title
53
        manageTitle(titles, metadataProto);
54

    
55
        // description
56
        for (int i = 0; i < descriptions.getLength(); i++) {
57
            Node currentNode = descriptions.item(i);
58
            if (currentNode != null && currentNode.hasChildNodes()) {
59
                String descriptionValue = currentNode.getChildNodes().item(0).getNodeValue();
60
                addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("description"), descriptionValue);
61
            }
62
        }
63

    
64
        // contributors
65
        for (int i = 0; i < creators.getLength(); i++) {
66
            Node currentNode = creators.item(i);
67
            if (currentNode != null && currentNode.hasChildNodes()) {
68
                String contributorValue = currentNode.getChildNodes().item(0).getNodeValue();
69

    
70

    
71
                addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("contributor"), contributorValue);
72
            }
73
        }
74

    
75

    
76
        // publisher
77
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("publisher"), getFirstItem(publisher));
78
        // dates
79
        manageDate(dates, metadataProto);
80

    
81
        // dateofacceptance
82
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted));
83

    
84
        // size
85
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("size"), getFirstItem(sizes));
86

    
87
        // format
88
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("format"), getFirstItem(formats));
89

    
90

    
91
        // language
92
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("language"),
93
                setQualifier(getDefaultQualifier("dnet:languages"), Lists.newArrayList(getFirstItem(languages))));
94

    
95

    
96
        dataset.setMetadata(metadataProto);
97

    
98
        DNGFProtos.DNGFEntity.Builder entity = DliToHbaseXsltFunctions.parseAbout(aboutNode, null);
99

    
100
        entity.setType(TypeProtos.Type.dataset).setId(resultId);
101

    
102
        // resultType
103
        addField(metadataProto, DatasetProtos.Dataset.Metadata.getDescriptor().findFieldByName("resulttype"), getSimpleQualifier(getResourceType(resourceTypes), "dnet:result_typologies"));
104

    
105
        entity.setDataset(dataset);
106

    
107
        List<FieldTypeProtos.StructuredProperty> pids = parsePids(pidList);
108

    
109
        //entity.addAllPid(Iterables.filter(pids, Predicates.notNull()));
110

    
111
        pids.stream().map(DliToHbaseXsltFunctions::fixPid).forEach(pid -> entity.addExtension(DliProtos.typedIdentifier, pid));
112

    
113
        PmfToHbaseXsltFunctions.addResolvedFrom(entity, about);
114

    
115
        final DNGFProtos.DNGF oaf = getOaf(entity, getDataInfo(aboutNode, provenance, trust, false, false));
116

    
117
        return base64(oaf.toByteArray());
118

    
119
    }
120

    
121

    
122
    private static String getResourceType(final NodeList resourceTypes) {
123
        if (resourceTypes.getLength() > 0) {
124
            String nodeValue = resourceTypes.item(0).getFirstChild().getNodeValue();
125
            return nodeValue;
126
        }
127
        return null;
128
    }
129

    
130
    public static List<FieldTypeProtos.StructuredProperty> parsePids(final NodeList nodelist) {
131

    
132
        final List<FieldTypeProtos.StructuredProperty> pids = Lists.newArrayList();
133

    
134
        for (int i = 0; i < nodelist.getLength(); i++) {
135
            final Node node = nodelist.item(i);
136
            Node pidType = null;
137
            if (node.getNodeType() == Node.ELEMENT_NODE) {
138
                if (node.getLocalName().equalsIgnoreCase("identifier")) {
139
                    pidType = node.getAttributes().getNamedItem("identifierType");
140

    
141
                    pids.add(getStructuredProperty(node.getTextContent(), pidType.getTextContent(), pidType.getTextContent(), "dnet:pid_types",
142
                            "dnet:pid_types"));
143
                }
144
            }
145
        }
146
        return pids;
147
    }
148

    
149

    
150
}
(2-2/3)