Project

General

Profile

1
package eu.dnetlib.data.claims.migration.parser;
2

    
3
import eu.dnetlib.data.claims.migration.entity.Context;
4
import eu.dnetlib.data.claims.migration.entity.Result;
5
import eu.dnetlib.data.claims.migration.handler.DirectIndexHandler;
6
import eu.dnetlib.data.claimsDemo.ClaimUtils;
7
import eu.dnetlib.data.claimsDemo.ContextUtils;
8
import org.apache.log4j.Logger;
9
import org.w3c.dom.Document;
10
import org.w3c.dom.NodeList;
11
import org.xml.sax.InputSource;
12
import org.xml.sax.SAXException;
13

    
14
import javax.xml.parsers.DocumentBuilder;
15
import javax.xml.parsers.DocumentBuilderFactory;
16
import javax.xml.parsers.ParserConfigurationException;
17
import javax.xml.xpath.XPath;
18
import javax.xml.xpath.XPathConstants;
19
import javax.xml.xpath.XPathExpressionException;
20
import javax.xml.xpath.XPathFactory;
21
import java.io.IOException;
22
import java.io.StringReader;
23

    
24
/**
25
 * Created by kiatrop on 5/2/2016.
26
 */
27
public class DMFParser {
28
    private static final Logger logger = Logger.getLogger(DMFParser.class);
29

    
30
    public static Result dmf2Result(String xml) throws ParserConfigurationException, IOException, SAXException {
31
        Result r = new Result();
32
        r.setMetadataRecord(xml);
33
        r.setRecordFormat(ClaimUtils.FORMAT_XML);
34
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
35
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
36
        InputSource inputSource = new InputSource(new StringReader(xml));
37
        Document document = dBuilder.parse(inputSource);
38
        XPathFactory xPathfactory = XPathFactory.newInstance();
39
        XPath xpath = xPathfactory.newXPath();
40
        try {
41
            NodeList nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='doi']/text()").evaluate(document, XPathConstants.NODESET);
42
            if (nl.getLength() > 0) {
43
                r.setDoi(nl.item(0).getNodeValue());
44
                r.setExternalUrl("http://dx.doi.org/" + r.getDoi());
45
            }
46
            nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
47
            if (nl.getLength() > 0) {
48
                r.setOrcidworkid(nl.item(0).getNodeValue());
49
            }
50

    
51
            // DON'T DELETE following lines for orcidworkid!!
52
            // @identiferType typo is made on purpose -> there are dmf xml with this typo.
53
            if (r.getMetadataRecord().contains("@identiferType='orcidworkid'")) {
54
                r.setMetadataRecord(r.getMetadataRecord().replace("@identiferType='orcidworkid'", "@identifierType='orcidworkid'"));
55
                nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identiferType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
56
                if (nl.getLength() > 0) {
57
                    r.setOrcidworkid(nl.item(0).getNodeValue());
58
                }
59
            }
60
            nl = (NodeList) xpath.compile("//*[local-name()='objIdentifier']/text()").evaluate(document, XPathConstants.NODESET);
61
            if (nl.getLength() > 0) {
62
                r.setOpenaireId(nl.item(0).getNodeValue());
63
            }
64
            if(r.getExternalUrl() == null) {
65
                nl = (NodeList) xpath.compile("//*[local-name()='identifier']/text()").evaluate(document, XPathConstants.NODESET);
66
                if (nl.getLength() > 0) {
67
                    r.setExternalUrl(nl.item(0).getNodeValue());
68
                }
69
            }
70
            nl = (NodeList) xpath.compile("//*[local-name()='accessrights']/text()").evaluate(document, XPathConstants.NODESET);
71
            if (nl.getLength() > 0) {
72
                r.setAccessRights(nl.item(0).getNodeValue());
73
            }
74
            nl = (NodeList) xpath.compile("//embargoenddate/text()").evaluate(document, XPathConstants.NODESET);
75
            if (nl.getLength() > 0) {
76
                r.setEmbargoEndDate(nl.item(0).getNodeValue());
77
            }
78
            nl = (NodeList) xpath.compile("//*[local-name()='title']/text()").evaluate(document, XPathConstants.NODESET);
79
            if (nl.getLength() > 0) {
80
                r.setTitle(nl.item(0).getNodeValue());
81
            }
82
            nl = (NodeList) xpath.compile("//*[local-name()='collectedFrom']/@id").evaluate(document, XPathConstants.NODESET);
83
            if (nl.getLength() > 0) {
84
                r.setCollectedFrom(nl.item(0).getNodeValue());
85
            }
86

    
87
        } catch (Exception e) {
88
            logger.error("Error Parsing dmf result:\n "+xml,e);
89
            return null;
90

    
91
        }
92
        return r;
93
    }
94

    
95

    
96
    /**
97
     *
98
     * @param xml of a context DMF
99
     * @return context id
100
     * @throws ParserConfigurationException
101
     * @throws IOException
102
     * @throws SAXException
103
     * @throws XPathExpressionException
104
     */
105
    public  static String getContextIdFromDMF(String xml) throws ParserConfigurationException, IOException, SAXException, XPathExpressionException {
106
        String id=null;
107
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
108
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
109
        InputSource inputSource = new InputSource(new StringReader(xml));
110
        Document document = dBuilder.parse(inputSource);
111
        XPathFactory xPathfactory = XPathFactory.newInstance();
112
        XPath xpath = xPathfactory.newXPath();
113

    
114
        NodeList nl = (NodeList) xpath.compile("//*[local-name()='concept']/@id").evaluate(document, XPathConstants.NODESET);
115
        if (nl.getLength() > 0) {
116
            id = (nl.item(0).getNodeValue());
117
        }
118
        System.out.println(id);
119
        return id;
120
    }
121
    /**
122
     *
123
     * @param xml
124
     * @return DOI
125
     * @throws ParserConfigurationException
126
     * @throws IOException
127
     * @throws SAXException
128
     * @throws XPathExpressionException
129
     */
130
    public static String getDOIIdentifierFromDMF(String xml) throws ParserConfigurationException, IOException, SAXException, XPathExpressionException {
131
        String id=null;
132
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
133
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
134
        InputSource inputSource = new InputSource(new StringReader(xml));
135
        Document document = dBuilder.parse(inputSource);
136
        XPathFactory xPathfactory = XPathFactory.newInstance();
137
        XPath xpath = xPathfactory.newXPath();
138

    
139
                NodeList nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='doi']/text()").evaluate(document, XPathConstants.NODESET);
140
                if (nl.getLength() > 0) {
141
                    id = (nl.item(0).getNodeValue());
142
                }
143

    
144
        return id;
145
    }
146

    
147
    public static String getOrcidworkIdentifierFromDMF(String xml) throws ParserConfigurationException, IOException, SAXException, XPathExpressionException {
148
        String id=null;
149
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
150
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
151
        InputSource inputSource = new InputSource(new StringReader(xml));
152
        Document document = dBuilder.parse(inputSource);
153
        XPathFactory xPathfactory = XPathFactory.newInstance();
154
        XPath xpath = xPathfactory.newXPath();
155

    
156
        NodeList nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
157
                if (nl.getLength() > 0) {
158
                    id = (nl.item(0).getNodeValue());
159
                }
160

    
161
                // DON'T DELETE following lines for orcidworkid!!
162
                // @identiferType typo is made on purpose -> there are dmf xml with this typo.
163
                if (id== null) {
164
                     nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identiferType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
165
                    if (nl.getLength() > 0) {
166
                        id = nl.item(0).getNodeValue();
167
                    }
168
                }
169

    
170
        return id;
171
    }
172
    public static String getAccessRightsFromDMF(String xml) throws ParserConfigurationException, IOException, SAXException, XPathExpressionException {
173
        String accessRights=null;
174
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
175
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
176
        InputSource inputSource = new InputSource(new StringReader(xml));
177
        Document document = dBuilder.parse(inputSource);
178
        XPathFactory xPathfactory = XPathFactory.newInstance();
179
        XPath xpath = xPathfactory.newXPath();
180

    
181
        NodeList nl;
182
        nl = (NodeList) xpath.compile("//*[local-name()='accessrights']/text()").evaluate(document, XPathConstants.NODESET);
183
        if (nl.getLength() > 0) {
184
            accessRights=nl.item(0).getNodeValue();
185
        }
186
        return accessRights;
187
    }
188

    
189
    public static String getEmbargoEndDateFromDMF(String xml) throws ParserConfigurationException, IOException, SAXException, XPathExpressionException {
190
        String embargoDate=null;
191
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
192
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
193
        InputSource inputSource = new InputSource(new StringReader(xml));
194
        Document document = dBuilder.parse(inputSource);
195
        XPathFactory xPathfactory = XPathFactory.newInstance();
196
        XPath xpath = xPathfactory.newXPath();
197

    
198
        NodeList nl;
199
        nl = (NodeList) xpath.compile("//embargoenddate/text()").evaluate(document, XPathConstants.NODESET);
200
        if (nl.getLength() > 0) {
201
            embargoDate=nl.item(0).getNodeValue();
202
        }
203
        return embargoDate;
204
    }
205

    
206
}
(1-1/4)