Project

General

Profile

1
package eu.dnetlib.data.claims.migration.parser;
2

    
3
import com.google.gson.Gson;
4
import eu.dnetlib.data.claims.migration.entity.Result;
5
import eu.dnetlib.data.claimsDemo.ClaimUtils;
6
import org.apache.log4j.Logger;
7
import org.w3c.dom.Document;
8
import org.w3c.dom.NodeList;
9
import org.xml.sax.InputSource;
10
import org.xml.sax.SAXException;
11
import sun.print.CUPSPrinter;
12

    
13
import javax.xml.parsers.DocumentBuilder;
14
import javax.xml.parsers.DocumentBuilderFactory;
15
import javax.xml.parsers.ParserConfigurationException;
16
import javax.xml.transform.Transformer;
17
import javax.xml.transform.TransformerFactory;
18
import javax.xml.transform.dom.DOMSource;
19
import javax.xml.transform.stream.StreamResult;
20
import javax.xml.xpath.XPath;
21
import javax.xml.xpath.XPathConstants;
22
import javax.xml.xpath.XPathExpressionException;
23
import javax.xml.xpath.XPathFactory;
24
import java.io.BufferedReader;
25
import java.io.IOException;
26
import java.io.StringReader;
27
import java.io.StringWriter;
28
import java.math.BigInteger;
29
import java.security.MessageDigest;
30
import java.security.NoSuchAlgorithmException;
31
import java.util.List;
32

    
33
/**
34
 * Created by kiatrop on 5/2/2016.
35
 */
36

    
37
public class ExternalRecordParser {
38

    
39
    /**
40
     * Gets the json response from crossref API
41
     * and returns a Result object
42
     * *
43
     * @param json
44
     * @return Result or null
45
     */
46
    private static final Logger logger = Logger.getLogger(ExternalRecordParser.class);
47

    
48
    public static Result crossref2Result(String json) {
49
        Result result = null;
50
        if (json == null){
51
            return result;
52
        }
53

    
54
        BufferedReader br = new BufferedReader(new StringReader(json));
55
        //convert the json string back to object
56
        Gson gson = new Gson();
57
        CrossrefResponse obj = gson.fromJson(br, CrossrefResponse.class);
58

    
59
        if(obj!=null && obj.getMessage().getItems().size()>0){
60
            result= new Result();
61
            result.setMetadataRecord(json);
62
            result.setRecordFormat(ClaimUtils.FORMAT_JSON);
63
//            result.setFound(true);
64
            result.setCollectedFrom(ClaimUtils.COLLECTED_FROM_CROSSREF);
65
            CrossrefResponse.Message.Item item = obj.getMessage().getItems().get(0);
66
            if (item.getTitle()!=null && item.getTitle().size()>0){
67
                result.setTitle(item.getTitle().get(0));
68
                if(item.getAuthor() != null) {
69
                    for (int i = 0; i < item.getAuthor().size(); i++) {
70
                        result.getAuthors().put(i + "", item.getAuthor().get(i).getGiven() + " " + item.getAuthor().get(i).getFamily());
71
                    }
72
                }
73
            }
74
            System.out.println(item.getDOI());
75
            result.setDoi(item.getDOI());
76
            result.setOpenaireId(createOpenaireId(item.getDOI()));
77
            result.setExternalUrl(item.getUrl());
78
            if(result.getDoi()!=null&& result.getExternalUrl()==null){
79
                result.setExternalUrl(ClaimUtils.PREFIX_URL_FOR_DOI + result.getDoi());
80
            }
81
            result.setResultType(ClaimUtils.PUBLICATION);
82

    
83
        }
84

    
85
        return result;
86
    }
87

    
88
    public static Result dataciteJson2Result(String json) {
89
        Result result = null;
90
        if (json == null){
91
            return result;
92
        }
93

    
94
        BufferedReader br = new BufferedReader(new StringReader(json));
95
        //convert the json string back to object
96
        Gson gson = new Gson();
97

    
98
        DataciteResponse obj = gson.fromJson(br, DataciteResponse.class);
99
        System.out.println(json);
100
        if(obj!=null && obj.getData() != null ){
101
            result= new Result();
102
            result.setMetadataRecord(json);
103
            result.setRecordFormat(ClaimUtils.FORMAT_JSON);
104
//            result.setFound(true);
105
            result.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
106
            DataciteResponse.Data.Attributes item = obj.getData().getAttributes();
107
            if (item.getTitle()!=null){
108
                result.setTitle(item.getTitle());
109
                if(item.getAuthor() != null) {
110
                    for (int i = 0; i < item.getAuthor().size(); i++) {
111
                        result.getAuthors().put(i + "", (item.getAuthor().get(i).getGiven() != null)?(item.getAuthor().get(i).getGiven() + " " + item.getAuthor().get(i).getFamily()):item.getAuthor().get(i).getLiteral());
112
                    }
113
                }
114
            }
115
            System.out.println(item.getDoi());
116
            result.setDoi(item.getDoi());
117
            result.setOpenaireId(createOpenaireId(item.getDoi()));
118
            if(result.getDoi()!=null&& result.getExternalUrl()==null){
119
                result.setExternalUrl(ClaimUtils.PREFIX_URL_FOR_DOI + result.getDoi());
120
            }
121
            result.setResultType(ClaimUtils.DATASET);
122

    
123
        }
124

    
125
        return result;
126
    }
127

    
128
    /**
129
     *
130
     * @param xml
131
     * @param orcidworkid The id from DMF identifier[@identifierType='orcidworkid'] {orcid + work-id}
132
     * @return Result or null
133
     */
134
    public static Result orcid2Result(String xml, String orcidworkid) {
135
         Result result = null;
136
        if (xml == null || orcidworkid == null){
137
            return result;
138
        }
139
        String orcidwork = orcidworkid.substring(20, orcidworkid.length());
140

    
141
        try {
142
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
143
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
144
            InputSource is = new InputSource(new StringReader(xml));
145
            Document document = dBuilder.parse(is);
146
            XPathFactory xPathfactory= XPathFactory.newInstance();
147
            XPath xpath = xPathfactory.newXPath();
148

    
149
            NodeList nl;
150
            //worknl is the list with the works we search for
151
            NodeList worknl = (NodeList) xpath.compile("//orcid-work[@put-code="+orcidwork+"]").evaluate(document,XPathConstants.NODESET);
152
            if (worknl.getLength() > 0) {
153
                //worknl contains at least one result
154
                // instantiate result
155
                result = new Result();
156
                result.setResultType(ClaimUtils.PUBLICATION);
157
                nl = (NodeList) xpath.compile("//title/text()").evaluate(document, XPathConstants.NODESET);
158
                if (nl.getLength() > 0) {
159
                    result.setTitle(nl.item(0).getNodeValue());
160
                }
161
                nl = (NodeList) xpath.compile("//work-external-identifier").evaluate(document, XPathConstants.NODESET);
162
                for (int i = 0; i < nl.getLength(); i++) {
163
                    NodeList identifiersNl;
164
                    String type=null;
165
                    String id=null;
166
                    identifiersNl = (NodeList) xpath.compile("//work-external-identifier-type/text()").evaluate(nl.item(i), XPathConstants.NODESET);
167
                    if (identifiersNl.getLength() > 0) {
168
                        type=identifiersNl.item(0).getNodeValue();
169
                    }
170
                    identifiersNl = (NodeList) xpath.compile("//work-external-identifier-id/text()").evaluate(nl.item(i), XPathConstants.NODESET);
171
                    if (identifiersNl.getLength() > 0) {
172
                        id=identifiersNl.item(0).getNodeValue();
173
                    }
174
                    if(type != null && type.equals("doi") && id != null){
175
                        result.setDoi(id);
176
                        result.setExternalUrl(ClaimUtils.PREFIX_URL_FOR_DOI + id);
177
                    }
178
                    //more types (isbn)
179

    
180
                }
181
                if(result.getExternalUrl()==null) {
182
                    nl = (NodeList) xpath.compile("//work-source/uri/text()").evaluate(document, XPathConstants.NODESET);
183
                    if (nl.getLength() > 0) {
184
                        result.setExternalUrl(nl.item(0).getNodeValue());
185
                    }
186
                }
187

    
188
                nl = (NodeList) xpath.compile("//orcid-work").evaluate(document, XPathConstants.NODESET);
189
                for (int i = 0; i < nl.getLength(); i++) {
190
                    if(!worknl.item(0).isEqualNode(nl.item(i))) {
191
                        nl.item(i).getParentNode().removeChild(nl.item(i));
192
                    }
193
                }
194

    
195
                DOMSource domSource = new DOMSource(document);
196
                StringWriter writer = new StringWriter();
197
                StreamResult streamResult = new StreamResult(writer);
198
                TransformerFactory tf = TransformerFactory.newInstance();
199
                Transformer transformer = tf.newTransformer();
200
                transformer.transform(domSource, streamResult);
201
                result.setOrcidworkid(orcidworkid);
202
                result.setOpenaireId(createOpenaireId(orcidworkid));
203
                result.setMetadataRecord(writer.toString());
204
                result.setRecordFormat(ClaimUtils.FORMAT_XML);
205
                result.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
206
//                result.setFound(true);
207
            }
208

    
209
        } catch (Exception e) {
210
            logger.error("Error parsing Orcid result\n"+xml,e);
211
        }
212

    
213
        return result;
214
    }
215

    
216
    /**
217
     *
218
     * @param xml
219
     * @return Result object or null
220
     */
221
    public static Result datacite2Result(String xml) {
222
        logger.debug("Datacite xml response:\n"+xml);
223
        Result result = null;
224
        if (xml != null ) {
225

    
226
            try {
227
                DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
228
                DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
229
                InputSource is = new InputSource(new StringReader(xml));
230
                Document document = dBuilder.parse(is);
231
                XPathFactory xPathfactory = XPathFactory.newInstance();
232
                XPath xpath = xPathfactory.newXPath();
233
                NodeList nl;
234
                nl = (NodeList) xpath.compile("//*[local-name()='identifier']/text()").evaluate(document, XPathConstants.NODESET);
235
                if (nl.getLength() > 0) {
236
                    result = new Result();
237
                    result.setResultType(ClaimUtils.DATASET);
238

    
239
                    result.setDoi(nl.item(0).getNodeValue());
240
                    result.setExternalUrl("http://dx.doi.org/" + result.getDoi());
241

    
242
                    result.setOpenaireId(createOpenaireId(result.getDoi()));
243
                    nl = (NodeList) xpath.compile("//*[local-name()='title']/text()").evaluate(document, XPathConstants.NODESET);
244
                    if (nl.getLength() > 0) {
245
                        result.setTitle(nl.item(0).getNodeValue());
246
                    }
247

    
248
                    nl = (NodeList) xpath.compile("//*[local-name()='creator']/text()").evaluate(document, XPathConstants.NODESET);
249
                    if (nl.getLength() > 0) {
250
                        for (int i = 0; i < nl.getLength(); i++) {
251
                            result.getAuthors().put(i + "", nl.item(i).getNodeValue());
252
                        }
253
                    }
254

    
255
                    result.setMetadataRecord(xml);
256
                    result.setRecordFormat(ClaimUtils.FORMAT_XML);
257
                    result.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
258
//                result.setFound(true);
259
                    logger.debug("Datacite result" + result.toString());
260

    
261
                } else {
262

    
263
                  logger.error("Couldn't parse Datacite result\n" + xml);
264

    
265
                }
266

    
267
                } catch(ParserConfigurationException | IOException | XPathExpressionException |SAXException e){
268
                    logger.error("Error parsing Orcid result\n"+xml,e);
269
                }
270

    
271
        }
272
            return result;
273
    }
274
    private static String createOpenaireId(String id){
275
        System.out.println("createOpenaireId from id:" +id);
276
        if(id==null){
277
            return null;
278
        }
279
        String openaireId=id;
280
        MessageDigest m= null;
281
        try {
282
            m = MessageDigest.getInstance("MD5");
283
            m.update(id.getBytes(),0,id.length());
284
            openaireId = new BigInteger(1,m.digest()).toString(16);
285
        } catch (NoSuchAlgorithmException e) {
286
            logger.error("Couldn't instatiate md5 algorithm",e);
287
        }
288
        openaireId ="user:claim__"+openaireId;
289
        return openaireId;
290
    }
291

    
292
}
293

    
294
/**
 * Mirrors the structure of the Crossref API JSON response.
 * <p>
 * Instances are populated by Gson, which binds JSON keys to fields by their exact,
 * case-sensitive name; the field names below must therefore not be renamed freely.
 * The nested types are static so that they can be instantiated without an enclosing
 * instance.
 */
class CrossrefResponse {
    String status;
    Message message;

    /** @return the value of the "status" key, or {@code null} when absent */
    public String getStatus() {
        return status;
    }

    /** @return the "message" object, or {@code null} when absent */
    public Message getMessage() {
        return message;
    }

    /** The "message" object of the response. */
    static class Message {
        List<Item> items;

        /** @return the "items" array, or {@code null} when absent */
        public List<Item> getItems() {
            return items;
        }

        /** One entry of the "items" array. */
        static class Item {
            private String DOI;
            private List<String> title;
            private List<Author> author;
            // Named exactly like the JSON key "URL"; a field called "Url" is never bound
            // because Gson matches names case-sensitively.
            private String URL;
            private String type;
            private String source;

            public String getDOI() {
                return DOI;
            }

            public List<String> getTitle() {
                return title;
            }

            public List<Author> getAuthor() {
                return author;
            }

            public String getUrl() {
                return URL;
            }

            public String getType() {
                return type;
            }

            public String getSource() {
                return source;
            }

            /** One entry of the "author" array. */
            static class Author {
                String given;
                String family;

                public String getGiven() {
                    return given;
                }

                public String getFamily() {
                    return family;
                }
            }
        }
    }
}
363

    
364
/**
 * Mirrors the structure of the Datacite API JSON response.
 * <p>
 * Instances are populated by Gson, which binds JSON keys to fields by their exact name.
 * The nested types are static so that they can be instantiated without an enclosing
 * instance.
 */
class DataciteResponse {

    Data data;

    /** @return the "data" object, or {@code null} when absent */
    public Data getData() {
        return data;
    }

    /** The "data" object of the response. */
    static class Data {

        private Attributes attributes;

        /** @return the "attributes" object, or {@code null} when absent */
        public Attributes getAttributes() {
            return attributes;
        }

        /** The "attributes" object describing the record. */
        static class Attributes {
            private String doi;
            private String containerTitle;
            private String published;
            private String title;
            private List<Author> author;

            public String getDoi() {
                return doi;
            }

            public String getContainerTitle() {
                return containerTitle;
            }

            public String getPublished() {
                return published;
            }

            public String getTitle() {
                return title;
            }

            public List<Author> getAuthor() {
                return author;
            }
        }

        /** One entry of the "author" array. */
        static class Author {
            String given;
            String family;
            String literal;

            public String getGiven() {
                return given;
            }

            public String getFamily() {
                return family;
            }

            public String getLiteral() {
                return literal;
            }
        }
    }
}
(2-2/4)