Project

General

Profile

1
package eu.dnetlib.data.claims.migration.parser;
2

    
3
import com.google.gson.Gson;
4
import eu.dnetlib.data.claims.migration.entity.Result;
5
import eu.dnetlib.data.claimsDemo.ClaimUtils;
6
import org.apache.log4j.Logger;
7
import org.w3c.dom.Document;
8
import org.w3c.dom.NodeList;
9
import org.xml.sax.InputSource;
10
import org.xml.sax.SAXException;
11
import sun.print.CUPSPrinter;
12

    
13
import javax.xml.parsers.DocumentBuilder;
14
import javax.xml.parsers.DocumentBuilderFactory;
15
import javax.xml.parsers.ParserConfigurationException;
16
import javax.xml.transform.Transformer;
17
import javax.xml.transform.TransformerFactory;
18
import javax.xml.transform.dom.DOMSource;
19
import javax.xml.transform.stream.StreamResult;
20
import javax.xml.xpath.XPath;
21
import javax.xml.xpath.XPathConstants;
22
import javax.xml.xpath.XPathExpressionException;
23
import javax.xml.xpath.XPathFactory;
24
import java.io.BufferedReader;
25
import java.io.IOException;
26
import java.io.StringReader;
27
import java.io.StringWriter;
28
import java.math.BigInteger;
29
import java.security.MessageDigest;
30
import java.security.NoSuchAlgorithmException;
31
import java.util.List;
32

    
33
/**
34
 * Created by kiatrop on 5/2/2016.
35
 */
36

    
37
public class ExternalRecordParser {
38

    
39
    /**
40
     * Gets the json response from crossref API
41
     * and returns a Result object
42
     * *
43
     * @param json
44
     * @return Result or null
45
     */
46
    private static final Logger logger = Logger.getLogger(ExternalRecordParser.class);
47

    
48
    public static Result crossref2Result(String json) {
49
        Result result = null;
50
        if (json == null){
51
            return result;
52
        }
53

    
54
        BufferedReader br = new BufferedReader(new StringReader(json));
55
        //convert the json string back to object
56
        Gson gson = new Gson();
57
        CrossrefResponse obj = gson.fromJson(br, CrossrefResponse.class);
58

    
59
        if(obj!=null && obj.getMessage().getItems().size()>0){
60
            result= new Result();
61
            result.setMetadataRecord(json);
62
            result.setRecordFormat(ClaimUtils.FORMAT_JSON);
63
//            result.setFound(true);
64
            result.setCollectedFrom(ClaimUtils.COLLECTED_FROM_CROSSREF);
65
            CrossrefResponse.Message.Item item = obj.getMessage().getItems().get(0);
66
            if (item.getTitle()!=null && item.getTitle().size()>0){
67
                result.setTitle(item.getTitle().get(0));
68
                if(item.getAuthor() != null) {
69
                    for (int i = 0; i < item.getAuthor().size(); i++) {
70
                        result.getAuthors().put(i + "", item.getAuthor().get(i).getGiven() + " " + item.getAuthor().get(i).getFamily());
71
                    }
72
                }
73
            }
74
//            System.out.println(item.getDOI());
75
            result.setDoi(item.getDOI());
76
            result.setOpenaireId(createOpenaireId(item.getDOI()));
77
            result.setExternalUrl(item.getUrl());
78
            if(result.getDoi()!=null&& result.getExternalUrl()==null){
79
                result.setExternalUrl(ClaimUtils.PREFIX_URL_FOR_DOI + result.getDoi());
80
            }
81
            result.setResultType(ClaimUtils.PUBLICATION);
82

    
83
        }
84

    
85
        return result;
86
    }
87

    
88
    public static Result dataciteJson2Result(String json) {
89
        Result result = null;
90
        if (json == null){
91
            return result;
92
        }
93

    
94
        BufferedReader br = new BufferedReader(new StringReader(json));
95
        //convert the json string back to object
96
        Gson gson = new Gson();
97

    
98
        DataciteResponse obj = gson.fromJson(br, DataciteResponse.class);
99
        System.out.println(json);
100
        if(obj!=null && obj.getData() != null ){
101
            result= new Result();
102
            result.setMetadataRecord(json);
103
            result.setRecordFormat(ClaimUtils.FORMAT_JSON);
104
//            result.setFound(true);
105
            result.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
106
            DataciteResponse.Data.Attributes item = obj.getData().getAttributes();
107
            if (item.getTitle()!=null){
108
                result.setTitle(item.getTitle());
109
                if(item.getAuthor() != null) {
110
                    for (int i = 0; i < item.getAuthor().size(); i++) {
111
                        result.getAuthors().put(i + "", (item.getAuthor().get(i).getGiven() != null)?(item.getAuthor().get(i).getGiven() + " " + item.getAuthor().get(i).getFamily()):item.getAuthor().get(i).getLiteral());
112
                    }
113
                }
114
            }
115
            System.out.println(item.getDoi());
116
            result.setDoi(item.getDoi());
117
            result.setOpenaireId(createOpenaireId(item.getDoi()));
118
            if(result.getDoi()!=null&& result.getExternalUrl()==null){
119
                result.setExternalUrl(ClaimUtils.PREFIX_URL_FOR_DOI + result.getDoi());
120
            }
121
            result.setResultType(ClaimUtils.DATASET);
122

    
123
        }
124

    
125
        return result;
126
    }
127

    
128
    /**
129
     *
130
     * @param xml
131
     * @param orcidworkid The id from DMF identifier[@identifierType='orcidworkid'] {orcid + work-id}
132
     * @return Result or null
133
     */
134
    public static Result orcid2Result(String xml, String orcidworkid) {
135
         Result result = null;
136
        if (xml == null || orcidworkid == null){
137
            return result;
138
        }
139
        String orcidwork = orcidworkid.substring(20, orcidworkid.length());
140

    
141
        try {
142
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
143
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
144
            InputSource is = new InputSource(new StringReader(xml));
145
            Document document = dBuilder.parse(is);
146
            XPathFactory xPathfactory= XPathFactory.newInstance();
147
            XPath xpath = xPathfactory.newXPath();
148

    
149
            NodeList nl;
150
            System.out.println(orcidwork);
151
            NodeList worknl = (NodeList) xpath.compile("//*[name()='work:work-summary'][@put-code='"+orcidwork+"']").evaluate(document,XPathConstants.NODESET);
152
            for( int n = 0; n<worknl.getLength(); n++){
153
                String code = (String) xpath.compile("//*[name()='work:work-summary'][@put-code='"+orcidwork+"']/@put-code").evaluate(worknl.item(n), XPathConstants.STRING);
154
                result = new Result();
155
                result.setResultType(ClaimUtils.PUBLICATION);
156
                nl = (NodeList) xpath.compile("//*[name()='work:work-summary'][@put-code='"+orcidwork+"']/*[name()='work:title']/*[name()='common:title']/text()").evaluate(worknl.item(n), XPathConstants.NODESET);
157
                if (nl.getLength() > 0) {
158
                    result.setTitle(nl.item(0).getNodeValue());
159
                }
160

    
161
                nl = (NodeList) xpath.compile("//*[name()='work:work-summary'][@put-code='"+orcidwork+"']//*[name()='common:external-id']").evaluate(worknl.item(n), XPathConstants.NODESET);
162
                for (int i = 0; i < nl.getLength(); i++) {
163
                    NodeList identifiersNl;
164
                    String type=null;
165
                    String id=null;
166
                    identifiersNl = (NodeList) xpath.compile("//*[name()='work:work-summary'][@put-code='"+orcidwork+"']//*[name()='common:external-id-type']/text()").evaluate(nl.item(i), XPathConstants.NODESET);
167
                    if (identifiersNl.getLength() > 0) {
168
                        type=identifiersNl.item(0).getNodeValue();
169
                    }
170
                    identifiersNl = (NodeList) xpath.compile("//*[name()='work:work-summary'][@put-code='"+orcidwork+"']//*[name()='common:external-id-value']/text()").evaluate(nl.item(i), XPathConstants.NODESET);
171
                    if (identifiersNl.getLength() > 0) {
172
                        id=identifiersNl.item(0).getNodeValue();
173
                    }
174
                    if(type != null && type.equals("doi") && id != null){
175
                        result.setDoi(id);
176
                        result.setExternalUrl(ClaimUtils.PREFIX_URL_FOR_DOI + id);
177
                    }
178
                    //more types (isbn)
179

    
180
                }
181
                if(result.getExternalUrl()==null) {
182
                    nl = (NodeList) xpath.compile("//*[name()='work:work-summary'][@put-code='"+orcidwork+"']//*[name()='common:source']/*[name()='common:source-client-id']/*[name()='common:uri']/text()").evaluate(document, XPathConstants.NODESET);
183
                    if (nl.getLength() > 0) {
184
                        result.setExternalUrl(nl.item(0).getNodeValue());
185
                    }
186
                }
187

    
188
                nl = (NodeList) xpath.compile("//*[name()='work:work-summary'][@put-code='"+orcidwork+"']//*[name()='work:work-summary']").evaluate(document, XPathConstants.NODESET);
189
                for (int i = 0; i < nl.getLength(); i++) {
190
                    if(!worknl.item(0).isEqualNode(nl.item(i))) {
191
                        nl.item(i).getParentNode().removeChild(nl.item(i));
192
                    }
193
                }
194

    
195
                DOMSource domSource = new DOMSource(document);
196
                StringWriter writer = new StringWriter();
197
                StreamResult streamResult = new StreamResult(writer);
198
                TransformerFactory tf = TransformerFactory.newInstance();
199
                Transformer transformer = tf.newTransformer();
200
                transformer.transform(domSource, streamResult);
201
                result.setOrcidworkid(orcidworkid);
202
                result.setOpenaireId(createOpenaireId(orcidworkid));
203
                result.setMetadataRecord(writer.toString());
204
                result.setRecordFormat(ClaimUtils.FORMAT_XML);
205
                result.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
206
//                result.setFound(true);
207
            }
208

    
209
        } catch (Exception e) {
210
            logger.error("Error parsing Orcid result\n"+xml,e);
211
        }
212

    
213
        return result;
214
    }
215

    
216
    /**
217
     *
218
     * @param xml
219
     * @return Result object or null
220
     */
221
    public static Result datacite2Result(String xml) {
222
        logger.debug("Datacite xml response:\n"+xml);
223
        Result result = null;
224
        if (xml != null ) {
225

    
226
            try {
227
                DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
228
                DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
229
                InputSource is = new InputSource(new StringReader(xml));
230
                Document document = dBuilder.parse(is);
231
                XPathFactory xPathfactory = XPathFactory.newInstance();
232
                XPath xpath = xPathfactory.newXPath();
233
                NodeList nl;
234
                nl = (NodeList) xpath.compile("//*[local-name()='identifier']/text()").evaluate(document, XPathConstants.NODESET);
235
                if (nl.getLength() > 0) {
236
                    result = new Result();
237
                    result.setResultType(ClaimUtils.DATASET);
238

    
239
                    result.setDoi(nl.item(0).getNodeValue());
240
                    result.setExternalUrl("http://dx.doi.org/" + result.getDoi());
241

    
242
                    result.setOpenaireId(createOpenaireId(result.getDoi()));
243
                    nl = (NodeList) xpath.compile("//*[local-name()='title']/text()").evaluate(document, XPathConstants.NODESET);
244
                    if (nl.getLength() > 0) {
245
                        result.setTitle(nl.item(0).getNodeValue());
246
                    }
247

    
248
                    nl = (NodeList) xpath.compile("//*[local-name()='creator']/text()").evaluate(document, XPathConstants.NODESET);
249
                    if (nl.getLength() > 0) {
250
                        for (int i = 0; i < nl.getLength(); i++) {
251
                            result.getAuthors().put(i + "", nl.item(i).getNodeValue());
252
                        }
253
                    }
254

    
255
                    result.setMetadataRecord(xml);
256
                    result.setRecordFormat(ClaimUtils.FORMAT_XML);
257
                    result.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
258
//                result.setFound(true);
259
                    logger.debug("Datacite result" + result.toString());
260

    
261
                } else {
262

    
263
                  logger.error("Couldn't parse Datacite result\n" + xml);
264

    
265
                }
266

    
267
                } catch(ParserConfigurationException | IOException | XPathExpressionException |SAXException e){
268
                    logger.error("Error parsing Orcid result\n"+xml,e);
269
                }
270

    
271
        }
272
            return result;
273
    }
274
    public static String createOpenaireId(String id){
275
//        System.out.println("createOpenaireId from id:" +id);
276
        if(id==null){
277
            return null;
278
        }
279
        String openaireId=id;
280
        MessageDigest m= null;
281
        try {
282
            m = MessageDigest.getInstance("MD5");
283
            m.update(id.getBytes(),0,id.length());
284
            openaireId = new BigInteger(1,m.digest()).toString(16);
285
            while(openaireId.length() < 32 ){
286
                openaireId = "0"+openaireId;
287
            }
288
        } catch (NoSuchAlgorithmException e) {
289
            logger.error("Couldn't instatiate md5 algorithm",e);
290
        }
291
        openaireId ="userclaim___::"+openaireId;
292
        return openaireId;
293
    }
294

    
295
}
296

    
297
/**
 * Object model of the Crossref API JSON response, used as the Gson binding
 * target when parsing that response. Field names must stay in sync with the
 * JSON keys they are bound to.
 * <p>
 * The nested types are static so that they can be instantiated without an
 * enclosing instance, which is what Gson requires for reliable deserialization.
 */
class CrossrefResponse {
    String status;
    Message message;

    /** @return the status of the response, or null when absent */
    public String getStatus() {
        return status;
    }

    /** @return the message part of the response, or null when absent */
    public Message getMessage() {
        return message;
    }

    /** The message part of the response; carries the list of matched items. */
    static class Message {
        List<Item> items;

        /** @return the matched items, or null when absent */
        public List<Item> getItems() {
            return items;
        }

        /** A single work returned by Crossref. */
        static class Item {
            private String DOI;
            private List<String> title;
            private List<Author> author;
            // NOTE(review): with Gson's default naming this binds to the JSON key "Url";
            // confirm that this is the key Crossref actually sends.
            private String Url;
            private String type;
            private String source;

            /** @return the DOI of the work, or null when absent */
            public String getDOI() {
                return DOI;
            }

            /** @return the titles of the work, or null when absent */
            public List<String> getTitle() {
                return title;
            }

            /** @return the authors of the work, or null when absent */
            public List<Author> getAuthor() {
                return author;
            }

            /** @return the URL of the work, or null when absent */
            public String getUrl() {
                return Url;
            }

            /** @return the type of the work, or null when absent */
            public String getType() {
                return type;
            }

            /** @return the source of the work, or null when absent */
            public String getSource() {
                return source;
            }

            /** An author of a work, split into given and family name. */
            static class Author {
                String given;
                String family;

                /** @return the given name, or null when absent */
                public String getGiven() {
                    return given;
                }

                /** @return the family name, or null when absent */
                public String getFamily() {
                    return family;
                }
            }
        }
    }
}
366

    
367
/**
 * Object model of the Datacite API JSON response, used as the Gson binding
 * target when parsing that response. Field names must stay in sync with the
 * JSON keys they are bound to.
 * <p>
 * The nested types are static so that they can be instantiated without an
 * enclosing instance, which is what Gson requires for reliable deserialization.
 */
class DataciteResponse {

    Data data;

    /** @return the data part of the response, or null when absent */
    public Data getData() {
        return data;
    }

    /** The data part of the response; wraps the record attributes. */
    static class Data {

        private Attributes attributes;

        /** @return the attributes of the record, or null when absent */
        public Attributes getAttributes() {
            return attributes;
        }

        /** The descriptive attributes of a Datacite record. */
        static class Attributes {
            private String doi;
            private String containerTitle;
            private String published;
            private String title;
            private List<Author> author;

            /** @return the DOI of the record, or null when absent */
            public String getDoi() {
                return doi;
            }

            /** @return the container title of the record, or null when absent */
            public String getContainerTitle() {
                return containerTitle;
            }

            /** @return the published value of the record, or null when absent */
            public String getPublished() {
                return published;
            }

            /** @return the title of the record, or null when absent */
            public String getTitle() {
                return title;
            }

            /** @return the authors of the record, or null when absent */
            public List<Author> getAuthor() {
                return author;
            }
        }

        /** An author of a record: given and family name, or a single literal name. */
        static class Author {
            String given;
            String family;
            String literal;

            /** @return the given name, or null when absent */
            public String getGiven() {
                return given;
            }

            /** @return the family name, or null when absent */
            public String getFamily() {
                return family;
            }

            /** @return the literal name, or null when absent */
            public String getLiteral() {
                return literal;
            }
        }
    }

}
(2-2/4)