Project

General

Profile

1
package eu.dnetlib.data.claimsDemo;
2

    
3
//import eu.dnetlib.data.claims.migration.Claim;
4
import eu.dnetlib.data.claims.migration.*;
5
import org.w3c.dom.Document;
6
import org.w3c.dom.Element;
7
import org.w3c.dom.Node;
8
import org.w3c.dom.NodeList;
9
import org.xml.sax.InputSource;
10
import org.xml.sax.SAXException;
11

    
12
import javax.xml.parsers.DocumentBuilder;
13
import javax.xml.parsers.DocumentBuilderFactory;
14
import javax.xml.parsers.ParserConfigurationException;
15
import javax.xml.transform.Transformer;
16
import javax.xml.transform.TransformerFactory;
17
import javax.xml.transform.dom.DOMSource;
18
import javax.xml.transform.stream.StreamResult;
19
import javax.xml.xpath.XPath;
20
import javax.xml.xpath.XPathConstants;
21
import javax.xml.xpath.XPathExpression;
22
import javax.xml.xpath.XPathFactory;
23
import java.io.*;
24
import java.net.HttpURLConnection;
25
import java.net.URL;
26
import java.sql.Timestamp;
27
import java.util.Date;
28

    
29
/**
30
 * Created by argirok on 20/11/2015.
31
 */
32
/*
33
Parsing xml from claims DB
34
* DMF xml
35
* Relation XML
36
 */
37
public class ParsingClaimUtils {
38
    private DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
39
    private DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
40
    private InputSource inputSource;
41
    private Document document;
42
    private XPathFactory xPathfactory;
43
    private XPath xpath;
44
    private String relationType;
45
    private  SearchUtils searchUtils= new SearchUtils();
46

    
47
   /*ParsingClaimUtils(String xml) throws ParserConfigurationException, IOException, SAXException {
48
        inputSource = new InputSource(new StringReader(xml));
49
        document = dBuilder.parse(inputSource);
50
        relationType=getAttributeFromRel2ActionsXML("type");
51
    }*/
52
    /**
     * Parses the XML document located at {@code uri} and prepares an XPath evaluator for it.
     *
     * @param uri    location of the XML document, passed to {@link DocumentBuilder#parse(String)}
     * @param search not read by this constructor
     * @throws ParserConfigurationException if the field-level {@link DocumentBuilder} cannot be created
     * @throws IOException                  if the document cannot be read
     * @throws SAXException                 if the document is not well-formed
     */
    ParsingClaimUtils(String uri, boolean search) throws ParserConfigurationException, IOException, SAXException {
        this.document = this.dBuilder.parse(uri);
        this.xPathfactory = XPathFactory.newInstance();
        this.xpath = this.xPathfactory.newXPath();
    }
57

    
58
    public String getValueFromXMLAtrribute(String elementName) {
59
        return getValueFromElement("field", elementName);
60
    }
61
    private String getValueFromElement(String element, String elementName) {
62
        XPathExpression expr = null;
63
        try {
64
            expr = xpath.compile(String.format("//%s[@name=\"%s\"]", element, elementName));
65
        NodeList nl = (NodeList) expr.evaluate(document, XPathConstants.NODESET);
66
        if (nl.getLength() > 0) {
67
            Node nNode = nl.item(0);
68
            if (nNode.getNodeType() == Node.ELEMENT_NODE) {
69
                Element eElement = (Element) nNode;
70
                return eElement.getAttribute("value");
71
            }
72
        }
73
            return null;
74
        } catch (Exception e) {
75
            e.printStackTrace();
76
            return null;
77

    
78
        }
79
    }
80
    private String getResultsSize() {
81
        XPathExpression expr = null;
82
        try {
83
            expr = xpath.compile("//total/text()");
84
            NodeList nl = (NodeList) expr.evaluate(document, XPathConstants.NODESET);
85
            if (nl.getLength() > 0) {
86
                   return nl.item(0).getNodeValue();
87
            }
88
            return null;
89
        } catch (Exception e) {
90
            e.printStackTrace();
91
            return null;
92

    
93
        }
94
    }
95

    
96
/*
97
Get the type of the target of an annotation /
98
Get the type of the source of a claim /
99

    
100
 */
101
   static public String getTargetType(String relationType) {
102
        String sourceType = ClaimUtils.PUBLICATION;
103
        if (relationType.equals("resultResult_publicationpublication_isRelatedTo")) {
104
            sourceType = ClaimUtils.PUBLICATION;
105
        } else if (relationType.equals("resultResult_publicationdataset_isRelatedTo")) {
106
            sourceType = ClaimUtils.PUBLICATION;
107
        } else if (relationType.equals("resultProject")) {
108
            sourceType = ClaimUtils.PUBLICATION;
109
            //could be a dataset too
110
        } else if (relationType.equals("resultResult_datasetpublication_isRelatedTo")) {
111
            sourceType = ClaimUtils.DATASET;
112
        } else if (relationType.equals("resultResult_datasetdataset_isRelatedTo")) {
113
            sourceType = ClaimUtils.DATASET;
114
        }
115
        return sourceType;
116
    }
117
    /*
118
Get the type of the body of an annotation /
119
Get the type of the target of a claim /
120

    
121
 */
122
    public static String getBodyType(String relationType) {
123
        String sourceType = ClaimUtils.PUBLICATION;
124
        if (relationType.equals("resultResult_publicationpublication_isRelatedTo")) {
125
            sourceType = ClaimUtils.PUBLICATION;
126
        } else if (relationType.equals("resultResult_publicationdataset_isRelatedTo")) {
127
            sourceType = ClaimUtils.DATASET;
128
        } else if (relationType.equals("resultProject")) {
129
            sourceType = ClaimUtils.PROJECT;
130
            //could be a dataset too
131
        } else if (relationType.equals("resultResult_datasetpublication_isRelatedTo")) {
132
            sourceType = ClaimUtils.PUBLICATION;
133
        } else if (relationType.equals("resultResult_datasetdataset_isRelatedTo")) {
134
            sourceType = ClaimUtils.DATASET;
135
        }
136
        return sourceType;
137
    }
138

    
139
    static public Result getResultFromDMF(String xml) throws ParserConfigurationException, IOException, SAXException {
140
       Result r=new Result();
141
        r.setXml(xml);
142
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
143
          DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
144
          InputSource inputSource= new InputSource(new StringReader(xml));
145
          Document document=dBuilder.parse(inputSource);
146
          XPathFactory xPathfactory= XPathFactory.newInstance();
147
          XPath xpath = xPathfactory.newXPath();
148
        try {
149
             NodeList nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='doi']/text()").evaluate(document, XPathConstants.NODESET);
150
            if (nl.getLength() > 0) {
151
                r.setDoi(nl.item(0).getNodeValue());
152
            }
153
            nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
154
            if (nl.getLength() > 0) {
155
                r.setOrcidworkid(nl.item(0).getNodeValue());
156
            }
157

    
158
            // DON'T DELETE following lines for orcidworkid!!
159
            // @identiferType typo is made on purpose -> there are dmf xml with this typo.
160
            if(r.getXml().contains("@identiferType='orcidworkid'")) {
161
                r.setXml(r.getXml().replace("@identiferType='orcidworkid'","@identifierType='orcidworkid'"));
162
                nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identiferType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
163
                if (nl.getLength() > 0) {
164
                    r.setOrcidworkid(nl.item(0).getNodeValue());
165
                }
166
            }
167
             nl = (NodeList) xpath.compile("//*[local-name()='objIdentifier']/text()").evaluate(document, XPathConstants.NODESET);
168
            if (nl.getLength() > 0) {
169
                r.setOpenaireId(nl.item(0).getNodeValue());
170
            }
171
            nl = (NodeList) xpath.compile("//*[local-name()='identifier']/text()").evaluate(document, XPathConstants.NODESET);
172
             if (nl.getLength() > 0) {
173
                r.setExternal_url(nl.item(0).getNodeValue());
174
            }
175
            nl = (NodeList) xpath.compile("//*[local-name()='accessrights']/text()").evaluate(document, XPathConstants.NODESET);
176
            if (nl.getLength() > 0) {
177
                r.setAccessRights(nl.item(0).getNodeValue());
178
            }
179
            // <oaf:accessrights>EMBARGO</oaf:accessrights>
180
            //<oaf:embargoenddate>2015-03-01</oaf:embargoenddate>
181
            nl = (NodeList) xpath.compile("//embargoenddate/text()").evaluate(document, XPathConstants.NODESET);
182
            if (nl.getLength() > 0) {
183
                r.setEmbargoEndDate(nl.item(0).getNodeValue());
184
            }
185
            nl = (NodeList) xpath.compile("//*[local-name()='title']/text()").evaluate(document, XPathConstants.NODESET);
186
            if (nl.getLength() > 0) {
187
                r.setTitle(nl.item(0).getNodeValue());
188
            }
189
            nl = (NodeList) xpath.compile("//*[local-name()='collectedFrom']/@id").evaluate(document, XPathConstants.NODESET);
190
            if (nl.getLength() > 0) {
191
                r.setCollectedFrom(nl.item(0).getNodeValue());
192
            }
193
        } catch (Exception e) {
194
            e.printStackTrace();
195
            return null;
196

    
197
        }
198
        return r;
199
    }
200
    static public void getClaimFromConceptDMF(Claim claim, String xml)  {
201
        Result r=new Result();
202
        Context context =new Context();
203

    
204
        try {
205
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
206
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
207
            InputSource inputSource= new InputSource(new StringReader(xml));
208
            Document document=dBuilder.parse(inputSource);
209
            XPathFactory xPathfactory= XPathFactory.newInstance();
210
            XPath xpath = xPathfactory.newXPath();
211
            NodeList nl = (NodeList) xpath.compile("//*[local-name()='recordIdentifier']/text()").evaluate(document, XPathConstants.NODESET);
212
            if (nl.getLength() > 0) {
213
                r.setOpenaireId(nl.item(0).getNodeValue());
214
            }
215
            nl = (NodeList) xpath.compile("//*[local-name()='concept']/@id").evaluate(document, XPathConstants.NODESET);
216
            if (nl.getLength() > 0) {
217
                context.setOpenaireId(nl.item(0).getNodeValue());
218
            }
219
         } catch (Exception e) {
220
            e.printStackTrace();
221

    
222
        }
223

    
224
        claim.setTarget(buildResult(r.getOpenaireId(), null,claim.getId(),ClaimUtils.USEAPIRESULTS));
225
        buildContext(context);
226
        claim.setSource(context);
227
     }
228
    static public void getRelationClaim(Claim claim, String xml) throws IOException, SAXException, ParserConfigurationException {
229
        String relationType="";
230
        String sourceId=""; //Annotation source
231
        String targetId=""; //Annotation target
232
//        //<RELATION type='resultProject' source='50|od______1266::af81022e9c489007a8f9ab27c2c725cb' target='40|fct_________::0432268334291febec6d0dbc1f8bae5d' />
233

    
234
        try {
235
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
236
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
237
            InputSource inputSource= new InputSource(new StringReader(xml));
238
            Document document=dBuilder.parse(inputSource);
239
            XPathFactory xPathfactory= XPathFactory.newInstance();
240
            XPath xpath = xPathfactory.newXPath();
241
            NodeList nl = (NodeList) xpath.compile("//RELATION/@type").evaluate(document, XPathConstants.NODESET);
242
            if (nl.getLength() > 0) {
243
                relationType=(nl.item(0).getNodeValue());
244
            }
245
            nl = (NodeList) xpath.compile("//RELATION/@source").evaluate(document, XPathConstants.NODESET);
246
            if (nl.getLength() > 0) {
247
               targetId=(nl.item(0).getNodeValue());
248
            }
249
            nl = (NodeList) xpath.compile("//RELATION/@target").evaluate(document, XPathConstants.NODESET);
250
            if (nl.getLength() > 0) {
251
                sourceId=(nl.item(0).getNodeValue());
252
            }
253
        } catch (Exception e) {
254
            e.printStackTrace();
255

    
256
        }
257
        claim.setTarget(buildResult(targetId, getTargetType(relationType),claim.getId(),ClaimUtils.USEAPIRESULTS));
258
        String bodyType=getBodyType(relationType);
259
        if (bodyType.equals(ClaimUtils.PROJECT)) {
260
            claim.setSource(buildProject(sourceId, claim.getId(),ClaimUtils.USEAPIPROJECTS));
261
        } else {
262
            claim.setSource(buildResult(sourceId, bodyType,claim.getId(),ClaimUtils.USEAPIRESULTS));
263
            bodyType=((Result)claim.getSource()).getResultType();
264
        }
265
        claim.setTargetType(((Result) claim.getTarget()).getResultType());
266
        claim.setSourceType(bodyType);
267
     }
268
    static public void buildContext(Context context){
269
        if(context!=null&&context.getOpenaireId()!=null){
270
            try {
271
                 context.setTitle(ContextUtils.extractEgiLabel(context.getOpenaireId()));
272
            }catch (Exception e){
273
                e.printStackTrace();
274
                System.err.println("ContextUtils: Couldn't get Egi label for id "+context.getId());
275
            }
276
        }
277
    }
278
    static Result buildResult(String id, String type,String claimId,boolean useApi)  {
279
        Result body= new Result();
280
        if (id.contains("|")) {
281
            id = id.split("\\|")[1];
282
        }
283
        body.setOpenaireId(id);
284
        body.setResultType(type);
285
        if(useApi){
286
            //look for a publication
287
            getResultFromAPI(body, claimId,true);
288
            String openaireId=body.getOpenaireId();
289
            String objId=getObjIdentifierFromSearch(openaireId);
290
            if(!body.isFound()){
291
                if(objId!=null&&!openaireId.equals(objId)){
292
                    // not found! look for a publication with objId
293
                    //if result not found in API with openaireId, search for it with objIdentifier
294
                    body.setOpenaireId(objId);
295
                    getResultFromAPI(body, claimId,true);
296
                    body.setOpenaireId(openaireId);
297
                    //TODO check which one of the ids we should keep!!!!
298
                }
299
                if(!body.isFound()){
300
                    //if still not found search for Dataset with the id
301
                        getResultFromAPI(body, claimId, false);
302
                        if (!body.isFound() && (objId = getObjIdentifierFromSearch(openaireId)) != null && !openaireId.equals(objId)) {
303
                            // still not found!! search for Dataset with the onjId
304
                            //if result not found in API with openaireId, search for it with objIdentifier
305
                            body.setOpenaireId(objId);
306
                            getResultFromAPI(body, claimId, false);
307
                            body.setOpenaireId(openaireId);
308
                            //TODO check which one of the ids we should keep!!!!
309
                        }
310
                }
311
            }
312
            //TODO check if it is a dataset
313
            if(!body.isFound()) {
314
                try {
315
                    PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("results_not_found.txt", true)));
316
                    Date date = new java.util.Date();
317
                    out.println(new Timestamp(date.getTime()) + " - Result Not Found: " + body.getOpenaireId() + " (API) in claim " + claimId);
318
                    out.close();
319
                } catch (IOException e) {
320
                    e.printStackTrace();
321
                    System.err.println("Couldn't write to file " + "results_not_found.txt");
322
                }
323
            }
324
        }else {
325
            getResultFromSearch(body, claimId);
326
        }
327
        return body;
328
    }
329

    
330
    private static void getResultFromSearch(Result  r,String claimId){
331
        SearchUtils searchUtils= new SearchUtils();
332
        String searchUri=searchUtils.getResultSearchUrl(r.getOpenaireId());
333
        if(searchUri==null){
334
            return ;
335
        }
336
        System.out.println("Result query: "+searchUri);
337
        try {
338
            String size=null;
339
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
340
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
341
            Document document=dBuilder.parse(searchUri);
342
            XPathFactory xPathfactory= XPathFactory.newInstance();
343
            XPath xpath = xPathfactory.newXPath();
344
            NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
345
            if (nl.getLength() > 0) {
346
                size= nl.item(0).getNodeValue();
347
            }
348
            if(size!=null && Integer.parseInt(size)>0){
349
                nl = (NodeList) xpath.compile("//field[@name='resulttypename']/@value").evaluate(document, XPathConstants.NODESET);
350
                if (nl.getLength() > 0) {
351
                    r.setResultType(nl.item(0).getNodeValue());
352
                }
353
                nl = (NodeList) xpath.compile("//field[@name='bestlicense']/@value").evaluate(document, XPathConstants.NODESET);
354
                if (nl.getLength() > 0) {
355
                    r.setBestLicense(nl.item(0).getNodeValue());
356
                }
357
                nl = (NodeList) xpath.compile("//field[@name='collectedfrom']/@value").evaluate(document, XPathConstants.NODESET);
358
                if (nl.getLength() > 0) {
359
                    r.setCollectedFrom(nl.item(0).getNodeValue());
360
                }
361
                nl = (NodeList) xpath.compile("//field[@name='title']/@value").evaluate(document, XPathConstants.NODESET);
362
                if (nl.getLength() > 0) {
363
                    r.setTitle(nl.item(0).getNodeValue());
364
                }
365
                nl = (NodeList) xpath.compile("//field[@name='pid']").evaluate(document, XPathConstants.NODESET);
366
                for(int i=0; i<nl.getLength();i++){
367
                    String id="";
368
                    String type="";
369
                    int count;
370

    
371

    
372
                    NodeList valueNodes = (NodeList) xpath.compile("//field[@name='value']/@value").evaluate(nl.item(i), XPathConstants.NODESET);
373
                        if (valueNodes.getLength() > 0) {
374
                            id = valueNodes.item(i).getNodeValue();
375
                        }
376
                    NodeList classNodes = (NodeList) xpath.compile("//field[@name='classid']/@value").evaluate(nl.item(i), XPathConstants.NODESET);
377
                        if (classNodes.getLength() > 0) {
378
                            type = classNodes.item(i).getNodeValue();
379
                        }
380
                         if (type.equals("doi") && id != null) {
381
                            r.setDoi(id);
382
                            r.setExternal_url("http://dx.doi.org/"+r.getDoi());
383

    
384
                        }else if(type.equals("pmc") && id != null) {
385
                             r.setPmcid(id);
386
                         }
387

    
388
                }
389

    
390

    
391
            }else{
392
                try{
393
                    PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("results_not_found.txt", true)));
394
                    Date date= new java.util.Date();
395
                    out.println(new Timestamp(date.getTime())+" - Result Not Found: "+r.getOpenaireId()+ " in claim "+claimId);
396
                    out.close();
397
                }catch (IOException e) {
398
                    e.printStackTrace();
399
                    System.err.println("Couldn't write to file " + "results_not_found.txt");
400
                }
401
                //System.err.println("Result Not Found " + r.getOpenaireId());
402
                r.setFound(false);
403
            }
404

    
405
        } catch (Exception e) {
406
            e.printStackTrace();
407
            return ;
408
        }
409
    }
410
    public static String getObjIdentifierFromSearch(String id){
411
        SearchUtils searchUtils= new SearchUtils();
412
        String searchUri=searchUtils.getResultSearchUrl(id);
413
        String objIdentifier=null;
414
        if(searchUri==null){
415
            return null;
416
        }
417
        System.out.println("Result query: "+searchUri);
418
        try {
419
            String size=null;
420
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
421
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
422
            Document document=dBuilder.parse(searchUri);
423
            XPathFactory xPathfactory= XPathFactory.newInstance();
424
            XPath xpath = xPathfactory.newXPath();
425
            NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
426
            if (nl.getLength() > 0) {
427
                size= nl.item(0).getNodeValue();
428
            }
429
            if(size!=null && Integer.parseInt(size)>0){
430
                //indexId="objIdentifier"
431
                //<field name="resultId" multiplicity="false" indexId="objIdentifier" label="Object id" value="dedup_wf_001::7832a296929028bbe447d66398a0c43a"/>
432
                nl = (NodeList) xpath.compile("//field[@indexId='objIdentifier']/@value").evaluate(document, XPathConstants.NODESET);
433
                if (nl.getLength() > 0) {
434
                    objIdentifier=nl.item(0).getNodeValue();
435
                }
436

    
437

    
438
            }
439
        } catch (Exception e) {
440
            e.printStackTrace();
441
            return null;
442
        }
443
        return objIdentifier;
444
    }
445
    private static void getResultFromAPI(Result  r,String claimId, boolean isPublication){
446
        SearchUtils searchUtils= new SearchUtils();
447
        String searchUri;
448
        if(isPublication) {
449
            searchUri = searchUtils.getPublicationApiUrl(r.getOpenaireId());
450
        }else{
451
            searchUri = searchUtils.getDatasetApiUrl(r.getOpenaireId());
452
        }
453
        if(searchUri==null){
454
            return;
455
        }
456
        System.out.println("Result query: "+searchUri);
457
        try {
458
            URL obj =obj = new URL(searchUri);
459
            HttpURLConnection con = (HttpURLConnection) obj.openConnection();
460
            int responseCode = con.getResponseCode();
461
            BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
462
            StringBuffer response = new StringBuffer();
463
            String inputLine;
464
            while ((inputLine = in.readLine()) != null) {
465
                response.append(inputLine+"\n");
466
            }
467
            in.close();
468
             String xml = response.toString();
469
            String size=null;
470
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
471
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
472
            //Document document=dBuilder.parse(searchUri);
473
            InputSource is = new InputSource(new StringReader(xml));
474
            Document document= document=dBuilder.parse(is);
475
            XPathFactory xPathfactory= XPathFactory.newInstance();
476
            XPath xpath = xPathfactory.newXPath();
477

    
478

    
479
            NodeList nl = (NodeList) xpath.compile("/response/header/total/text()").evaluate(document, XPathConstants.NODESET);
480
            if (nl.getLength() > 0) {
481
                size= nl.item(0).getNodeValue();
482
            }
483
            if(size!=null && Integer.parseInt(size)>0){
484
                nl = (NodeList) xpath.compile("//resulttype/@classid").evaluate(document, XPathConstants.NODESET);
485
                if (nl.getLength() > 0) {
486
                    r.setResultType(nl.item(0).getNodeValue());
487
                }
488
                nl = (NodeList) xpath.compile("//bestlicense/@classid").evaluate(document, XPathConstants.NODESET);
489
                if (nl.getLength() > 0) {
490
                    r.setBestLicense(nl.item(0).getNodeValue());
491
                }
492
                //TODO check this
493
               /* nl = (NodeList) xpath.compile("//collectedfrom/@id").evaluate(document, XPathConstants.NODESET);
494
                r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_OPENAIRE);
495
                for(int i=0;i<nl.getLength();i++) {
496
                    String datasourceId=nl.item(i).getNodeValue();
497
                    if (datasourceId.equals(ClaimUtils.DATASOURCE_ID__ORCID)) {
498
                        r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
499
                    }else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID_CROSSREF)) {
500
                        r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_CROSSREF);
501
                    }else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID_DATACTE)) {
502
                        r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
503
                    }else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID__ORCID)) {
504
                        r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
505
                    }
506
                }*/
507
//                if (nl.getLength() > 0) {
508
//                    r.setCollectedFrom(nl.item(0).getNodeValue());
509
//                }
510
                nl = (NodeList) xpath.compile("//title/text()").evaluate(document, XPathConstants.NODESET);
511
                if (nl.getLength() > 0) {
512
                    r.setTitle(nl.item(0).getNodeValue());
513
                }
514
                nl = (NodeList) xpath.compile("//pid[@classid='doi']/text()").evaluate(document, XPathConstants.NODESET);
515
                if (nl.getLength() > 0) {
516
                    r.setDoi(nl.item(0).getNodeValue());
517
                    r.setExternal_url("http://dx.doi.org/"+r.getDoi());
518
                }
519
                nl = (NodeList) xpath.compile("//pid[@classid='pmc']/text()").evaluate(document, XPathConstants.NODESET);
520
                if (nl.getLength() > 0) {
521
                    r.setPmcid(nl.item(0).getNodeValue());
522

    
523
                }
524
                r.setXml(xml);
525
                r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_OPENAIRE);
526
                r.setFound(true);
527

    
528
            }else{
529
                r.setFound(false);
530
            }
531

    
532
        } catch (Exception e) {
533
            e.printStackTrace();
534
        }
535
    }
536
    public static void getXmlfromDatacite(Result  r){
537
        r.setResultType(ClaimUtils.DATASET);
538
        SearchUtils searchUtils= new SearchUtils();
539
        if(r!=null&&r.getDoi()==null){
540
            return ;
541
        }
542
        String xml =searchUtils.getDataciteXmlRecord(r.getDoi());
543
         if(xml==null){
544
            return ;
545
        }
546
         try {
547
            String size=null;
548
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
549
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
550
            InputSource is = new InputSource(new StringReader(xml));
551
            Document document= document=dBuilder.parse(is);
552
            XPathFactory xPathfactory= XPathFactory.newInstance();
553
            XPath xpath = xPathfactory.newXPath();
554
            NodeList nl  ;
555

    
556
                nl = (NodeList) xpath.compile("//j.0:title/text()").evaluate(document, XPathConstants.NODESET);
557
                if (nl.getLength() > 0) {
558
                    r.setTitle(nl.item(0).getNodeValue());
559
                }
560
                nl = (NodeList) xpath.compile("//j.0:identifier/text()").evaluate(document, XPathConstants.NODESET);
561
                if (nl.getLength() > 0) {
562
                    r.setDoi(nl.item(0).getNodeValue());
563
                    r.setExternal_url("http://dx.doi.org/"+r.getDoi());
564
                }
565
                r.setXml(xml);
566
             r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
567
                r.setFound(true);
568
        } catch (Exception e) {
569
            e.printStackTrace();
570
        }
571
    }
572
    public static void getJsonfromCrossref(Result  r){
573
        SearchUtils searchUtils= new SearchUtils();
574
        if(r!=null&&r.getDoi()==null){
575
            return ;
576
        }
577
        String xml =searchUtils.getCrossrefJsonRecord(r.getDoi());
578
        if(xml!=null){
579
            r.setXml(xml);
580
            r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_CROSSREF);
581
        }
582
    }
583
    public static void getXmlfromOrcid(Result  r){
584
        r.setResultType(ClaimUtils.PUBLICATION);
585
        SearchUtils searchUtils= new SearchUtils();
586
        if(r!=null&&r.getOrcidworkid()==null){
587
            return ;
588
        }
589
        System.out.println("OWI: " + r.getOrcidworkid());
590
        String orcid=r.getOrcidworkid().substring(0,19);
591
        String orcidworkid=r.getOrcidworkid().substring(20,r.getOrcidworkid().length());
592
                //0000-0003-5000-0001
593
        System.out.println("orcid: "+orcid);
594
        System.out.println("OWI: "+orcidworkid);
595
        String xml =searchUtils.getOrcidXmlRecord(orcid);
596
        if(xml==null){
597
            return ;
598
        }
599
        try {
600
            String size=null;
601
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
602
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
603
            InputSource is = new InputSource(new StringReader(xml));
604
            Document document= document=dBuilder.parse(is);
605
            XPathFactory xPathfactory= XPathFactory.newInstance();
606
            XPath xpath = xPathfactory.newXPath();
607
            NodeList nl  ;
608
//            getParent().toXML()
609
//            <orcid-work put-code="19500531" visibility="public">
610
            String s = (String) xpath.compile("//orcid-work[@put-code="+orcidworkid+"]").evaluate(document, XPathConstants.STRING);
611
                 System.out.println("Here: "+s);
612

    
613

    
614
            NodeList worknl = (NodeList) xpath.compile("//orcid-work[@put-code="+orcidworkid+"]").evaluate(document,XPathConstants.NODESET);
615
            if (worknl.getLength() > 0) {
616

    
617
                nl = (NodeList) xpath.compile("//orcid-work").evaluate(document, XPathConstants.NODESET);
618
                for (int i = 0; i < nl.getLength(); i++) {
619
                    if(!worknl.item(0).isEqualNode(nl.item(i))) {
620
                        nl.item(i).getParentNode().removeChild(nl.item(i));
621
                    }
622
                }
623

    
624

    
625
                DOMSource domSource = new DOMSource(document);
626
                StringWriter writer = new StringWriter();
627
                StreamResult result = new StreamResult(writer);
628
                TransformerFactory tf = TransformerFactory.newInstance();
629
                Transformer transformer = tf.newTransformer();
630
                transformer.transform(domSource, result);
631
                System.out.println("XML IN String format is: \n" + writer.toString());
632
                r.setXml(xml);
633
                r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
634
                r.setFound(true);
635
            }
636
            nl = (NodeList) xpath.compile("//j.0:identifier/text()").evaluate(document, XPathConstants.NODESET);
637
            if (nl.getLength() > 0) {
638
                r.setDoi(nl.item(0).getNodeValue());
639
                r.setExternal_url("http://dx.doi.org/"+r.getDoi());
640
            }
641
        } catch (Exception e) {
642
            e.printStackTrace();
643
        }
644
    }
645
        public static Project buildProject(String id, String claimId, boolean useAPI){
646
        Project body=new Project();
647
         if (id.contains("|")) {
648
            id = id.split("\\|")[1];
649
        }
650
        body.setOpenaireId(id);
651
        if(useAPI){
652
            getProjectFromAPI(body,claimId);
653
            return body;
654
        }else{
655
            getProjectFromSearch(body, claimId);
656
            return body;
657
        }
658

    
659

    
660
    }
661

    
662
    private static void getProjectFromSearch(Project project, String claimId){
663
        SearchUtils s=new SearchUtils();
664
        String searchUri=s.getProjectSearchUrl(project.getOpenaireId());
665
        if(searchUri==null){
666
            return ;
667
        }
668
       System.out.println("Project query: "+searchUri);
669
        try {
670
            String size=null;
671
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
672
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
673
            Document document=dBuilder.parse(searchUri);
674
            XPathFactory xPathfactory= XPathFactory.newInstance();
675
            XPath xpath = xPathfactory.newXPath();
676
            NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
677
            if (nl.getLength() > 0) {
678
                size= nl.item(0).getNodeValue();
679
            }
680
            if(size!=null && Integer.parseInt(size)>0){
681
                /*
682
                <field name="name" indexId="projectacronym" multiplicity="true" label=colle value="OPENAIRE"/><field name="code" indexId="projectcode" multiplicity="true" label="Project code" value="246686"/><field name="title" indexId="projecttitle" multiplicity="true" label="Title" value="Open Access Infrastructure for Research in Europe"/>
683
                 */
684
                nl = (NodeList) xpath.compile("//field[@name='title']/@value").evaluate(document, XPathConstants.NODESET);
685
                if (nl.getLength() > 0) {
686
                    project.setName(nl.item(0).getNodeValue());
687
                }
688
                nl = (NodeList) xpath.compile("//field[@name='name']/@value").evaluate(document, XPathConstants.NODESET);
689
                if (nl.getLength() > 0) {
690
                    project.setAcronym(nl.item(0).getNodeValue());
691
                }
692
                /*
693
                <test/><field name="funder" multiplicity="true"><field name="funderid" indexId="funderid" label="" value="ec__________::EC"/><field name="fundershortname" indexId="fundershortname" label="" value="EC"/><field name="fundername" indexId="fundername" label="" value="European Commission"/></field>
694
                */
695

    
696
                nl = (NodeList) xpath.compile("//field[@name='fundername']/@value").evaluate(document, XPathConstants.NODESET);
697
                if (nl.getLength() > 0) {
698
                    project.setFunderName(nl.item(0).getNodeValue());
699
                }
700
                nl = (NodeList) xpath.compile("//field[@name='funderid']/@value").evaluate(document, XPathConstants.NODESET);
701
                if (nl.getLength() > 0) {
702
                    project.setFunderId(nl.item(0).getNodeValue());
703
                }
704

    
705
            }else{
706
                try{
707
                    PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("projects_not_found.txt", true)));
708
                    Date date= new java.util.Date();
709
                    out.println(new Timestamp(date.getTime())+" - Projects Not Found: "+project.getOpenaireId()+ " in claim "+claimId);
710
                    out.close();
711
                }catch (IOException e) {
712
                    e.printStackTrace();
713
                    System.err.println("Couldn't write to file " + "projects_not_found.txt");
714
                }
715
                //System.err.println("PROJECT Not Found " + project.getOpenaireId());
716
                project.setFound(false);
717

    
718
            }
719

    
720
        } catch (Exception e) {
721
        }
722
    }
723
    private static void getProjectFromAPI(Project project, String claimId){
724

    
725

    
726
        SearchUtils s=new SearchUtils();
727
        String searchUri=s.getProjectApiUrl(project.getOpenaireId());
728
        searchUri="http://api.openaire.eu/search//projects?format=xml&openaireParticipantID=dedup_wf_001::82c87f641bb6219626a0ceca81e0d434";
729
        if(searchUri==null){
730
            return ;
731
        }
732
         //TODO the parsing from API
733
        System.out.println("Project query: "+searchUri);
734
         try {
735
            URL obj =obj = new URL(searchUri);
736
            HttpURLConnection con = (HttpURLConnection) obj.openConnection();
737
            int responseCode = con.getResponseCode();
738
            BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
739
            StringBuffer response = new StringBuffer();
740
            String inputLine;
741
            while ((inputLine = in.readLine()) != null) {
742
                response.append(inputLine);
743
            }
744
            in.close();
745
            String xml = response.toString();
746
            String size=null;
747
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
748
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
749
            //Document document=dBuilder.parse(searchUri);
750
            InputSource is = new InputSource(new StringReader(xml));
751
            Document document= document=dBuilder.parse(is);
752
            XPathFactory xPathfactory= XPathFactory.newInstance();
753
            XPath xpath = xPathfactory.newXPath();
754

    
755

    
756
            NodeList nl = (NodeList) xpath.compile("/response/header/total/text()").evaluate(document, XPathConstants.NODESET);
757
            if (nl.getLength() > 0) {
758
                size= nl.item(0).getNodeValue();
759
            }
760
            if(size!=null && Integer.parseInt(size)>0){
761

    
762
                nl = (NodeList) xpath.compile("//title/text()").evaluate(document, XPathConstants.NODESET);
763
                if (nl.getLength() > 0) {
764
                    project.setName(nl.item(0).getNodeValue());
765
                }
766
                nl = (NodeList) xpath.compile("//acronym/text()").evaluate(document, XPathConstants.NODESET);
767
                if (nl.getLength() > 0) {
768
                    project.setAcronym(nl.item(0).getNodeValue());
769
                }
770
                nl = (NodeList) xpath.compile("//funder/name/text()").evaluate(document, XPathConstants.NODESET);
771
                if (nl.getLength() > 0) {
772
                    project.setFunderName(nl.item(0).getNodeValue());
773
                }
774
                nl = (NodeList) xpath.compile("//funder/id/text()").evaluate(document, XPathConstants.NODESET);
775
                if (nl.getLength() > 0) {
776
                    project.setFunderId(nl.item(0).getNodeValue());
777
                }
778

    
779
                project.setFound(true);
780

    
781
            }else{
782
                try{
783
                    PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("projects_not_found.txt", true)));
784
                    Date date= new java.util.Date();
785
                    out.println(new Timestamp(date.getTime())+" - Projects Not Found: "+project.getOpenaireId()+ " in claim "+claimId);
786
                    out.close();
787
                }catch (IOException e) {
788
                    e.printStackTrace();
789
                    System.err.println("Couldn't write to file " + "projects_not_found.txt");
790
                }
791
                //System.err.println("PROJECT Not Found " + project.getOpenaireId());
792
                project.setFound(false);
793
            }
794

    
795
        } catch (Exception e) {
796
            e.printStackTrace();
797
        }
798
    }
799
}
(4-4/9)