Project

General

Profile

1
package eu.dnetlib.data.claimsDemo;
2

    
3
//import eu.dnetlib.data.claims.migration.Claim;
4
import eu.dnetlib.data.claims.migration.*;
5
import org.w3c.dom.Document;
6
import org.w3c.dom.Element;
7
import org.w3c.dom.Node;
8
import org.w3c.dom.NodeList;
9
import org.xml.sax.InputSource;
10
import org.xml.sax.SAXException;
11

    
12
import javax.xml.parsers.DocumentBuilder;
13
import javax.xml.parsers.DocumentBuilderFactory;
14
import javax.xml.parsers.ParserConfigurationException;
15
import javax.xml.transform.Transformer;
16
import javax.xml.transform.TransformerFactory;
17
import javax.xml.transform.dom.DOMSource;
18
import javax.xml.transform.stream.StreamResult;
19
import javax.xml.xpath.XPath;
20
import javax.xml.xpath.XPathConstants;
21
import javax.xml.xpath.XPathExpression;
22
import javax.xml.xpath.XPathFactory;
23
import java.io.*;
24
import java.net.HttpURLConnection;
25
import java.net.URL;
26
import java.sql.Timestamp;
27
import java.util.Date;
28

    
29
/**
30
 * Created by argirok on 20/11/2015.
31
 */
32
/*
33
Parsing xml from claims DB
34
* DMF xml
35
* Relation XML
36
 */
37
public class ParsingClaimUtils {
38
    // Factory used once, to create the instance-level parser below.
    private DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
39
    // Parser used by the constructor to load the document. This initializer can throw
    // ParserConfigurationException, which is why the constructor declares it.
    private DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
40
    // Only referenced by the commented-out String-based constructor in the visible part of the file.
    private InputSource inputSource;
41
    // DOM parsed in the constructor; queried by getValueFromElement and getResultsSize.
    private Document document;
42
    // Created in the constructor and used to obtain the XPath evaluator.
    private XPathFactory xPathfactory;
43
    // XPath evaluator created in the constructor; used by the instance query methods.
    private XPath xpath;
44
    // Only referenced by the commented-out String-based constructor in the visible part of the file.
    private String relationType;
45
    // NOTE(review): no instance method visible here reads this field; the static methods
    // create their own SearchUtils. Confirm usage in the rest of the file.
    private  SearchUtils searchUtils= new SearchUtils();
46

    
47
   /*ParsingClaimUtils(String xml) throws ParserConfigurationException, IOException, SAXException {
48
        inputSource = new InputSource(new StringReader(xml));
49
        document = dBuilder.parse(inputSource);
50
        relationType=getAttributeFromRel2ActionsXML("type");
51
    }*/
52
    /**
     * Loads the XML document located at {@code uri} and prepares an XPath evaluator for it.
     *
     * @param uri    location of the XML document, handed directly to the document builder
     * @param search not read by this constructor
     * @throws ParserConfigurationException declared because the {@code dBuilder} field initializer may raise it
     * @throws IOException                  if the document cannot be read
     * @throws SAXException                 if the document cannot be parsed
     */
    ParsingClaimUtils(String uri, boolean search) throws ParserConfigurationException, IOException, SAXException {
        this.document = this.dBuilder.parse(uri);
        this.xPathfactory = XPathFactory.newInstance();
        this.xpath = this.xPathfactory.newXPath();
    }
57

    
58
    public String getValueFromXMLAtrribute(String elementName) {
59
        return getValueFromElement("field", elementName);
60
    }
61
    private String getValueFromElement(String element, String elementName) {
62
        XPathExpression expr = null;
63
        try {
64
            expr = xpath.compile(String.format("//%s[@name=\"%s\"]", element, elementName));
65
        NodeList nl = (NodeList) expr.evaluate(document, XPathConstants.NODESET);
66
        if (nl.getLength() > 0) {
67
            Node nNode = nl.item(0);
68
            if (nNode.getNodeType() == Node.ELEMENT_NODE) {
69
                Element eElement = (Element) nNode;
70
                return eElement.getAttribute("value");
71
            }
72
        }
73
            return null;
74
        } catch (Exception e) {
75
            e.printStackTrace();
76
            return null;
77

    
78
        }
79
    }
80
    private String getResultsSize() {
81
        XPathExpression expr = null;
82
        try {
83
            expr = xpath.compile("//total/text()");
84
            NodeList nl = (NodeList) expr.evaluate(document, XPathConstants.NODESET);
85
            if (nl.getLength() > 0) {
86
                   return nl.item(0).getNodeValue();
87
            }
88
            return null;
89
        } catch (Exception e) {
90
            e.printStackTrace();
91
            return null;
92

    
93
        }
94
    }
95

    
96
/*
97
Get the type of the target of an annotation /
98
Get the type of the source of a claim /
99

    
100
 */
101
   static public String getTargetType(String relationType) {
102
        String sourceType = ClaimUtils.PUBLICATION;
103
        if (relationType.equals("resultResult_publicationpublication_isRelatedTo")) {
104
            sourceType = ClaimUtils.PUBLICATION;
105
        } else if (relationType.equals("resultResult_publicationdataset_isRelatedTo")) {
106
            sourceType = ClaimUtils.PUBLICATION;
107
        } else if (relationType.equals("resultProject")) {
108
            sourceType = ClaimUtils.PUBLICATION;
109
            //could be a dataset too
110
        } else if (relationType.equals("resultResult_datasetpublication_isRelatedTo")) {
111
            sourceType = ClaimUtils.DATASET;
112
        } else if (relationType.equals("resultResult_datasetdataset_isRelatedTo")) {
113
            sourceType = ClaimUtils.DATASET;
114
        }
115
        return sourceType;
116
    }
117
    /*
118
Get the type of the body of an annotation /
119
Get the type of the target of a claim /
120

    
121
 */
122
    public static String getBodyType(String relationType) {
123
        String sourceType = ClaimUtils.PUBLICATION;
124
        if (relationType.equals("resultResult_publicationpublication_isRelatedTo")) {
125
            sourceType = ClaimUtils.PUBLICATION;
126
        } else if (relationType.equals("resultResult_publicationdataset_isRelatedTo")) {
127
            sourceType = ClaimUtils.DATASET;
128
        } else if (relationType.equals("resultProject")) {
129
            sourceType = ClaimUtils.PROJECT;
130
            //could be a dataset too
131
        } else if (relationType.equals("resultResult_datasetpublication_isRelatedTo")) {
132
            sourceType = ClaimUtils.PUBLICATION;
133
        } else if (relationType.equals("resultResult_datasetdataset_isRelatedTo")) {
134
            sourceType = ClaimUtils.DATASET;
135
        }
136
        return sourceType;
137
    }
138

    
139
    static public Result getResultFromDMF(String xml) throws ParserConfigurationException, IOException, SAXException {
140
       Result r=new Result();
141
        r.setXml(xml);
142
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
143
          DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
144
          InputSource inputSource= new InputSource(new StringReader(xml));
145
          Document document=dBuilder.parse(inputSource);
146
          XPathFactory xPathfactory= XPathFactory.newInstance();
147
          XPath xpath = xPathfactory.newXPath();
148
        try {
149
             NodeList nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='doi']/text()").evaluate(document, XPathConstants.NODESET);
150
            if (nl.getLength() > 0) {
151
                r.setDoi(nl.item(0).getNodeValue());
152
            }
153
            nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
154
            if (nl.getLength() > 0) {
155
                r.setOrcidworkid(nl.item(0).getNodeValue());
156
            }
157

    
158
            // DON'T DELETE following lines for orcidworkid!!
159
            // @identiferType typo is made on purpose -> there are dmf xml with this typo.
160
            if(r.getXml().contains("@identiferType='orcidworkid'")) {
161
                r.setXml(r.getXml().replace("@identiferType='orcidworkid'","@identifierType='orcidworkid'"));
162
                nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identiferType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
163
                if (nl.getLength() > 0) {
164
                    r.setOrcidworkid(nl.item(0).getNodeValue());
165
                }
166
            }
167
             nl = (NodeList) xpath.compile("//*[local-name()='objIdentifier']/text()").evaluate(document, XPathConstants.NODESET);
168
            if (nl.getLength() > 0) {
169
                r.setOpenaireId(nl.item(0).getNodeValue());
170
            }
171
            nl = (NodeList) xpath.compile("//*[local-name()='identifier']/text()").evaluate(document, XPathConstants.NODESET);
172
             if (nl.getLength() > 0) {
173
                r.setExternal_url(nl.item(0).getNodeValue());
174
            }
175
            nl = (NodeList) xpath.compile("//*[local-name()='accessrights']/text()").evaluate(document, XPathConstants.NODESET);
176
            if (nl.getLength() > 0) {
177
                r.setAccessRights(nl.item(0).getNodeValue());
178
            }
179
            // <oaf:accessrights>EMBARGO</oaf:accessrights>
180
            //<oaf:embargoenddate>2015-03-01</oaf:embargoenddate>
181
            nl = (NodeList) xpath.compile("//embargoenddate/text()").evaluate(document, XPathConstants.NODESET);
182
            if (nl.getLength() > 0) {
183
                r.setEmbargoEndDate(nl.item(0).getNodeValue());
184
            }
185
            nl = (NodeList) xpath.compile("//*[local-name()='title']/text()").evaluate(document, XPathConstants.NODESET);
186
            if (nl.getLength() > 0) {
187
                r.setTitle(nl.item(0).getNodeValue());
188
            }
189
            nl = (NodeList) xpath.compile("//*[local-name()='collectedFrom']/@id").evaluate(document, XPathConstants.NODESET);
190
            if (nl.getLength() > 0) {
191
                r.setCollectedFrom(nl.item(0).getNodeValue());
192
            }
193
        } catch (Exception e) {
194
            e.printStackTrace();
195
            return null;
196

    
197
        }
198
        return r;
199
    }
200
    static public Claim getClaimFromConceptDMF(Claim claim, String xml)  {
201
        Result r=new Result();
202
        Context context =new Context();
203

    
204
        try {
205
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
206
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
207
            InputSource inputSource= new InputSource(new StringReader(xml));
208
            Document document=dBuilder.parse(inputSource);
209
            XPathFactory xPathfactory= XPathFactory.newInstance();
210
            XPath xpath = xPathfactory.newXPath();
211
            NodeList nl = (NodeList) xpath.compile("//*[local-name()='recordIdentifier']/text()").evaluate(document, XPathConstants.NODESET);
212
            if (nl.getLength() > 0) {
213
                r.setOpenaireId(nl.item(0).getNodeValue());
214
            }
215
            nl = (NodeList) xpath.compile("//*[local-name()='concept']/@id").evaluate(document, XPathConstants.NODESET);
216
            if (nl.getLength() > 0) {
217
                context.setOpenaireId(nl.item(0).getNodeValue());
218
            }
219
         } catch (Exception e) {
220
            e.printStackTrace();
221
            return null;
222

    
223
        }
224

    
225
        claim.setTarget(buildResult(r.getOpenaireId(), null,claim.getId(),ClaimUtils.USEAPIRESULTS));
226
        claim.setSource(buildContext(context));
227
        return claim;
228
    }
229
    static public Claim getRelationClaim(Claim claim, String xml) throws IOException, SAXException, ParserConfigurationException {
230
        String relationType="";
231
        String sourceId=""; //Annotation source
232
        String targetId=""; //Annotation target
233
//        //<RELATION type='resultProject' source='50|od______1266::af81022e9c489007a8f9ab27c2c725cb' target='40|fct_________::0432268334291febec6d0dbc1f8bae5d' />
234

    
235
        try {
236
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
237
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
238
            InputSource inputSource= new InputSource(new StringReader(xml));
239
            Document document=dBuilder.parse(inputSource);
240
            XPathFactory xPathfactory= XPathFactory.newInstance();
241
            XPath xpath = xPathfactory.newXPath();
242
            NodeList nl = (NodeList) xpath.compile("//RELATION/@type").evaluate(document, XPathConstants.NODESET);
243
            if (nl.getLength() > 0) {
244
                relationType=(nl.item(0).getNodeValue());
245
            }
246
            nl = (NodeList) xpath.compile("//RELATION/@source").evaluate(document, XPathConstants.NODESET);
247
            if (nl.getLength() > 0) {
248
               targetId=(nl.item(0).getNodeValue());
249
            }
250
            nl = (NodeList) xpath.compile("//RELATION/@target").evaluate(document, XPathConstants.NODESET);
251
            if (nl.getLength() > 0) {
252
                sourceId=(nl.item(0).getNodeValue());
253
            }
254
        } catch (Exception e) {
255
            e.printStackTrace();
256
            return null;
257

    
258
        }
259
        claim.setTarget(buildResult(targetId, getTargetType(relationType),claim.getId(),ClaimUtils.USEAPIRESULTS));
260
        String bodyType=getBodyType(relationType);
261
        if (bodyType.equals(ClaimUtils.PROJECT)) {
262
            claim.setSource(buildProject(sourceId, claim.getId(),ClaimUtils.USEAPIPROJECTS));
263
        } else {
264
            claim.setSource(buildResult(sourceId, bodyType,claim.getId(),ClaimUtils.USEAPIRESULTS));
265
            bodyType=((Result)claim.getSource()).getResultType();
266
        }
267
        claim.setTargetType(((Result) claim.getTarget()).getResultType());
268
        claim.setSourceType(bodyType);
269
        return claim;
270
    }
271
static public Context buildContext(Context context){
272
    if(context!=null&&context.getOpenaireId()!=null){
273
        try {
274
             context.setTitle(ContextUtils.extractEgiLabel(context.getOpenaireId()));
275
        }catch (Exception e){
276
            e.printStackTrace();
277
            System.err.println("ContextUtils: Couldn't get Egi label for id "+context.getId());
278
        }
279
    }
280
    return  context;
281
}
282
    static Result buildResult(String id, String type,String claimId,boolean useApi)  {
283
        Result body= new Result();
284
        if (id.contains("|")) {
285
            id = id.split("\\|")[1];
286
        }
287
        body.setOpenaireId(id);
288
        body.setResultType(type);
289
        if(useApi){
290
            //look for a publication
291
            body=getResultFromAPI(body, claimId,true);
292
            String openaireId=body.getOpenaireId();
293
            String objId=getObjIdentifierFromSearch(openaireId);
294
            if(!body.isFound()){
295
                if(objId!=null&&!openaireId.equals(objId)){
296
                    // not found! look for a publication with objId
297
                    //if result not found in API with openaireId, search for it with objIdentifier
298
                    body.setOpenaireId(objId);
299
                    body=getResultFromAPI(body, claimId,true);
300
                    body.setOpenaireId(openaireId);
301
                    //TODO check which one of the ids we should keep!!!!
302
                }
303
                if(!body.isFound()){
304
                    //if still not found search for Dataset with the id
305
                        body = getResultFromAPI(body, claimId, false);
306
                        if (!body.isFound() && (objId = getObjIdentifierFromSearch(openaireId)) != null && !openaireId.equals(objId)) {
307
                            // still not found!! search for Dataset with the onjId
308
                            //if result not found in API with openaireId, search for it with objIdentifier
309
                            body.setOpenaireId(objId);
310
                            body = getResultFromAPI(body, claimId, false);
311
                            body.setOpenaireId(openaireId);
312
                            //TODO check which one of the ids we should keep!!!!
313
                        }
314
                }
315
            }
316
            //TODO check if it is a dataset
317
            if(!body.isFound()) {
318
                try {
319
                    PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("results_not_found.txt", true)));
320
                    Date date = new java.util.Date();
321
                    out.println(new Timestamp(date.getTime()) + " - Result Not Found: " + body.getOpenaireId() + " (API) in claim " + claimId);
322
                    out.close();
323
                } catch (IOException e) {
324
                    e.printStackTrace();
325
                    System.err.println("Couldn't write to file " + "results_not_found.txt");
326
                }
327
            }
328
        }else {
329
            body = getResultFromSearch(body, claimId);
330
        }
331
        return body;
332
    }
333

    
334
    private static Result getResultFromSearch(Result  r,String claimId){
335
        SearchUtils searchUtils= new SearchUtils();
336
        String searchUri=searchUtils.getResultSearchUrl(r.getOpenaireId());
337
        if(searchUri==null){
338
            return r;
339
        }
340
        System.out.println("Result query: "+searchUri);
341
        try {
342
            String size=null;
343
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
344
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
345
            Document document=dBuilder.parse(searchUri);
346
            XPathFactory xPathfactory= XPathFactory.newInstance();
347
            XPath xpath = xPathfactory.newXPath();
348
            NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
349
            if (nl.getLength() > 0) {
350
                size= nl.item(0).getNodeValue();
351
            }
352
            if(size!=null && Integer.parseInt(size)>0){
353
                nl = (NodeList) xpath.compile("//field[@name='resulttypename']/@value").evaluate(document, XPathConstants.NODESET);
354
                if (nl.getLength() > 0) {
355
                    r.setResultType(nl.item(0).getNodeValue());
356
                }
357
                nl = (NodeList) xpath.compile("//field[@name='bestlicense']/@value").evaluate(document, XPathConstants.NODESET);
358
                if (nl.getLength() > 0) {
359
                    r.setBestLicense(nl.item(0).getNodeValue());
360
                }
361
                nl = (NodeList) xpath.compile("//field[@name='collectedfrom']/@value").evaluate(document, XPathConstants.NODESET);
362
                if (nl.getLength() > 0) {
363
                    r.setCollectedFrom(nl.item(0).getNodeValue());
364
                }
365
                nl = (NodeList) xpath.compile("//field[@name='title']/@value").evaluate(document, XPathConstants.NODESET);
366
                if (nl.getLength() > 0) {
367
                    r.setTitle(nl.item(0).getNodeValue());
368
                }
369
                nl = (NodeList) xpath.compile("//field[@name='pid']").evaluate(document, XPathConstants.NODESET);
370
                for(int i=0; i<nl.getLength();i++){
371
                    String id="";
372
                    String type="";
373
                    int count;
374

    
375

    
376
                    NodeList valueNodes = (NodeList) xpath.compile("//field[@name='value']/@value").evaluate(nl.item(i), XPathConstants.NODESET);
377
                        if (valueNodes.getLength() > 0) {
378
                            id = valueNodes.item(i).getNodeValue();
379
                        }
380
                    NodeList classNodes = (NodeList) xpath.compile("//field[@name='classid']/@value").evaluate(nl.item(i), XPathConstants.NODESET);
381
                        if (classNodes.getLength() > 0) {
382
                            type = classNodes.item(i).getNodeValue();
383
                        }
384
                         if (type.equals("doi") && id != null) {
385
                            r.setDoi(id);
386
                            r.setExternal_url("http://dx.doi.org/"+r.getDoi());
387

    
388
                        }else if(type.equals("pmc") && id != null) {
389
                             r.setPmcid(id);
390
                         }
391

    
392
                }
393

    
394

    
395
            }else{
396
                try{
397
                    PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("results_not_found.txt", true)));
398
                    Date date= new java.util.Date();
399
                    out.println(new Timestamp(date.getTime())+" - Result Not Found: "+r.getOpenaireId()+ " in claim "+claimId);
400
                    out.close();
401
                }catch (IOException e) {
402
                    e.printStackTrace();
403
                    System.err.println("Couldn't write to file " + "results_not_found.txt");
404
                }
405
                //System.err.println("Result Not Found " + r.getOpenaireId());
406
                r.setFound(false);
407
            }
408

    
409
        } catch (Exception e) {
410
            e.printStackTrace();
411
            return null;
412
        }
413
        return r;
414
    }
415
    public static String getObjIdentifierFromSearch(String id){
416
        SearchUtils searchUtils= new SearchUtils();
417
        String searchUri=searchUtils.getResultSearchUrl(id);
418
        String objIdentifier=null;
419
        if(searchUri==null){
420
            return null;
421
        }
422
        System.out.println("Result query: "+searchUri);
423
        try {
424
            String size=null;
425
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
426
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
427
            Document document=dBuilder.parse(searchUri);
428
            XPathFactory xPathfactory= XPathFactory.newInstance();
429
            XPath xpath = xPathfactory.newXPath();
430
            NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
431
            if (nl.getLength() > 0) {
432
                size= nl.item(0).getNodeValue();
433
            }
434
            if(size!=null && Integer.parseInt(size)>0){
435
                //indexId="objIdentifier"
436
                //<field name="resultId" multiplicity="false" indexId="objIdentifier" label="Object id" value="dedup_wf_001::7832a296929028bbe447d66398a0c43a"/>
437
                nl = (NodeList) xpath.compile("//field[@indexId='objIdentifier']/@value").evaluate(document, XPathConstants.NODESET);
438
                if (nl.getLength() > 0) {
439
                    objIdentifier=nl.item(0).getNodeValue();
440
                }
441

    
442

    
443
            }
444
        } catch (Exception e) {
445
            e.printStackTrace();
446
            return null;
447
        }
448
        return objIdentifier;
449
    }
450
    private static Result getResultFromAPI(Result  r,String claimId, boolean isPublication){
451
        SearchUtils searchUtils= new SearchUtils();
452
        String searchUri;
453
        if(isPublication) {
454
            searchUri = searchUtils.getPublicationApiUrl(r.getOpenaireId());
455
        }else{
456
            searchUri = searchUtils.getDatasetApiUrl(r.getOpenaireId());
457
        }
458
        if(searchUri==null){
459
            return r;
460
        }
461
        System.out.println("Result query: "+searchUri);
462
        try {
463
            URL obj =obj = new URL(searchUri);
464
            HttpURLConnection con = (HttpURLConnection) obj.openConnection();
465
            int responseCode = con.getResponseCode();
466
            BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
467
            StringBuffer response = new StringBuffer();
468
            String inputLine;
469
            while ((inputLine = in.readLine()) != null) {
470
                response.append(inputLine+"\n");
471
            }
472
            in.close();
473
             String xml = response.toString();
474
            String size=null;
475
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
476
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
477
            //Document document=dBuilder.parse(searchUri);
478
            InputSource is = new InputSource(new StringReader(xml));
479
            Document document= document=dBuilder.parse(is);
480
            XPathFactory xPathfactory= XPathFactory.newInstance();
481
            XPath xpath = xPathfactory.newXPath();
482

    
483

    
484
            NodeList nl = (NodeList) xpath.compile("/response/header/total/text()").evaluate(document, XPathConstants.NODESET);
485
            if (nl.getLength() > 0) {
486
                size= nl.item(0).getNodeValue();
487
            }
488
            if(size!=null && Integer.parseInt(size)>0){
489
                nl = (NodeList) xpath.compile("//resulttype/@classid").evaluate(document, XPathConstants.NODESET);
490
                if (nl.getLength() > 0) {
491
                    r.setResultType(nl.item(0).getNodeValue());
492
                }
493
                nl = (NodeList) xpath.compile("//bestlicense/@classid").evaluate(document, XPathConstants.NODESET);
494
                if (nl.getLength() > 0) {
495
                    r.setBestLicense(nl.item(0).getNodeValue());
496
                }
497
                //TODO check this
498
               /* nl = (NodeList) xpath.compile("//collectedfrom/@id").evaluate(document, XPathConstants.NODESET);
499
                r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_OPENAIRE);
500
                for(int i=0;i<nl.getLength();i++) {
501
                    String datasourceId=nl.item(i).getNodeValue();
502
                    if (datasourceId.equals(ClaimUtils.DATASOURCE_ID__ORCID)) {
503
                        r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
504
                    }else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID_CROSSREF)) {
505
                        r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_CROSSREF);
506
                    }else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID_DATACTE)) {
507
                        r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
508
                    }else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID__ORCID)) {
509
                        r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
510
                    }
511
                }*/
512
//                if (nl.getLength() > 0) {
513
//                    r.setCollectedFrom(nl.item(0).getNodeValue());
514
//                }
515
                nl = (NodeList) xpath.compile("//title/text()").evaluate(document, XPathConstants.NODESET);
516
                if (nl.getLength() > 0) {
517
                    r.setTitle(nl.item(0).getNodeValue());
518
                }
519
                nl = (NodeList) xpath.compile("//pid[@classid='doi']/text()").evaluate(document, XPathConstants.NODESET);
520
                if (nl.getLength() > 0) {
521
                    r.setDoi(nl.item(0).getNodeValue());
522
                    r.setExternal_url("http://dx.doi.org/"+r.getDoi());
523
                }
524
                nl = (NodeList) xpath.compile("//pid[@classid='pmc']/text()").evaluate(document, XPathConstants.NODESET);
525
                if (nl.getLength() > 0) {
526
                    r.setPmcid(nl.item(0).getNodeValue());
527

    
528
                }
529
                r.setXml(xml);
530
                r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_OPENAIRE);
531
                r.setFound(true);
532

    
533
            }else{
534
                r.setFound(false);
535
            }
536

    
537
        } catch (Exception e) {
538
            e.printStackTrace();
539
            return null;
540
        }
541
        return r;
542
    }
543
    public static Result getXmlfromDatacite(Result  r){
544
        r.setResultType(ClaimUtils.DATASET);
545
        SearchUtils searchUtils= new SearchUtils();
546
        if(r!=null&&r.getDoi()==null){
547
            return r;
548
        }
549
        String xml =searchUtils.getDataciteXmlRecord(r.getDoi());
550
         if(xml==null){
551
            return r;
552
        }
553
         try {
554
            String size=null;
555
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
556
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
557
            InputSource is = new InputSource(new StringReader(xml));
558
            Document document= document=dBuilder.parse(is);
559
            XPathFactory xPathfactory= XPathFactory.newInstance();
560
            XPath xpath = xPathfactory.newXPath();
561
            NodeList nl  ;
562

    
563
                nl = (NodeList) xpath.compile("//j.0:title/text()").evaluate(document, XPathConstants.NODESET);
564
                if (nl.getLength() > 0) {
565
                    r.setTitle(nl.item(0).getNodeValue());
566
                }
567
                nl = (NodeList) xpath.compile("//j.0:identifier/text()").evaluate(document, XPathConstants.NODESET);
568
                if (nl.getLength() > 0) {
569
                    r.setDoi(nl.item(0).getNodeValue());
570
                    r.setExternal_url("http://dx.doi.org/"+r.getDoi());
571
                }
572
                r.setXml(xml);
573
             r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
574
                r.setFound(true);
575
        } catch (Exception e) {
576
            e.printStackTrace();
577
            return null;
578
        }
579
        return r;
580
    }
581
    public static Result getJsonfromCrossref(Result  r){
582
        SearchUtils searchUtils= new SearchUtils();
583
        if(r!=null&&r.getDoi()==null){
584
            return r;
585
        }
586
        String xml =searchUtils.getCrossrefJsonRecord(r.getDoi());
587
        if(xml!=null){
588
            r.setXml(xml);
589
            r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_CROSSREF);
590
        }
591
        return r;
592
    }
593
    public static Result getXmlfromOrcid(Result  r){
594
        r.setResultType(ClaimUtils.PUBLICATION);
595
//        SearchUtils searchUtils= new SearchUtils();
596
//        if(r!=null&&r.getOrcidworkid()==null){
597
//            return r;
598
//        }
599
//        System.out.println("OWI: " + r.getOrcidworkid());
600
//        String orcid=r.getOrcidworkid().substring(0,19);
601
//        String orcidworkid=r.getOrcidworkid().substring(20,r.getOrcidworkid().length());
602
//                //0000-0003-5000-0001
603
//        System.out.println("orcid: "+orcid);
604
//        System.out.println("OWI: "+orcidworkid);
605
//        String xml =searchUtils.getOrcidXmlRecord(orcid);
606
//        if(xml==null){
607
//            return r;
608
//        }
609
//        try {
610
//            String size=null;
611
//            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
612
//            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
613
//            InputSource is = new InputSource(new StringReader(xml));
614
//            Document document= document=dBuilder.parse(is);
615
//            XPathFactory xPathfactory= XPathFactory.newInstance();
616
//            XPath xpath = xPathfactory.newXPath();
617
//            NodeList nl  ;
618
////            getParent().toXML()
619
////            <orcid-work put-code="19500531" visibility="public">
620
//            String s = (String) xpath.compile("//orcid-work[@put-code="+orcidworkid+"]").evaluate(document, XPathConstants.STRING);
621
//                 System.out.println("Here: "+s);
622
//
623
//
624
//            NodeList worknl = (NodeList) xpath.compile("//orcid-work[@put-code="+orcidworkid+"]").evaluate(document,XPathConstants.NODESET);
625
//            if (worknl.getLength() > 0) {
626
//                r.setTitle(worknl.item(0).getParentNode().toString());
627
//
628
//                nl = (NodeList) xpath.compile("//orcid-work").evaluate(document, XPathConstants.NODESET);
629
//                for (int i = 0; i < nl.getLength(); i++) {
630
//                    nl.item(i).getParentNode().removeChild(nl.item(i));
631
//                }
632
//
633
//                nl = (NodeList) xpath.compile("//orcid-works").evaluate(document, XPathConstants.NODESET);
634
//                if (nl.getLength() > 0) {
635
//                    nl.item(0).appendChild(worknl.item(0));
636
//                }
637
//                DOMSource domSource = new DOMSource(document);
638
//                StringWriter writer = new StringWriter();
639
//                StreamResult result = new StreamResult(writer);
640
//                TransformerFactory tf = TransformerFactory.newInstance();
641
//                Transformer transformer = tf.newTransformer();
642
//                transformer.transform(domSource, result);
643
//                System.out.println("XML IN String format is: \n" + writer.toString());
644
//            }
645
//            nl = (NodeList) xpath.compile("//j.0:identifier/text()").evaluate(document, XPathConstants.NODESET);
646
//            if (nl.getLength() > 0) {
647
//                r.setDoi(nl.item(0).getNodeValue());
648
//                r.setExternal_url("http://dx.doi.org/"+r.getDoi());
649
//            }
650
//            r.setXml(xml);
651
//            r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
652
//            r.setFound(true);
653
//        } catch (Exception e) {
654
//            e.printStackTrace();
655
//            return null;
656
//        }
657
        return r;
658
    }
659
        public static Project buildProject(String id, String claimId, boolean useAPI){
660
        Project body=new Project();
661
         if (id.contains("|")) {
662
            id = id.split("\\|")[1];
663
        }
664
        body.setOpenaireId(id);
665
        if(useAPI){
666
            return getProjectFromAPI(body,claimId);
667
        }else{
668
            return getProjectFromSearch(body, claimId);
669
        }
670

    
671

    
672
    }
673

    
674
    private static Project getProjectFromSearch(Project project, String claimId){
675
        SearchUtils s=new SearchUtils();
676
        String searchUri=s.getProjectSearchUrl(project.getOpenaireId());
677
        if(searchUri==null){
678
            return project;
679
        }
680
       System.out.println("Project query: "+searchUri);
681
        try {
682
            String size=null;
683
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
684
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
685
            Document document=dBuilder.parse(searchUri);
686
            XPathFactory xPathfactory= XPathFactory.newInstance();
687
            XPath xpath = xPathfactory.newXPath();
688
            NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
689
            if (nl.getLength() > 0) {
690
                size= nl.item(0).getNodeValue();
691
            }
692
            if(size!=null && Integer.parseInt(size)>0){
693
                /*
694
                <field name="name" indexId="projectacronym" multiplicity="true" label=colle value="OPENAIRE"/><field name="code" indexId="projectcode" multiplicity="true" label="Project code" value="246686"/><field name="title" indexId="projecttitle" multiplicity="true" label="Title" value="Open Access Infrastructure for Research in Europe"/>
695
                 */
696
                nl = (NodeList) xpath.compile("//field[@name='title']/@value").evaluate(document, XPathConstants.NODESET);
697
                if (nl.getLength() > 0) {
698
                    project.setName(nl.item(0).getNodeValue());
699
                }
700
                nl = (NodeList) xpath.compile("//field[@name='name']/@value").evaluate(document, XPathConstants.NODESET);
701
                if (nl.getLength() > 0) {
702
                    project.setAcronym(nl.item(0).getNodeValue());
703
                }
704
                /*
705
                <test/><field name="funder" multiplicity="true"><field name="funderid" indexId="funderid" label="" value="ec__________::EC"/><field name="fundershortname" indexId="fundershortname" label="" value="EC"/><field name="fundername" indexId="fundername" label="" value="European Commission"/></field>
706
                */
707

    
708
                nl = (NodeList) xpath.compile("//field[@name='fundername']/@value").evaluate(document, XPathConstants.NODESET);
709
                if (nl.getLength() > 0) {
710
                    project.setFunderName(nl.item(0).getNodeValue());
711
                }
712
                nl = (NodeList) xpath.compile("//field[@name='funderid']/@value").evaluate(document, XPathConstants.NODESET);
713
                if (nl.getLength() > 0) {
714
                    project.setFunderId(nl.item(0).getNodeValue());
715
                }
716

    
717
            }else{
718
                try{
719
                    PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("projects_not_found.txt", true)));
720
                    Date date= new java.util.Date();
721
                    out.println(new Timestamp(date.getTime())+" - Projects Not Found: "+project.getOpenaireId()+ " in claim "+claimId);
722
                    out.close();
723
                }catch (IOException e) {
724
                    e.printStackTrace();
725
                    System.err.println("Couldn't write to file " + "projects_not_found.txt");
726
                }
727
                //System.err.println("PROJECT Not Found " + project.getOpenaireId());
728
                project.setFound(false);
729

    
730
            }
731

    
732
        } catch (Exception e) {
733
            return null;
734
        }
735
         return project;
736
    }
737
    private static Project getProjectFromAPI(Project project, String claimId){
738

    
739

    
740
        SearchUtils s=new SearchUtils();
741
        String searchUri=s.getProjectApiUrl(project.getOpenaireId());
742
        searchUri="http://api.openaire.eu/search//projects?format=xml&openaireParticipantID=dedup_wf_001::82c87f641bb6219626a0ceca81e0d434";
743
        if(searchUri==null){
744
            return project;
745
        }
746
         //TODO the parsing from API
747
        System.out.println("Project query: "+searchUri);
748
         try {
749
            URL obj =obj = new URL(searchUri);
750
            HttpURLConnection con = (HttpURLConnection) obj.openConnection();
751
            int responseCode = con.getResponseCode();
752
            BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
753
            StringBuffer response = new StringBuffer();
754
            String inputLine;
755
            while ((inputLine = in.readLine()) != null) {
756
                response.append(inputLine);
757
            }
758
            in.close();
759
            String xml = response.toString();
760
            String size=null;
761
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
762
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
763
            //Document document=dBuilder.parse(searchUri);
764
            InputSource is = new InputSource(new StringReader(xml));
765
            Document document= document=dBuilder.parse(is);
766
            XPathFactory xPathfactory= XPathFactory.newInstance();
767
            XPath xpath = xPathfactory.newXPath();
768

    
769

    
770
            NodeList nl = (NodeList) xpath.compile("/response/header/total/text()").evaluate(document, XPathConstants.NODESET);
771
            if (nl.getLength() > 0) {
772
                size= nl.item(0).getNodeValue();
773
            }
774
            if(size!=null && Integer.parseInt(size)>0){
775

    
776
                nl = (NodeList) xpath.compile("//title/text()").evaluate(document, XPathConstants.NODESET);
777
                if (nl.getLength() > 0) {
778
                    project.setName(nl.item(0).getNodeValue());
779
                }
780
                nl = (NodeList) xpath.compile("//acronym/text()").evaluate(document, XPathConstants.NODESET);
781
                if (nl.getLength() > 0) {
782
                    project.setAcronym(nl.item(0).getNodeValue());
783
                }
784
                nl = (NodeList) xpath.compile("//funder/name/text()").evaluate(document, XPathConstants.NODESET);
785
                if (nl.getLength() > 0) {
786
                    project.setFunderName(nl.item(0).getNodeValue());
787
                }
788
                nl = (NodeList) xpath.compile("//funder/id/text()").evaluate(document, XPathConstants.NODESET);
789
                if (nl.getLength() > 0) {
790
                    project.setFunderId(nl.item(0).getNodeValue());
791
                }
792

    
793
                project.setFound(true);
794

    
795
            }else{
796
                try{
797
                    PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("projects_not_found.txt", true)));
798
                    Date date= new java.util.Date();
799
                    out.println(new Timestamp(date.getTime())+" - Projects Not Found: "+project.getOpenaireId()+ " in claim "+claimId);
800
                    out.close();
801
                }catch (IOException e) {
802
                    e.printStackTrace();
803
                    System.err.println("Couldn't write to file " + "projects_not_found.txt");
804
                }
805
                //System.err.println("PROJECT Not Found " + project.getOpenaireId());
806
                project.setFound(false);
807
            }
808

    
809
        } catch (Exception e) {
810
            e.printStackTrace();
811
            return null;
812
        }
813
        return project;
814
    }
815
}
(4-4/9)