1
|
package eu.dnetlib.data.claimsDemo;
|
2
|
|
3
|
//import eu.dnetlib.data.claims.migration.Claim;
|
4
|
import eu.dnetlib.data.claims.migration.*;
|
5
|
import org.w3c.dom.Document;
|
6
|
import org.w3c.dom.Element;
|
7
|
import org.w3c.dom.Node;
|
8
|
import org.w3c.dom.NodeList;
|
9
|
import org.xml.sax.InputSource;
|
10
|
import org.xml.sax.SAXException;
|
11
|
|
12
|
import javax.xml.parsers.DocumentBuilder;
|
13
|
import javax.xml.parsers.DocumentBuilderFactory;
|
14
|
import javax.xml.parsers.ParserConfigurationException;
|
15
|
import javax.xml.transform.Transformer;
|
16
|
import javax.xml.transform.TransformerFactory;
|
17
|
import javax.xml.transform.dom.DOMSource;
|
18
|
import javax.xml.transform.stream.StreamResult;
|
19
|
import javax.xml.xpath.XPath;
|
20
|
import javax.xml.xpath.XPathConstants;
|
21
|
import javax.xml.xpath.XPathExpression;
|
22
|
import javax.xml.xpath.XPathFactory;
|
23
|
import java.io.*;
|
24
|
import java.net.HttpURLConnection;
|
25
|
import java.net.URL;
|
26
|
import java.sql.Timestamp;
|
27
|
import java.util.Date;
|
28
|
|
29
|
/**
|
30
|
* Created by argirok on 20/11/2015.
|
31
|
*/
|
32
|
/*
|
33
|
Parsing xml from claims DB
|
34
|
* DMF xml
|
35
|
* Relation XML
|
36
|
*/
|
37
|
public class ParsingClaimUtils {
|
38
|
private DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
39
|
private DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
40
|
private InputSource inputSource;
|
41
|
private Document document;
|
42
|
private XPathFactory xPathfactory;
|
43
|
private XPath xpath;
|
44
|
private String relationType;
|
45
|
private SearchUtils searchUtils= new SearchUtils();
|
46
|
|
47
|
/*ParsingClaimUtils(String xml) throws ParserConfigurationException, IOException, SAXException {
|
48
|
inputSource = new InputSource(new StringReader(xml));
|
49
|
document = dBuilder.parse(inputSource);
|
50
|
relationType=getAttributeFromRel2ActionsXML("type");
|
51
|
}*/
|
52
|
/**
 * Loads the XML document located at {@code uri} and prepares an XPath evaluator for it.
 *
 * @param uri    location of the XML document, passed unchanged to {@link DocumentBuilder#parse(String)}
 * @param search not read by this constructor
 * @throws ParserConfigurationException declared because the parser field initializer may raise it
 * @throws IOException                  if the document cannot be read
 * @throws SAXException                 if the document is not well-formed XML
 */
ParsingClaimUtils(String uri, boolean search) throws ParserConfigurationException, IOException, SAXException {
    this.document = this.dBuilder.parse(uri);
    this.xPathfactory = XPathFactory.newInstance();
    this.xpath = this.xPathfactory.newXPath();
}
|
57
|
|
58
|
public String getValueFromXMLAtrribute(String elementName) {
|
59
|
return getValueFromElement("field", elementName);
|
60
|
}
|
61
|
private String getValueFromElement(String element, String elementName) {
|
62
|
XPathExpression expr = null;
|
63
|
try {
|
64
|
expr = xpath.compile(String.format("//%s[@name=\"%s\"]", element, elementName));
|
65
|
NodeList nl = (NodeList) expr.evaluate(document, XPathConstants.NODESET);
|
66
|
if (nl.getLength() > 0) {
|
67
|
Node nNode = nl.item(0);
|
68
|
if (nNode.getNodeType() == Node.ELEMENT_NODE) {
|
69
|
Element eElement = (Element) nNode;
|
70
|
return eElement.getAttribute("value");
|
71
|
}
|
72
|
}
|
73
|
return null;
|
74
|
} catch (Exception e) {
|
75
|
e.printStackTrace();
|
76
|
return null;
|
77
|
|
78
|
}
|
79
|
}
|
80
|
private String getResultsSize() {
|
81
|
XPathExpression expr = null;
|
82
|
try {
|
83
|
expr = xpath.compile("//total/text()");
|
84
|
NodeList nl = (NodeList) expr.evaluate(document, XPathConstants.NODESET);
|
85
|
if (nl.getLength() > 0) {
|
86
|
return nl.item(0).getNodeValue();
|
87
|
}
|
88
|
return null;
|
89
|
} catch (Exception e) {
|
90
|
e.printStackTrace();
|
91
|
return null;
|
92
|
|
93
|
}
|
94
|
}
|
95
|
|
96
|
/*
|
97
|
Get the type of the target of an annotation /
|
98
|
Get the type of the source of a claim /
|
99
|
|
100
|
*/
|
101
|
static public String getTargetType(String relationType) {
|
102
|
String sourceType = ClaimUtils.PUBLICATION;
|
103
|
if (relationType.equals("resultResult_publicationpublication_isRelatedTo")) {
|
104
|
sourceType = ClaimUtils.PUBLICATION;
|
105
|
} else if (relationType.equals("resultResult_publicationdataset_isRelatedTo")) {
|
106
|
sourceType = ClaimUtils.PUBLICATION;
|
107
|
} else if (relationType.equals("resultProject")) {
|
108
|
sourceType = ClaimUtils.PUBLICATION;
|
109
|
//could be a dataset too
|
110
|
} else if (relationType.equals("resultResult_datasetpublication_isRelatedTo")) {
|
111
|
sourceType = ClaimUtils.DATASET;
|
112
|
} else if (relationType.equals("resultResult_datasetdataset_isRelatedTo")) {
|
113
|
sourceType = ClaimUtils.DATASET;
|
114
|
}
|
115
|
return sourceType;
|
116
|
}
|
117
|
/*
|
118
|
Get the type of the body of an annotation /
|
119
|
Get the type of the target of a claim /
|
120
|
|
121
|
*/
|
122
|
public static String getBodyType(String relationType) {
|
123
|
String sourceType = ClaimUtils.PUBLICATION;
|
124
|
if (relationType.equals("resultResult_publicationpublication_isRelatedTo")) {
|
125
|
sourceType = ClaimUtils.PUBLICATION;
|
126
|
} else if (relationType.equals("resultResult_publicationdataset_isRelatedTo")) {
|
127
|
sourceType = ClaimUtils.DATASET;
|
128
|
} else if (relationType.equals("resultProject")) {
|
129
|
sourceType = ClaimUtils.PROJECT;
|
130
|
//could be a dataset too
|
131
|
} else if (relationType.equals("resultResult_datasetpublication_isRelatedTo")) {
|
132
|
sourceType = ClaimUtils.PUBLICATION;
|
133
|
} else if (relationType.equals("resultResult_datasetdataset_isRelatedTo")) {
|
134
|
sourceType = ClaimUtils.DATASET;
|
135
|
}
|
136
|
return sourceType;
|
137
|
}
|
138
|
|
139
|
static public Result getResultFromDMF(String xml) throws ParserConfigurationException, IOException, SAXException {
|
140
|
Result r=new Result();
|
141
|
r.setXml(xml);
|
142
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
143
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
144
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
145
|
Document document=dBuilder.parse(inputSource);
|
146
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
147
|
XPath xpath = xPathfactory.newXPath();
|
148
|
try {
|
149
|
NodeList nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='doi']/text()").evaluate(document, XPathConstants.NODESET);
|
150
|
if (nl.getLength() > 0) {
|
151
|
r.setDoi(nl.item(0).getNodeValue());
|
152
|
}
|
153
|
nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
|
154
|
if (nl.getLength() > 0) {
|
155
|
r.setOrcidworkid(nl.item(0).getNodeValue());
|
156
|
}
|
157
|
|
158
|
// DON'T DELETE following lines for orcidworkid!!
|
159
|
// @identiferType typo is made on purpose -> there are dmf xml with this typo.
|
160
|
if(r.getXml().contains("@identiferType='orcidworkid'")) {
|
161
|
r.setXml(r.getXml().replace("@identiferType='orcidworkid'","@identifierType='orcidworkid'"));
|
162
|
nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identiferType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
|
163
|
if (nl.getLength() > 0) {
|
164
|
r.setOrcidworkid(nl.item(0).getNodeValue());
|
165
|
}
|
166
|
}
|
167
|
nl = (NodeList) xpath.compile("//*[local-name()='objIdentifier']/text()").evaluate(document, XPathConstants.NODESET);
|
168
|
if (nl.getLength() > 0) {
|
169
|
r.setOpenaireId(nl.item(0).getNodeValue());
|
170
|
}
|
171
|
nl = (NodeList) xpath.compile("//*[local-name()='identifier']/text()").evaluate(document, XPathConstants.NODESET);
|
172
|
if (nl.getLength() > 0) {
|
173
|
r.setExternal_url(nl.item(0).getNodeValue());
|
174
|
}
|
175
|
nl = (NodeList) xpath.compile("//*[local-name()='accessrights']/text()").evaluate(document, XPathConstants.NODESET);
|
176
|
if (nl.getLength() > 0) {
|
177
|
r.setAccessRights(nl.item(0).getNodeValue());
|
178
|
}
|
179
|
// <oaf:accessrights>EMBARGO</oaf:accessrights>
|
180
|
//<oaf:embargoenddate>2015-03-01</oaf:embargoenddate>
|
181
|
nl = (NodeList) xpath.compile("//embargoenddate/text()").evaluate(document, XPathConstants.NODESET);
|
182
|
if (nl.getLength() > 0) {
|
183
|
r.setEmbargoEndDate(nl.item(0).getNodeValue());
|
184
|
}
|
185
|
nl = (NodeList) xpath.compile("//*[local-name()='title']/text()").evaluate(document, XPathConstants.NODESET);
|
186
|
if (nl.getLength() > 0) {
|
187
|
r.setTitle(nl.item(0).getNodeValue());
|
188
|
}
|
189
|
nl = (NodeList) xpath.compile("//*[local-name()='collectedFrom']/@id").evaluate(document, XPathConstants.NODESET);
|
190
|
if (nl.getLength() > 0) {
|
191
|
r.setCollectedFrom(nl.item(0).getNodeValue());
|
192
|
}
|
193
|
} catch (Exception e) {
|
194
|
e.printStackTrace();
|
195
|
return null;
|
196
|
|
197
|
}
|
198
|
return r;
|
199
|
}
|
200
|
static public Claim getClaimFromConceptDMF(Claim claim, String xml) {
|
201
|
Result r=new Result();
|
202
|
Context context =new Context();
|
203
|
|
204
|
try {
|
205
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
206
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
207
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
208
|
Document document=dBuilder.parse(inputSource);
|
209
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
210
|
XPath xpath = xPathfactory.newXPath();
|
211
|
NodeList nl = (NodeList) xpath.compile("//*[local-name()='recordIdentifier']/text()").evaluate(document, XPathConstants.NODESET);
|
212
|
if (nl.getLength() > 0) {
|
213
|
r.setOpenaireId(nl.item(0).getNodeValue());
|
214
|
}
|
215
|
nl = (NodeList) xpath.compile("//*[local-name()='concept']/@id").evaluate(document, XPathConstants.NODESET);
|
216
|
if (nl.getLength() > 0) {
|
217
|
context.setOpenaireId(nl.item(0).getNodeValue());
|
218
|
}
|
219
|
} catch (Exception e) {
|
220
|
e.printStackTrace();
|
221
|
return null;
|
222
|
|
223
|
}
|
224
|
|
225
|
claim.setTarget(buildResult(r.getOpenaireId(), null,claim.getId(),ClaimUtils.USEAPIRESULTS));
|
226
|
claim.setSource(buildContext(context));
|
227
|
return claim;
|
228
|
}
|
229
|
static public Claim getRelationClaim(Claim claim, String xml) throws IOException, SAXException, ParserConfigurationException {
|
230
|
String relationType="";
|
231
|
String sourceId=""; //Annotation source
|
232
|
String targetId=""; //Annotation target
|
233
|
// //<RELATION type='resultProject' source='50|od______1266::af81022e9c489007a8f9ab27c2c725cb' target='40|fct_________::0432268334291febec6d0dbc1f8bae5d' />
|
234
|
|
235
|
try {
|
236
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
237
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
238
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
239
|
Document document=dBuilder.parse(inputSource);
|
240
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
241
|
XPath xpath = xPathfactory.newXPath();
|
242
|
NodeList nl = (NodeList) xpath.compile("//RELATION/@type").evaluate(document, XPathConstants.NODESET);
|
243
|
if (nl.getLength() > 0) {
|
244
|
relationType=(nl.item(0).getNodeValue());
|
245
|
}
|
246
|
nl = (NodeList) xpath.compile("//RELATION/@source").evaluate(document, XPathConstants.NODESET);
|
247
|
if (nl.getLength() > 0) {
|
248
|
targetId=(nl.item(0).getNodeValue());
|
249
|
}
|
250
|
nl = (NodeList) xpath.compile("//RELATION/@target").evaluate(document, XPathConstants.NODESET);
|
251
|
if (nl.getLength() > 0) {
|
252
|
sourceId=(nl.item(0).getNodeValue());
|
253
|
}
|
254
|
} catch (Exception e) {
|
255
|
e.printStackTrace();
|
256
|
return null;
|
257
|
|
258
|
}
|
259
|
claim.setTarget(buildResult(targetId, getTargetType(relationType),claim.getId(),ClaimUtils.USEAPIRESULTS));
|
260
|
String bodyType=getBodyType(relationType);
|
261
|
if (bodyType.equals(ClaimUtils.PROJECT)) {
|
262
|
claim.setSource(buildProject(sourceId, claim.getId(),ClaimUtils.USEAPIPROJECTS));
|
263
|
} else {
|
264
|
claim.setSource(buildResult(sourceId, bodyType,claim.getId(),ClaimUtils.USEAPIRESULTS));
|
265
|
bodyType=((Result)claim.getSource()).getResultType();
|
266
|
}
|
267
|
claim.setTargetType(((Result) claim.getTarget()).getResultType());
|
268
|
claim.setSourceType(bodyType);
|
269
|
return claim;
|
270
|
}
|
271
|
static public Context buildContext(Context context){
|
272
|
if(context!=null&&context.getOpenaireId()!=null){
|
273
|
try {
|
274
|
context.setTitle(ContextUtils.extractEgiLabel(context.getOpenaireId()));
|
275
|
}catch (Exception e){
|
276
|
e.printStackTrace();
|
277
|
System.err.println("ContextUtils: Couldn't get Egi label for id "+context.getId());
|
278
|
}
|
279
|
}
|
280
|
return context;
|
281
|
}
|
282
|
static Result buildResult(String id, String type,String claimId,boolean useApi) {
|
283
|
Result body= new Result();
|
284
|
if (id.contains("|")) {
|
285
|
id = id.split("\\|")[1];
|
286
|
}
|
287
|
body.setOpenaireId(id);
|
288
|
body.setResultType(type);
|
289
|
if(useApi){
|
290
|
//look for a publication
|
291
|
body=getResultFromAPI(body, claimId,true);
|
292
|
String openaireId=body.getOpenaireId();
|
293
|
String objId=getObjIdentifierFromSearch(openaireId);
|
294
|
if(!body.isFound()){
|
295
|
if(objId!=null&&!openaireId.equals(objId)){
|
296
|
// not found! look for a publication with objId
|
297
|
//if result not found in API with openaireId, search for it with objIdentifier
|
298
|
body.setOpenaireId(objId);
|
299
|
body=getResultFromAPI(body, claimId,true);
|
300
|
body.setOpenaireId(openaireId);
|
301
|
//TODO check which one of the ids we should keep!!!!
|
302
|
}
|
303
|
if(!body.isFound()){
|
304
|
//if still not found search for Dataset with the id
|
305
|
body = getResultFromAPI(body, claimId, false);
|
306
|
if (!body.isFound() && (objId = getObjIdentifierFromSearch(openaireId)) != null && !openaireId.equals(objId)) {
|
307
|
// still not found!! search for Dataset with the onjId
|
308
|
//if result not found in API with openaireId, search for it with objIdentifier
|
309
|
body.setOpenaireId(objId);
|
310
|
body = getResultFromAPI(body, claimId, false);
|
311
|
body.setOpenaireId(openaireId);
|
312
|
//TODO check which one of the ids we should keep!!!!
|
313
|
}
|
314
|
}
|
315
|
}
|
316
|
//TODO check if it is a dataset
|
317
|
if(!body.isFound()) {
|
318
|
try {
|
319
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("results_not_found.txt", true)));
|
320
|
Date date = new java.util.Date();
|
321
|
out.println(new Timestamp(date.getTime()) + " - Result Not Found: " + body.getOpenaireId() + " (API) in claim " + claimId);
|
322
|
out.close();
|
323
|
} catch (IOException e) {
|
324
|
e.printStackTrace();
|
325
|
System.err.println("Couldn't write to file " + "results_not_found.txt");
|
326
|
}
|
327
|
}
|
328
|
}else {
|
329
|
body = getResultFromSearch(body, claimId);
|
330
|
}
|
331
|
return body;
|
332
|
}
|
333
|
|
334
|
private static Result getResultFromSearch(Result r,String claimId){
|
335
|
SearchUtils searchUtils= new SearchUtils();
|
336
|
String searchUri=searchUtils.getResultSearchUrl(r.getOpenaireId());
|
337
|
if(searchUri==null){
|
338
|
return r;
|
339
|
}
|
340
|
System.out.println("Result query: "+searchUri);
|
341
|
try {
|
342
|
String size=null;
|
343
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
344
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
345
|
Document document=dBuilder.parse(searchUri);
|
346
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
347
|
XPath xpath = xPathfactory.newXPath();
|
348
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
349
|
if (nl.getLength() > 0) {
|
350
|
size= nl.item(0).getNodeValue();
|
351
|
}
|
352
|
if(size!=null && Integer.parseInt(size)>0){
|
353
|
nl = (NodeList) xpath.compile("//field[@name='resulttypename']/@value").evaluate(document, XPathConstants.NODESET);
|
354
|
if (nl.getLength() > 0) {
|
355
|
r.setResultType(nl.item(0).getNodeValue());
|
356
|
}
|
357
|
nl = (NodeList) xpath.compile("//field[@name='bestlicense']/@value").evaluate(document, XPathConstants.NODESET);
|
358
|
if (nl.getLength() > 0) {
|
359
|
r.setBestLicense(nl.item(0).getNodeValue());
|
360
|
}
|
361
|
nl = (NodeList) xpath.compile("//field[@name='collectedfrom']/@value").evaluate(document, XPathConstants.NODESET);
|
362
|
if (nl.getLength() > 0) {
|
363
|
r.setCollectedFrom(nl.item(0).getNodeValue());
|
364
|
}
|
365
|
nl = (NodeList) xpath.compile("//field[@name='title']/@value").evaluate(document, XPathConstants.NODESET);
|
366
|
if (nl.getLength() > 0) {
|
367
|
r.setTitle(nl.item(0).getNodeValue());
|
368
|
}
|
369
|
nl = (NodeList) xpath.compile("//field[@name='pid']").evaluate(document, XPathConstants.NODESET);
|
370
|
for(int i=0; i<nl.getLength();i++){
|
371
|
String id="";
|
372
|
String type="";
|
373
|
int count;
|
374
|
|
375
|
|
376
|
NodeList valueNodes = (NodeList) xpath.compile("//field[@name='value']/@value").evaluate(nl.item(i), XPathConstants.NODESET);
|
377
|
if (valueNodes.getLength() > 0) {
|
378
|
id = valueNodes.item(i).getNodeValue();
|
379
|
}
|
380
|
NodeList classNodes = (NodeList) xpath.compile("//field[@name='classid']/@value").evaluate(nl.item(i), XPathConstants.NODESET);
|
381
|
if (classNodes.getLength() > 0) {
|
382
|
type = classNodes.item(i).getNodeValue();
|
383
|
}
|
384
|
if (type.equals("doi") && id != null) {
|
385
|
r.setDoi(id);
|
386
|
r.setExternal_url("http://dx.doi.org/"+r.getDoi());
|
387
|
|
388
|
}else if(type.equals("pmc") && id != null) {
|
389
|
r.setPmcid(id);
|
390
|
}
|
391
|
|
392
|
}
|
393
|
|
394
|
|
395
|
}else{
|
396
|
try{
|
397
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("results_not_found.txt", true)));
|
398
|
Date date= new java.util.Date();
|
399
|
out.println(new Timestamp(date.getTime())+" - Result Not Found: "+r.getOpenaireId()+ " in claim "+claimId);
|
400
|
out.close();
|
401
|
}catch (IOException e) {
|
402
|
e.printStackTrace();
|
403
|
System.err.println("Couldn't write to file " + "results_not_found.txt");
|
404
|
}
|
405
|
//System.err.println("Result Not Found " + r.getOpenaireId());
|
406
|
r.setFound(false);
|
407
|
}
|
408
|
|
409
|
} catch (Exception e) {
|
410
|
e.printStackTrace();
|
411
|
return null;
|
412
|
}
|
413
|
return r;
|
414
|
}
|
415
|
public static String getObjIdentifierFromSearch(String id){
|
416
|
SearchUtils searchUtils= new SearchUtils();
|
417
|
String searchUri=searchUtils.getResultSearchUrl(id);
|
418
|
String objIdentifier=null;
|
419
|
if(searchUri==null){
|
420
|
return null;
|
421
|
}
|
422
|
System.out.println("Result query: "+searchUri);
|
423
|
try {
|
424
|
String size=null;
|
425
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
426
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
427
|
Document document=dBuilder.parse(searchUri);
|
428
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
429
|
XPath xpath = xPathfactory.newXPath();
|
430
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
431
|
if (nl.getLength() > 0) {
|
432
|
size= nl.item(0).getNodeValue();
|
433
|
}
|
434
|
if(size!=null && Integer.parseInt(size)>0){
|
435
|
//indexId="objIdentifier"
|
436
|
//<field name="resultId" multiplicity="false" indexId="objIdentifier" label="Object id" value="dedup_wf_001::7832a296929028bbe447d66398a0c43a"/>
|
437
|
nl = (NodeList) xpath.compile("//field[@indexId='objIdentifier']/@value").evaluate(document, XPathConstants.NODESET);
|
438
|
if (nl.getLength() > 0) {
|
439
|
objIdentifier=nl.item(0).getNodeValue();
|
440
|
}
|
441
|
|
442
|
|
443
|
}
|
444
|
} catch (Exception e) {
|
445
|
e.printStackTrace();
|
446
|
return null;
|
447
|
}
|
448
|
return objIdentifier;
|
449
|
}
|
450
|
private static Result getResultFromAPI(Result r,String claimId, boolean isPublication){
|
451
|
SearchUtils searchUtils= new SearchUtils();
|
452
|
String searchUri;
|
453
|
if(isPublication) {
|
454
|
searchUri = searchUtils.getPublicationApiUrl(r.getOpenaireId());
|
455
|
}else{
|
456
|
searchUri = searchUtils.getDatasetApiUrl(r.getOpenaireId());
|
457
|
}
|
458
|
if(searchUri==null){
|
459
|
return r;
|
460
|
}
|
461
|
System.out.println("Result query: "+searchUri);
|
462
|
try {
|
463
|
URL obj =obj = new URL(searchUri);
|
464
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
465
|
int responseCode = con.getResponseCode();
|
466
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
467
|
StringBuffer response = new StringBuffer();
|
468
|
String inputLine;
|
469
|
while ((inputLine = in.readLine()) != null) {
|
470
|
response.append(inputLine+"\n");
|
471
|
}
|
472
|
in.close();
|
473
|
String xml = response.toString();
|
474
|
String size=null;
|
475
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
476
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
477
|
//Document document=dBuilder.parse(searchUri);
|
478
|
InputSource is = new InputSource(new StringReader(xml));
|
479
|
Document document= document=dBuilder.parse(is);
|
480
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
481
|
XPath xpath = xPathfactory.newXPath();
|
482
|
|
483
|
|
484
|
NodeList nl = (NodeList) xpath.compile("/response/header/total/text()").evaluate(document, XPathConstants.NODESET);
|
485
|
if (nl.getLength() > 0) {
|
486
|
size= nl.item(0).getNodeValue();
|
487
|
}
|
488
|
if(size!=null && Integer.parseInt(size)>0){
|
489
|
nl = (NodeList) xpath.compile("//resulttype/@classid").evaluate(document, XPathConstants.NODESET);
|
490
|
if (nl.getLength() > 0) {
|
491
|
r.setResultType(nl.item(0).getNodeValue());
|
492
|
}
|
493
|
nl = (NodeList) xpath.compile("//bestlicense/@classid").evaluate(document, XPathConstants.NODESET);
|
494
|
if (nl.getLength() > 0) {
|
495
|
r.setBestLicense(nl.item(0).getNodeValue());
|
496
|
}
|
497
|
//TODO check this
|
498
|
/* nl = (NodeList) xpath.compile("//collectedfrom/@id").evaluate(document, XPathConstants.NODESET);
|
499
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_OPENAIRE);
|
500
|
for(int i=0;i<nl.getLength();i++) {
|
501
|
String datasourceId=nl.item(i).getNodeValue();
|
502
|
if (datasourceId.equals(ClaimUtils.DATASOURCE_ID__ORCID)) {
|
503
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
|
504
|
}else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID_CROSSREF)) {
|
505
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_CROSSREF);
|
506
|
}else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID_DATACTE)) {
|
507
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
|
508
|
}else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID__ORCID)) {
|
509
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
|
510
|
}
|
511
|
}*/
|
512
|
// if (nl.getLength() > 0) {
|
513
|
// r.setCollectedFrom(nl.item(0).getNodeValue());
|
514
|
// }
|
515
|
nl = (NodeList) xpath.compile("//title/text()").evaluate(document, XPathConstants.NODESET);
|
516
|
if (nl.getLength() > 0) {
|
517
|
r.setTitle(nl.item(0).getNodeValue());
|
518
|
}
|
519
|
nl = (NodeList) xpath.compile("//pid[@classid='doi']/text()").evaluate(document, XPathConstants.NODESET);
|
520
|
if (nl.getLength() > 0) {
|
521
|
r.setDoi(nl.item(0).getNodeValue());
|
522
|
r.setExternal_url("http://dx.doi.org/"+r.getDoi());
|
523
|
}
|
524
|
nl = (NodeList) xpath.compile("//pid[@classid='pmc']/text()").evaluate(document, XPathConstants.NODESET);
|
525
|
if (nl.getLength() > 0) {
|
526
|
r.setPmcid(nl.item(0).getNodeValue());
|
527
|
|
528
|
}
|
529
|
r.setXml(xml);
|
530
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_OPENAIRE);
|
531
|
r.setFound(true);
|
532
|
|
533
|
}else{
|
534
|
r.setFound(false);
|
535
|
}
|
536
|
|
537
|
} catch (Exception e) {
|
538
|
e.printStackTrace();
|
539
|
return null;
|
540
|
}
|
541
|
return r;
|
542
|
}
|
543
|
public static Result getXmlfromDatacite(Result r){
|
544
|
r.setResultType(ClaimUtils.DATASET);
|
545
|
SearchUtils searchUtils= new SearchUtils();
|
546
|
if(r!=null&&r.getDoi()==null){
|
547
|
return r;
|
548
|
}
|
549
|
String xml =searchUtils.getDataciteXmlRecord(r.getDoi());
|
550
|
if(xml==null){
|
551
|
return r;
|
552
|
}
|
553
|
try {
|
554
|
String size=null;
|
555
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
556
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
557
|
InputSource is = new InputSource(new StringReader(xml));
|
558
|
Document document= document=dBuilder.parse(is);
|
559
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
560
|
XPath xpath = xPathfactory.newXPath();
|
561
|
NodeList nl ;
|
562
|
|
563
|
nl = (NodeList) xpath.compile("//j.0:title/text()").evaluate(document, XPathConstants.NODESET);
|
564
|
if (nl.getLength() > 0) {
|
565
|
r.setTitle(nl.item(0).getNodeValue());
|
566
|
}
|
567
|
nl = (NodeList) xpath.compile("//j.0:identifier/text()").evaluate(document, XPathConstants.NODESET);
|
568
|
if (nl.getLength() > 0) {
|
569
|
r.setDoi(nl.item(0).getNodeValue());
|
570
|
r.setExternal_url("http://dx.doi.org/"+r.getDoi());
|
571
|
}
|
572
|
r.setXml(xml);
|
573
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
|
574
|
r.setFound(true);
|
575
|
} catch (Exception e) {
|
576
|
e.printStackTrace();
|
577
|
return null;
|
578
|
}
|
579
|
return r;
|
580
|
}
|
581
|
public static Result getJsonfromCrossref(Result r){
|
582
|
SearchUtils searchUtils= new SearchUtils();
|
583
|
if(r!=null&&r.getDoi()==null){
|
584
|
return r;
|
585
|
}
|
586
|
String xml =searchUtils.getCrossrefJsonRecord(r.getDoi());
|
587
|
if(xml!=null){
|
588
|
r.setXml(xml);
|
589
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_CROSSREF);
|
590
|
}
|
591
|
return r;
|
592
|
}
|
593
|
public static Result getXmlfromOrcid(Result r){
|
594
|
r.setResultType(ClaimUtils.PUBLICATION);
|
595
|
// SearchUtils searchUtils= new SearchUtils();
|
596
|
// if(r!=null&&r.getOrcidworkid()==null){
|
597
|
// return r;
|
598
|
// }
|
599
|
// System.out.println("OWI: " + r.getOrcidworkid());
|
600
|
// String orcid=r.getOrcidworkid().substring(0,19);
|
601
|
// String orcidworkid=r.getOrcidworkid().substring(20,r.getOrcidworkid().length());
|
602
|
// //0000-0003-5000-0001
|
603
|
// System.out.println("orcid: "+orcid);
|
604
|
// System.out.println("OWI: "+orcidworkid);
|
605
|
// String xml =searchUtils.getOrcidXmlRecord(orcid);
|
606
|
// if(xml==null){
|
607
|
// return r;
|
608
|
// }
|
609
|
// try {
|
610
|
// String size=null;
|
611
|
// DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
612
|
// DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
613
|
// InputSource is = new InputSource(new StringReader(xml));
|
614
|
// Document document= document=dBuilder.parse(is);
|
615
|
// XPathFactory xPathfactory= XPathFactory.newInstance();
|
616
|
// XPath xpath = xPathfactory.newXPath();
|
617
|
// NodeList nl ;
|
618
|
//// getParent().toXML()
|
619
|
//// <orcid-work put-code="19500531" visibility="public">
|
620
|
// String s = (String) xpath.compile("//orcid-work[@put-code="+orcidworkid+"]").evaluate(document, XPathConstants.STRING);
|
621
|
// System.out.println("Here: "+s);
|
622
|
//
|
623
|
//
|
624
|
// NodeList worknl = (NodeList) xpath.compile("//orcid-work[@put-code="+orcidworkid+"]").evaluate(document,XPathConstants.NODESET);
|
625
|
// if (worknl.getLength() > 0) {
|
626
|
// r.setTitle(worknl.item(0).getParentNode().toString());
|
627
|
//
|
628
|
// nl = (NodeList) xpath.compile("//orcid-work").evaluate(document, XPathConstants.NODESET);
|
629
|
// for (int i = 0; i < nl.getLength(); i++) {
|
630
|
// nl.item(i).getParentNode().removeChild(nl.item(i));
|
631
|
// }
|
632
|
//
|
633
|
// nl = (NodeList) xpath.compile("//orcid-works").evaluate(document, XPathConstants.NODESET);
|
634
|
// if (nl.getLength() > 0) {
|
635
|
// nl.item(0).appendChild(worknl.item(0));
|
636
|
// }
|
637
|
// DOMSource domSource = new DOMSource(document);
|
638
|
// StringWriter writer = new StringWriter();
|
639
|
// StreamResult result = new StreamResult(writer);
|
640
|
// TransformerFactory tf = TransformerFactory.newInstance();
|
641
|
// Transformer transformer = tf.newTransformer();
|
642
|
// transformer.transform(domSource, result);
|
643
|
// System.out.println("XML IN String format is: \n" + writer.toString());
|
644
|
// }
|
645
|
// nl = (NodeList) xpath.compile("//j.0:identifier/text()").evaluate(document, XPathConstants.NODESET);
|
646
|
// if (nl.getLength() > 0) {
|
647
|
// r.setDoi(nl.item(0).getNodeValue());
|
648
|
// r.setExternal_url("http://dx.doi.org/"+r.getDoi());
|
649
|
// }
|
650
|
// r.setXml(xml);
|
651
|
// r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
|
652
|
// r.setFound(true);
|
653
|
// } catch (Exception e) {
|
654
|
// e.printStackTrace();
|
655
|
// return null;
|
656
|
// }
|
657
|
return r;
|
658
|
}
|
659
|
public static Project buildProject(String id, String claimId, boolean useAPI){
|
660
|
Project body=new Project();
|
661
|
if (id.contains("|")) {
|
662
|
id = id.split("\\|")[1];
|
663
|
}
|
664
|
body.setOpenaireId(id);
|
665
|
if(useAPI){
|
666
|
return getProjectFromAPI(body,claimId);
|
667
|
}else{
|
668
|
return getProjectFromSearch(body, claimId);
|
669
|
}
|
670
|
|
671
|
|
672
|
}
|
673
|
|
674
|
private static Project getProjectFromSearch(Project project, String claimId){
|
675
|
SearchUtils s=new SearchUtils();
|
676
|
String searchUri=s.getProjectSearchUrl(project.getOpenaireId());
|
677
|
if(searchUri==null){
|
678
|
return project;
|
679
|
}
|
680
|
System.out.println("Project query: "+searchUri);
|
681
|
try {
|
682
|
String size=null;
|
683
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
684
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
685
|
Document document=dBuilder.parse(searchUri);
|
686
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
687
|
XPath xpath = xPathfactory.newXPath();
|
688
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
689
|
if (nl.getLength() > 0) {
|
690
|
size= nl.item(0).getNodeValue();
|
691
|
}
|
692
|
if(size!=null && Integer.parseInt(size)>0){
|
693
|
/*
|
694
|
<field name="name" indexId="projectacronym" multiplicity="true" label=colle value="OPENAIRE"/><field name="code" indexId="projectcode" multiplicity="true" label="Project code" value="246686"/><field name="title" indexId="projecttitle" multiplicity="true" label="Title" value="Open Access Infrastructure for Research in Europe"/>
|
695
|
*/
|
696
|
nl = (NodeList) xpath.compile("//field[@name='title']/@value").evaluate(document, XPathConstants.NODESET);
|
697
|
if (nl.getLength() > 0) {
|
698
|
project.setName(nl.item(0).getNodeValue());
|
699
|
}
|
700
|
nl = (NodeList) xpath.compile("//field[@name='name']/@value").evaluate(document, XPathConstants.NODESET);
|
701
|
if (nl.getLength() > 0) {
|
702
|
project.setAcronym(nl.item(0).getNodeValue());
|
703
|
}
|
704
|
/*
|
705
|
<test/><field name="funder" multiplicity="true"><field name="funderid" indexId="funderid" label="" value="ec__________::EC"/><field name="fundershortname" indexId="fundershortname" label="" value="EC"/><field name="fundername" indexId="fundername" label="" value="European Commission"/></field>
|
706
|
*/
|
707
|
|
708
|
nl = (NodeList) xpath.compile("//field[@name='fundername']/@value").evaluate(document, XPathConstants.NODESET);
|
709
|
if (nl.getLength() > 0) {
|
710
|
project.setFunderName(nl.item(0).getNodeValue());
|
711
|
}
|
712
|
nl = (NodeList) xpath.compile("//field[@name='funderid']/@value").evaluate(document, XPathConstants.NODESET);
|
713
|
if (nl.getLength() > 0) {
|
714
|
project.setFunderId(nl.item(0).getNodeValue());
|
715
|
}
|
716
|
|
717
|
}else{
|
718
|
try{
|
719
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("projects_not_found.txt", true)));
|
720
|
Date date= new java.util.Date();
|
721
|
out.println(new Timestamp(date.getTime())+" - Projects Not Found: "+project.getOpenaireId()+ " in claim "+claimId);
|
722
|
out.close();
|
723
|
}catch (IOException e) {
|
724
|
e.printStackTrace();
|
725
|
System.err.println("Couldn't write to file " + "projects_not_found.txt");
|
726
|
}
|
727
|
//System.err.println("PROJECT Not Found " + project.getOpenaireId());
|
728
|
project.setFound(false);
|
729
|
|
730
|
}
|
731
|
|
732
|
} catch (Exception e) {
|
733
|
return null;
|
734
|
}
|
735
|
return project;
|
736
|
}
|
737
|
private static Project getProjectFromAPI(Project project, String claimId){
|
738
|
|
739
|
|
740
|
SearchUtils s=new SearchUtils();
|
741
|
String searchUri=s.getProjectApiUrl(project.getOpenaireId());
|
742
|
searchUri="http://api.openaire.eu/search//projects?format=xml&openaireParticipantID=dedup_wf_001::82c87f641bb6219626a0ceca81e0d434";
|
743
|
if(searchUri==null){
|
744
|
return project;
|
745
|
}
|
746
|
//TODO the parsing from API
|
747
|
System.out.println("Project query: "+searchUri);
|
748
|
try {
|
749
|
URL obj =obj = new URL(searchUri);
|
750
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
751
|
int responseCode = con.getResponseCode();
|
752
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
753
|
StringBuffer response = new StringBuffer();
|
754
|
String inputLine;
|
755
|
while ((inputLine = in.readLine()) != null) {
|
756
|
response.append(inputLine);
|
757
|
}
|
758
|
in.close();
|
759
|
String xml = response.toString();
|
760
|
String size=null;
|
761
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
762
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
763
|
//Document document=dBuilder.parse(searchUri);
|
764
|
InputSource is = new InputSource(new StringReader(xml));
|
765
|
Document document= document=dBuilder.parse(is);
|
766
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
767
|
XPath xpath = xPathfactory.newXPath();
|
768
|
|
769
|
|
770
|
NodeList nl = (NodeList) xpath.compile("/response/header/total/text()").evaluate(document, XPathConstants.NODESET);
|
771
|
if (nl.getLength() > 0) {
|
772
|
size= nl.item(0).getNodeValue();
|
773
|
}
|
774
|
if(size!=null && Integer.parseInt(size)>0){
|
775
|
|
776
|
nl = (NodeList) xpath.compile("//title/text()").evaluate(document, XPathConstants.NODESET);
|
777
|
if (nl.getLength() > 0) {
|
778
|
project.setName(nl.item(0).getNodeValue());
|
779
|
}
|
780
|
nl = (NodeList) xpath.compile("//acronym/text()").evaluate(document, XPathConstants.NODESET);
|
781
|
if (nl.getLength() > 0) {
|
782
|
project.setAcronym(nl.item(0).getNodeValue());
|
783
|
}
|
784
|
nl = (NodeList) xpath.compile("//funder/name/text()").evaluate(document, XPathConstants.NODESET);
|
785
|
if (nl.getLength() > 0) {
|
786
|
project.setFunderName(nl.item(0).getNodeValue());
|
787
|
}
|
788
|
nl = (NodeList) xpath.compile("//funder/id/text()").evaluate(document, XPathConstants.NODESET);
|
789
|
if (nl.getLength() > 0) {
|
790
|
project.setFunderId(nl.item(0).getNodeValue());
|
791
|
}
|
792
|
|
793
|
project.setFound(true);
|
794
|
|
795
|
}else{
|
796
|
try{
|
797
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("projects_not_found.txt", true)));
|
798
|
Date date= new java.util.Date();
|
799
|
out.println(new Timestamp(date.getTime())+" - Projects Not Found: "+project.getOpenaireId()+ " in claim "+claimId);
|
800
|
out.close();
|
801
|
}catch (IOException e) {
|
802
|
e.printStackTrace();
|
803
|
System.err.println("Couldn't write to file " + "projects_not_found.txt");
|
804
|
}
|
805
|
//System.err.println("PROJECT Not Found " + project.getOpenaireId());
|
806
|
project.setFound(false);
|
807
|
}
|
808
|
|
809
|
} catch (Exception e) {
|
810
|
e.printStackTrace();
|
811
|
return null;
|
812
|
}
|
813
|
return project;
|
814
|
}
|
815
|
}
|