1
|
package eu.dnetlib.data.claimsDemo;
|
2
|
|
3
|
//import eu.dnetlib.data.claims.migration.Claim;
|
4
|
import eu.dnetlib.data.claims.migration.*;
|
5
|
import org.w3c.dom.Document;
|
6
|
import org.w3c.dom.Element;
|
7
|
import org.w3c.dom.Node;
|
8
|
import org.w3c.dom.NodeList;
|
9
|
import org.xml.sax.InputSource;
|
10
|
import org.xml.sax.SAXException;
|
11
|
|
12
|
import javax.xml.parsers.DocumentBuilder;
|
13
|
import javax.xml.parsers.DocumentBuilderFactory;
|
14
|
import javax.xml.parsers.ParserConfigurationException;
|
15
|
import javax.xml.transform.Transformer;
|
16
|
import javax.xml.transform.TransformerFactory;
|
17
|
import javax.xml.transform.dom.DOMSource;
|
18
|
import javax.xml.transform.stream.StreamResult;
|
19
|
import javax.xml.xpath.XPath;
|
20
|
import javax.xml.xpath.XPathConstants;
|
21
|
import javax.xml.xpath.XPathExpression;
|
22
|
import javax.xml.xpath.XPathFactory;
|
23
|
import java.io.*;
|
24
|
import java.net.HttpURLConnection;
|
25
|
import java.net.URL;
|
26
|
import java.sql.Timestamp;
|
27
|
import java.util.Date;
|
28
|
|
29
|
/**
|
30
|
* Created by argirok on 20/11/2015.
|
31
|
*/
|
32
|
/*
|
33
|
Parsing xml from claims DB
|
34
|
* DMF xml
|
35
|
* Relation XML
|
36
|
*/
|
37
|
public class ParsingClaimUtils {
|
38
|
// Factory for DOM parsers, created with its default configuration.
private DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
39
|
// Parser obtained from dbFactory. newDocumentBuilder() can throw ParserConfigurationException,
// which the constructor of this class declares. Must stay after dbFactory (initialization order).
private DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
40
|
// Only referenced by the commented-out constructor below; never assigned in the active code shown here.
private InputSource inputSource;
|
41
|
// DOM tree parsed in the constructor; the instance XPath helpers evaluate against it.
private Document document;
|
42
|
// Created in the constructor and used there to obtain xpath.
private XPathFactory xPathfactory;
|
43
|
// XPath evaluator created in the constructor; used by getValueFromElement and getResultsSize.
private XPath xpath;
|
44
|
// Only referenced by the commented-out constructor below; never assigned in the active code shown here.
private String relationType;
|
45
|
// NOTE(review): not read by any instance method visible in this file; the static methods create their own SearchUtils.
private SearchUtils searchUtils= new SearchUtils();
|
46
|
|
47
|
/*ParsingClaimUtils(String xml) throws ParserConfigurationException, IOException, SAXException {
|
48
|
inputSource = new InputSource(new StringReader(xml));
|
49
|
document = dBuilder.parse(inputSource);
|
50
|
relationType=getAttributeFromRel2ActionsXML("type");
|
51
|
}*/
|
52
|
ParsingClaimUtils(String uri, boolean search) throws ParserConfigurationException, IOException, SAXException{
|
53
|
document = dBuilder.parse(uri);
|
54
|
xPathfactory = XPathFactory.newInstance();
|
55
|
xpath = xPathfactory.newXPath();
|
56
|
}
|
57
|
|
58
|
public String getValueFromXMLAtrribute(String elementName) {
|
59
|
return getValueFromElement("field", elementName);
|
60
|
}
|
61
|
private String getValueFromElement(String element, String elementName) {
|
62
|
XPathExpression expr = null;
|
63
|
try {
|
64
|
expr = xpath.compile(String.format("//%s[@name=\"%s\"]", element, elementName));
|
65
|
NodeList nl = (NodeList) expr.evaluate(document, XPathConstants.NODESET);
|
66
|
if (nl.getLength() > 0) {
|
67
|
Node nNode = nl.item(0);
|
68
|
if (nNode.getNodeType() == Node.ELEMENT_NODE) {
|
69
|
Element eElement = (Element) nNode;
|
70
|
return eElement.getAttribute("value");
|
71
|
}
|
72
|
}
|
73
|
return null;
|
74
|
} catch (Exception e) {
|
75
|
e.printStackTrace();
|
76
|
return null;
|
77
|
|
78
|
}
|
79
|
}
|
80
|
private String getResultsSize() {
|
81
|
XPathExpression expr = null;
|
82
|
try {
|
83
|
expr = xpath.compile("//total/text()");
|
84
|
NodeList nl = (NodeList) expr.evaluate(document, XPathConstants.NODESET);
|
85
|
if (nl.getLength() > 0) {
|
86
|
return nl.item(0).getNodeValue();
|
87
|
}
|
88
|
return null;
|
89
|
} catch (Exception e) {
|
90
|
e.printStackTrace();
|
91
|
return null;
|
92
|
|
93
|
}
|
94
|
}
|
95
|
|
96
|
/*
|
97
|
Get the type of the target of an annotation /
|
98
|
Get the type of the source of a claim /
|
99
|
|
100
|
*/
|
101
|
static public String getTargetType(String relationType) {
|
102
|
String sourceType = ClaimUtils.PUBLICATION;
|
103
|
if (relationType.equals("resultResult_publicationpublication_isRelatedTo")) {
|
104
|
sourceType = ClaimUtils.PUBLICATION;
|
105
|
} else if (relationType.equals("resultResult_publicationdataset_isRelatedTo")) {
|
106
|
sourceType = ClaimUtils.PUBLICATION;
|
107
|
} else if (relationType.equals("resultProject")) {
|
108
|
sourceType = ClaimUtils.PUBLICATION;
|
109
|
//could be a dataset too
|
110
|
} else if (relationType.equals("resultResult_datasetpublication_isRelatedTo")) {
|
111
|
sourceType = ClaimUtils.DATASET;
|
112
|
} else if (relationType.equals("resultResult_datasetdataset_isRelatedTo")) {
|
113
|
sourceType = ClaimUtils.DATASET;
|
114
|
}
|
115
|
return sourceType;
|
116
|
}
|
117
|
/*
|
118
|
Get the type of the body of an annotation /
|
119
|
Get the type of the target of a claim /
|
120
|
|
121
|
*/
|
122
|
public static String getBodyType(String relationType) {
|
123
|
String sourceType = ClaimUtils.PUBLICATION;
|
124
|
if (relationType.equals("resultResult_publicationpublication_isRelatedTo")) {
|
125
|
sourceType = ClaimUtils.PUBLICATION;
|
126
|
} else if (relationType.equals("resultResult_publicationdataset_isRelatedTo")) {
|
127
|
sourceType = ClaimUtils.DATASET;
|
128
|
} else if (relationType.equals("resultProject")) {
|
129
|
sourceType = ClaimUtils.PROJECT;
|
130
|
//could be a dataset too
|
131
|
} else if (relationType.equals("resultResult_datasetpublication_isRelatedTo")) {
|
132
|
sourceType = ClaimUtils.PUBLICATION;
|
133
|
} else if (relationType.equals("resultResult_datasetdataset_isRelatedTo")) {
|
134
|
sourceType = ClaimUtils.DATASET;
|
135
|
}
|
136
|
return sourceType;
|
137
|
}
|
138
|
|
139
|
static public Result getResultFromDMF(String xml) throws ParserConfigurationException, IOException, SAXException {
|
140
|
Result r=new Result();
|
141
|
r.setXml(xml);
|
142
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
143
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
144
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
145
|
Document document=dBuilder.parse(inputSource);
|
146
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
147
|
XPath xpath = xPathfactory.newXPath();
|
148
|
try {
|
149
|
NodeList nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='doi']/text()").evaluate(document, XPathConstants.NODESET);
|
150
|
if (nl.getLength() > 0) {
|
151
|
r.setDoi(nl.item(0).getNodeValue());
|
152
|
}
|
153
|
nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identifierType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
|
154
|
if (nl.getLength() > 0) {
|
155
|
r.setOrcidworkid(nl.item(0).getNodeValue());
|
156
|
}
|
157
|
|
158
|
// DON'T DELETE following lines for orcidworkid!!
|
159
|
// @identiferType typo is made on purpose -> there are dmf xml with this typo.
|
160
|
if(r.getXml().contains("@identiferType='orcidworkid'")) {
|
161
|
r.setXml(r.getXml().replace("@identiferType='orcidworkid'","@identifierType='orcidworkid'"));
|
162
|
nl = (NodeList) xpath.compile("//*[local-name()=\"identifier\"][@identiferType='orcidworkid']/text()").evaluate(document, XPathConstants.NODESET);
|
163
|
if (nl.getLength() > 0) {
|
164
|
r.setOrcidworkid(nl.item(0).getNodeValue());
|
165
|
}
|
166
|
}
|
167
|
nl = (NodeList) xpath.compile("//*[local-name()='objIdentifier']/text()").evaluate(document, XPathConstants.NODESET);
|
168
|
if (nl.getLength() > 0) {
|
169
|
r.setOpenaireId(nl.item(0).getNodeValue());
|
170
|
}
|
171
|
nl = (NodeList) xpath.compile("//*[local-name()='identifier']/text()").evaluate(document, XPathConstants.NODESET);
|
172
|
if (nl.getLength() > 0) {
|
173
|
r.setExternal_url(nl.item(0).getNodeValue());
|
174
|
}
|
175
|
nl = (NodeList) xpath.compile("//*[local-name()='accessrights']/text()").evaluate(document, XPathConstants.NODESET);
|
176
|
if (nl.getLength() > 0) {
|
177
|
r.setAccessRights(nl.item(0).getNodeValue());
|
178
|
}
|
179
|
// <oaf:accessrights>EMBARGO</oaf:accessrights>
|
180
|
//<oaf:embargoenddate>2015-03-01</oaf:embargoenddate>
|
181
|
nl = (NodeList) xpath.compile("//embargoenddate/text()").evaluate(document, XPathConstants.NODESET);
|
182
|
if (nl.getLength() > 0) {
|
183
|
r.setEmbargoEndDate(nl.item(0).getNodeValue());
|
184
|
}
|
185
|
nl = (NodeList) xpath.compile("//*[local-name()='title']/text()").evaluate(document, XPathConstants.NODESET);
|
186
|
if (nl.getLength() > 0) {
|
187
|
r.setTitle(nl.item(0).getNodeValue());
|
188
|
}
|
189
|
nl = (NodeList) xpath.compile("//*[local-name()='collectedFrom']/@id").evaluate(document, XPathConstants.NODESET);
|
190
|
if (nl.getLength() > 0) {
|
191
|
r.setCollectedFrom(nl.item(0).getNodeValue());
|
192
|
}
|
193
|
} catch (Exception e) {
|
194
|
e.printStackTrace();
|
195
|
return null;
|
196
|
|
197
|
}
|
198
|
return r;
|
199
|
}
|
200
|
static public void getClaimFromConceptDMF(Claim claim, String xml) {
|
201
|
Result r=new Result();
|
202
|
Context context =new Context();
|
203
|
|
204
|
try {
|
205
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
206
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
207
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
208
|
Document document=dBuilder.parse(inputSource);
|
209
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
210
|
XPath xpath = xPathfactory.newXPath();
|
211
|
NodeList nl = (NodeList) xpath.compile("//*[local-name()='recordIdentifier']/text()").evaluate(document, XPathConstants.NODESET);
|
212
|
if (nl.getLength() > 0) {
|
213
|
r.setOpenaireId(nl.item(0).getNodeValue());
|
214
|
}
|
215
|
nl = (NodeList) xpath.compile("//*[local-name()='concept']/@id").evaluate(document, XPathConstants.NODESET);
|
216
|
if (nl.getLength() > 0) {
|
217
|
context.setOpenaireId(nl.item(0).getNodeValue());
|
218
|
}
|
219
|
} catch (Exception e) {
|
220
|
e.printStackTrace();
|
221
|
|
222
|
}
|
223
|
|
224
|
claim.setTarget(buildResult(r.getOpenaireId(), null,claim.getId(),ClaimUtils.USEAPIRESULTS));
|
225
|
buildContext(context);
|
226
|
claim.setSource(context);
|
227
|
}
|
228
|
static public void getRelationClaim(Claim claim, String xml) throws IOException, SAXException, ParserConfigurationException {
|
229
|
String relationType="";
|
230
|
String sourceId=""; //Annotation source
|
231
|
String targetId=""; //Annotation target
|
232
|
// //<RELATION type='resultProject' source='50|od______1266::af81022e9c489007a8f9ab27c2c725cb' target='40|fct_________::0432268334291febec6d0dbc1f8bae5d' />
|
233
|
|
234
|
try {
|
235
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
236
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
237
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
238
|
Document document=dBuilder.parse(inputSource);
|
239
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
240
|
XPath xpath = xPathfactory.newXPath();
|
241
|
NodeList nl = (NodeList) xpath.compile("//RELATION/@type").evaluate(document, XPathConstants.NODESET);
|
242
|
if (nl.getLength() > 0) {
|
243
|
relationType=(nl.item(0).getNodeValue());
|
244
|
}
|
245
|
nl = (NodeList) xpath.compile("//RELATION/@source").evaluate(document, XPathConstants.NODESET);
|
246
|
if (nl.getLength() > 0) {
|
247
|
targetId=(nl.item(0).getNodeValue());
|
248
|
}
|
249
|
nl = (NodeList) xpath.compile("//RELATION/@target").evaluate(document, XPathConstants.NODESET);
|
250
|
if (nl.getLength() > 0) {
|
251
|
sourceId=(nl.item(0).getNodeValue());
|
252
|
}
|
253
|
} catch (Exception e) {
|
254
|
e.printStackTrace();
|
255
|
|
256
|
}
|
257
|
claim.setTarget(buildResult(targetId, getTargetType(relationType),claim.getId(),ClaimUtils.USEAPIRESULTS));
|
258
|
String bodyType=getBodyType(relationType);
|
259
|
if (bodyType.equals(ClaimUtils.PROJECT)) {
|
260
|
claim.setSource(buildProject(sourceId, claim.getId(),ClaimUtils.USEAPIPROJECTS));
|
261
|
} else {
|
262
|
claim.setSource(buildResult(sourceId, bodyType,claim.getId(),ClaimUtils.USEAPIRESULTS));
|
263
|
bodyType=((Result)claim.getSource()).getResultType();
|
264
|
}
|
265
|
claim.setTargetType(((Result) claim.getTarget()).getResultType());
|
266
|
claim.setSourceType(bodyType);
|
267
|
}
|
268
|
static public void buildContext(Context context){
|
269
|
if(context!=null&&context.getOpenaireId()!=null){
|
270
|
try {
|
271
|
context.setTitle(ContextUtils.extractEgiLabel(context.getOpenaireId()));
|
272
|
}catch (Exception e){
|
273
|
e.printStackTrace();
|
274
|
System.err.println("ContextUtils: Couldn't get Egi label for id "+context.getId());
|
275
|
}
|
276
|
}
|
277
|
}
|
278
|
static Result buildResult(String id, String type,String claimId,boolean useApi) {
|
279
|
Result body= new Result();
|
280
|
if (id.contains("|")) {
|
281
|
id = id.split("\\|")[1];
|
282
|
}
|
283
|
body.setOpenaireId(id);
|
284
|
body.setResultType(type);
|
285
|
if(useApi){
|
286
|
//look for a publication
|
287
|
getResultFromAPI(body, claimId,true);
|
288
|
String openaireId=body.getOpenaireId();
|
289
|
String objId=getObjIdentifierFromSearch(openaireId);
|
290
|
if(!body.isFound()){
|
291
|
if(objId!=null&&!openaireId.equals(objId)){
|
292
|
// not found! look for a publication with objId
|
293
|
//if result not found in API with openaireId, search for it with objIdentifier
|
294
|
body.setOpenaireId(objId);
|
295
|
getResultFromAPI(body, claimId,true);
|
296
|
body.setOpenaireId(openaireId);
|
297
|
//TODO check which one of the ids we should keep!!!!
|
298
|
}
|
299
|
if(!body.isFound()){
|
300
|
//if still not found search for Dataset with the id
|
301
|
getResultFromAPI(body, claimId, false);
|
302
|
if (!body.isFound() && (objId = getObjIdentifierFromSearch(openaireId)) != null && !openaireId.equals(objId)) {
|
303
|
// still not found!! search for Dataset with the onjId
|
304
|
//if result not found in API with openaireId, search for it with objIdentifier
|
305
|
body.setOpenaireId(objId);
|
306
|
getResultFromAPI(body, claimId, false);
|
307
|
body.setOpenaireId(openaireId);
|
308
|
//TODO check which one of the ids we should keep!!!!
|
309
|
}
|
310
|
}
|
311
|
}
|
312
|
//TODO check if it is a dataset
|
313
|
if(!body.isFound()) {
|
314
|
try {
|
315
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("results_not_found.txt", true)));
|
316
|
Date date = new java.util.Date();
|
317
|
out.println(new Timestamp(date.getTime()) + " - Result Not Found: " + body.getOpenaireId() + " (API) in claim " + claimId);
|
318
|
out.close();
|
319
|
} catch (IOException e) {
|
320
|
e.printStackTrace();
|
321
|
System.err.println("Couldn't write to file " + "results_not_found.txt");
|
322
|
}
|
323
|
}
|
324
|
}else {
|
325
|
getResultFromSearch(body, claimId);
|
326
|
}
|
327
|
return body;
|
328
|
}
|
329
|
|
330
|
private static void getResultFromSearch(Result r,String claimId){
|
331
|
SearchUtils searchUtils= new SearchUtils();
|
332
|
String searchUri=searchUtils.getResultSearchUrl(r.getOpenaireId());
|
333
|
if(searchUri==null){
|
334
|
return ;
|
335
|
}
|
336
|
System.out.println("Result query: "+searchUri);
|
337
|
try {
|
338
|
String size=null;
|
339
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
340
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
341
|
Document document=dBuilder.parse(searchUri);
|
342
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
343
|
XPath xpath = xPathfactory.newXPath();
|
344
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
345
|
if (nl.getLength() > 0) {
|
346
|
size= nl.item(0).getNodeValue();
|
347
|
}
|
348
|
if(size!=null && Integer.parseInt(size)>0){
|
349
|
nl = (NodeList) xpath.compile("//field[@name='resulttypename']/@value").evaluate(document, XPathConstants.NODESET);
|
350
|
if (nl.getLength() > 0) {
|
351
|
r.setResultType(nl.item(0).getNodeValue());
|
352
|
}
|
353
|
nl = (NodeList) xpath.compile("//field[@name='bestlicense']/@value").evaluate(document, XPathConstants.NODESET);
|
354
|
if (nl.getLength() > 0) {
|
355
|
r.setBestLicense(nl.item(0).getNodeValue());
|
356
|
}
|
357
|
nl = (NodeList) xpath.compile("//field[@name='collectedfrom']/@value").evaluate(document, XPathConstants.NODESET);
|
358
|
if (nl.getLength() > 0) {
|
359
|
r.setCollectedFrom(nl.item(0).getNodeValue());
|
360
|
}
|
361
|
nl = (NodeList) xpath.compile("//field[@name='title']/@value").evaluate(document, XPathConstants.NODESET);
|
362
|
if (nl.getLength() > 0) {
|
363
|
r.setTitle(nl.item(0).getNodeValue());
|
364
|
}
|
365
|
nl = (NodeList) xpath.compile("//field[@name='pid']").evaluate(document, XPathConstants.NODESET);
|
366
|
for(int i=0; i<nl.getLength();i++){
|
367
|
String id="";
|
368
|
String type="";
|
369
|
int count;
|
370
|
|
371
|
|
372
|
NodeList valueNodes = (NodeList) xpath.compile("//field[@name='value']/@value").evaluate(nl.item(i), XPathConstants.NODESET);
|
373
|
if (valueNodes.getLength() > 0) {
|
374
|
id = valueNodes.item(i).getNodeValue();
|
375
|
}
|
376
|
NodeList classNodes = (NodeList) xpath.compile("//field[@name='classid']/@value").evaluate(nl.item(i), XPathConstants.NODESET);
|
377
|
if (classNodes.getLength() > 0) {
|
378
|
type = classNodes.item(i).getNodeValue();
|
379
|
}
|
380
|
if (type.equals("doi") && id != null) {
|
381
|
r.setDoi(id);
|
382
|
r.setExternal_url("http://dx.doi.org/"+r.getDoi());
|
383
|
|
384
|
}else if(type.equals("pmc") && id != null) {
|
385
|
r.setPmcid(id);
|
386
|
}
|
387
|
|
388
|
}
|
389
|
|
390
|
|
391
|
}else{
|
392
|
try{
|
393
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("results_not_found.txt", true)));
|
394
|
Date date= new java.util.Date();
|
395
|
out.println(new Timestamp(date.getTime())+" - Result Not Found: "+r.getOpenaireId()+ " in claim "+claimId);
|
396
|
out.close();
|
397
|
}catch (IOException e) {
|
398
|
e.printStackTrace();
|
399
|
System.err.println("Couldn't write to file " + "results_not_found.txt");
|
400
|
}
|
401
|
//System.err.println("Result Not Found " + r.getOpenaireId());
|
402
|
r.setFound(false);
|
403
|
}
|
404
|
|
405
|
} catch (Exception e) {
|
406
|
e.printStackTrace();
|
407
|
return ;
|
408
|
}
|
409
|
}
|
410
|
public static String getObjIdentifierFromSearch(String id){
|
411
|
SearchUtils searchUtils= new SearchUtils();
|
412
|
String searchUri=searchUtils.getResultSearchUrl(id);
|
413
|
String objIdentifier=null;
|
414
|
if(searchUri==null){
|
415
|
return null;
|
416
|
}
|
417
|
System.out.println("Result query: "+searchUri);
|
418
|
try {
|
419
|
String size=null;
|
420
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
421
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
422
|
Document document=dBuilder.parse(searchUri);
|
423
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
424
|
XPath xpath = xPathfactory.newXPath();
|
425
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
426
|
if (nl.getLength() > 0) {
|
427
|
size= nl.item(0).getNodeValue();
|
428
|
}
|
429
|
if(size!=null && Integer.parseInt(size)>0){
|
430
|
//indexId="objIdentifier"
|
431
|
//<field name="resultId" multiplicity="false" indexId="objIdentifier" label="Object id" value="dedup_wf_001::7832a296929028bbe447d66398a0c43a"/>
|
432
|
nl = (NodeList) xpath.compile("//field[@indexId='objIdentifier']/@value").evaluate(document, XPathConstants.NODESET);
|
433
|
if (nl.getLength() > 0) {
|
434
|
objIdentifier=nl.item(0).getNodeValue();
|
435
|
}
|
436
|
|
437
|
|
438
|
}
|
439
|
} catch (Exception e) {
|
440
|
e.printStackTrace();
|
441
|
return null;
|
442
|
}
|
443
|
return objIdentifier;
|
444
|
}
|
445
|
private static void getResultFromAPI(Result r,String claimId, boolean isPublication){
|
446
|
SearchUtils searchUtils= new SearchUtils();
|
447
|
String searchUri;
|
448
|
if(isPublication) {
|
449
|
searchUri = searchUtils.getPublicationApiUrl(r.getOpenaireId());
|
450
|
}else{
|
451
|
searchUri = searchUtils.getDatasetApiUrl(r.getOpenaireId());
|
452
|
}
|
453
|
if(searchUri==null){
|
454
|
return;
|
455
|
}
|
456
|
System.out.println("Result query: "+searchUri);
|
457
|
try {
|
458
|
URL obj =obj = new URL(searchUri);
|
459
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
460
|
int responseCode = con.getResponseCode();
|
461
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
462
|
StringBuffer response = new StringBuffer();
|
463
|
String inputLine;
|
464
|
while ((inputLine = in.readLine()) != null) {
|
465
|
response.append(inputLine+"\n");
|
466
|
}
|
467
|
in.close();
|
468
|
String xml = response.toString();
|
469
|
String size=null;
|
470
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
471
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
472
|
//Document document=dBuilder.parse(searchUri);
|
473
|
InputSource is = new InputSource(new StringReader(xml));
|
474
|
Document document= document=dBuilder.parse(is);
|
475
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
476
|
XPath xpath = xPathfactory.newXPath();
|
477
|
|
478
|
|
479
|
NodeList nl = (NodeList) xpath.compile("/response/header/total/text()").evaluate(document, XPathConstants.NODESET);
|
480
|
if (nl.getLength() > 0) {
|
481
|
size= nl.item(0).getNodeValue();
|
482
|
}
|
483
|
if(size!=null && Integer.parseInt(size)>0){
|
484
|
nl = (NodeList) xpath.compile("//resulttype/@classid").evaluate(document, XPathConstants.NODESET);
|
485
|
if (nl.getLength() > 0) {
|
486
|
r.setResultType(nl.item(0).getNodeValue());
|
487
|
}
|
488
|
nl = (NodeList) xpath.compile("//bestlicense/@classid").evaluate(document, XPathConstants.NODESET);
|
489
|
if (nl.getLength() > 0) {
|
490
|
r.setBestLicense(nl.item(0).getNodeValue());
|
491
|
}
|
492
|
//TODO check this
|
493
|
/* nl = (NodeList) xpath.compile("//collectedfrom/@id").evaluate(document, XPathConstants.NODESET);
|
494
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_OPENAIRE);
|
495
|
for(int i=0;i<nl.getLength();i++) {
|
496
|
String datasourceId=nl.item(i).getNodeValue();
|
497
|
if (datasourceId.equals(ClaimUtils.DATASOURCE_ID__ORCID)) {
|
498
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
|
499
|
}else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID_CROSSREF)) {
|
500
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_CROSSREF);
|
501
|
}else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID_DATACTE)) {
|
502
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
|
503
|
}else if (datasourceId.equals(ClaimUtils.DATASOURCE_ID__ORCID)) {
|
504
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
|
505
|
}
|
506
|
}*/
|
507
|
// if (nl.getLength() > 0) {
|
508
|
// r.setCollectedFrom(nl.item(0).getNodeValue());
|
509
|
// }
|
510
|
nl = (NodeList) xpath.compile("//title/text()").evaluate(document, XPathConstants.NODESET);
|
511
|
if (nl.getLength() > 0) {
|
512
|
r.setTitle(nl.item(0).getNodeValue());
|
513
|
}
|
514
|
nl = (NodeList) xpath.compile("//pid[@classid='doi']/text()").evaluate(document, XPathConstants.NODESET);
|
515
|
if (nl.getLength() > 0) {
|
516
|
r.setDoi(nl.item(0).getNodeValue());
|
517
|
r.setExternal_url("http://dx.doi.org/"+r.getDoi());
|
518
|
}
|
519
|
nl = (NodeList) xpath.compile("//pid[@classid='pmc']/text()").evaluate(document, XPathConstants.NODESET);
|
520
|
if (nl.getLength() > 0) {
|
521
|
r.setPmcid(nl.item(0).getNodeValue());
|
522
|
|
523
|
}
|
524
|
r.setXml(xml);
|
525
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_OPENAIRE);
|
526
|
r.setFound(true);
|
527
|
|
528
|
}else{
|
529
|
r.setFound(false);
|
530
|
}
|
531
|
|
532
|
} catch (Exception e) {
|
533
|
e.printStackTrace();
|
534
|
}
|
535
|
}
|
536
|
public static void getXmlfromDatacite(Result r){
|
537
|
r.setResultType(ClaimUtils.DATASET);
|
538
|
SearchUtils searchUtils= new SearchUtils();
|
539
|
if(r!=null&&r.getDoi()==null){
|
540
|
return ;
|
541
|
}
|
542
|
String xml =searchUtils.getDataciteXmlRecord(r.getDoi());
|
543
|
if(xml==null){
|
544
|
return ;
|
545
|
}
|
546
|
try {
|
547
|
String size=null;
|
548
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
549
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
550
|
InputSource is = new InputSource(new StringReader(xml));
|
551
|
Document document= document=dBuilder.parse(is);
|
552
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
553
|
XPath xpath = xPathfactory.newXPath();
|
554
|
NodeList nl ;
|
555
|
|
556
|
nl = (NodeList) xpath.compile("//j.0:title/text()").evaluate(document, XPathConstants.NODESET);
|
557
|
if (nl.getLength() > 0) {
|
558
|
r.setTitle(nl.item(0).getNodeValue());
|
559
|
}
|
560
|
nl = (NodeList) xpath.compile("//j.0:identifier/text()").evaluate(document, XPathConstants.NODESET);
|
561
|
if (nl.getLength() > 0) {
|
562
|
r.setDoi(nl.item(0).getNodeValue());
|
563
|
r.setExternal_url("http://dx.doi.org/"+r.getDoi());
|
564
|
}
|
565
|
r.setXml(xml);
|
566
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_DATACITE);
|
567
|
r.setFound(true);
|
568
|
} catch (Exception e) {
|
569
|
e.printStackTrace();
|
570
|
}
|
571
|
}
|
572
|
public static void getJsonfromCrossref(Result r){
|
573
|
SearchUtils searchUtils= new SearchUtils();
|
574
|
if(r!=null&&r.getDoi()==null){
|
575
|
return ;
|
576
|
}
|
577
|
String xml =searchUtils.getCrossrefJsonRecord(r.getDoi());
|
578
|
if(xml!=null){
|
579
|
r.setXml(xml);
|
580
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_CROSSREF);
|
581
|
}
|
582
|
}
|
583
|
public static void getXmlfromOrcid(Result r){
|
584
|
r.setResultType(ClaimUtils.PUBLICATION);
|
585
|
SearchUtils searchUtils= new SearchUtils();
|
586
|
if(r!=null&&r.getOrcidworkid()==null){
|
587
|
return ;
|
588
|
}
|
589
|
System.out.println("OWI: " + r.getOrcidworkid());
|
590
|
String orcid=r.getOrcidworkid().substring(0,19);
|
591
|
String orcidworkid=r.getOrcidworkid().substring(20,r.getOrcidworkid().length());
|
592
|
//0000-0003-5000-0001
|
593
|
System.out.println("orcid: "+orcid);
|
594
|
System.out.println("OWI: "+orcidworkid);
|
595
|
String xml =searchUtils.getOrcidXmlRecord(orcid);
|
596
|
if(xml==null){
|
597
|
return ;
|
598
|
}
|
599
|
try {
|
600
|
String size=null;
|
601
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
602
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
603
|
InputSource is = new InputSource(new StringReader(xml));
|
604
|
Document document= document=dBuilder.parse(is);
|
605
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
606
|
XPath xpath = xPathfactory.newXPath();
|
607
|
NodeList nl ;
|
608
|
// getParent().toXML()
|
609
|
// <orcid-work put-code="19500531" visibility="public">
|
610
|
String s = (String) xpath.compile("//orcid-work[@put-code="+orcidworkid+"]").evaluate(document, XPathConstants.STRING);
|
611
|
System.out.println("Here: "+s);
|
612
|
|
613
|
|
614
|
NodeList worknl = (NodeList) xpath.compile("//orcid-work[@put-code="+orcidworkid+"]").evaluate(document,XPathConstants.NODESET);
|
615
|
if (worknl.getLength() > 0) {
|
616
|
|
617
|
nl = (NodeList) xpath.compile("//orcid-work").evaluate(document, XPathConstants.NODESET);
|
618
|
for (int i = 0; i < nl.getLength(); i++) {
|
619
|
if(!worknl.item(0).isEqualNode(nl.item(i))) {
|
620
|
nl.item(i).getParentNode().removeChild(nl.item(i));
|
621
|
}
|
622
|
}
|
623
|
|
624
|
|
625
|
DOMSource domSource = new DOMSource(document);
|
626
|
StringWriter writer = new StringWriter();
|
627
|
StreamResult result = new StreamResult(writer);
|
628
|
TransformerFactory tf = TransformerFactory.newInstance();
|
629
|
Transformer transformer = tf.newTransformer();
|
630
|
transformer.transform(domSource, result);
|
631
|
System.out.println("XML IN String format is: \n" + writer.toString());
|
632
|
r.setXml(xml);
|
633
|
r.setCollectedFrom(ClaimUtils.COLLECTED_FROM_ORCID);
|
634
|
r.setFound(true);
|
635
|
}
|
636
|
nl = (NodeList) xpath.compile("//j.0:identifier/text()").evaluate(document, XPathConstants.NODESET);
|
637
|
if (nl.getLength() > 0) {
|
638
|
r.setDoi(nl.item(0).getNodeValue());
|
639
|
r.setExternal_url("http://dx.doi.org/"+r.getDoi());
|
640
|
}
|
641
|
} catch (Exception e) {
|
642
|
e.printStackTrace();
|
643
|
}
|
644
|
}
|
645
|
public static Project buildProject(String id, String claimId, boolean useAPI){
|
646
|
Project body=new Project();
|
647
|
if (id.contains("|")) {
|
648
|
id = id.split("\\|")[1];
|
649
|
}
|
650
|
body.setOpenaireId(id);
|
651
|
if(useAPI){
|
652
|
getProjectFromAPI(body,claimId);
|
653
|
return body;
|
654
|
}else{
|
655
|
getProjectFromSearch(body, claimId);
|
656
|
return body;
|
657
|
}
|
658
|
|
659
|
|
660
|
}
|
661
|
|
662
|
private static void getProjectFromSearch(Project project, String claimId){
|
663
|
SearchUtils s=new SearchUtils();
|
664
|
String searchUri=s.getProjectSearchUrl(project.getOpenaireId());
|
665
|
if(searchUri==null){
|
666
|
return ;
|
667
|
}
|
668
|
System.out.println("Project query: "+searchUri);
|
669
|
try {
|
670
|
String size=null;
|
671
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
672
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
673
|
Document document=dBuilder.parse(searchUri);
|
674
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
675
|
XPath xpath = xPathfactory.newXPath();
|
676
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
677
|
if (nl.getLength() > 0) {
|
678
|
size= nl.item(0).getNodeValue();
|
679
|
}
|
680
|
if(size!=null && Integer.parseInt(size)>0){
|
681
|
/*
|
682
|
<field name="name" indexId="projectacronym" multiplicity="true" label=colle value="OPENAIRE"/><field name="code" indexId="projectcode" multiplicity="true" label="Project code" value="246686"/><field name="title" indexId="projecttitle" multiplicity="true" label="Title" value="Open Access Infrastructure for Research in Europe"/>
|
683
|
*/
|
684
|
nl = (NodeList) xpath.compile("//field[@name='title']/@value").evaluate(document, XPathConstants.NODESET);
|
685
|
if (nl.getLength() > 0) {
|
686
|
project.setName(nl.item(0).getNodeValue());
|
687
|
}
|
688
|
nl = (NodeList) xpath.compile("//field[@name='name']/@value").evaluate(document, XPathConstants.NODESET);
|
689
|
if (nl.getLength() > 0) {
|
690
|
project.setAcronym(nl.item(0).getNodeValue());
|
691
|
}
|
692
|
/*
|
693
|
<test/><field name="funder" multiplicity="true"><field name="funderid" indexId="funderid" label="" value="ec__________::EC"/><field name="fundershortname" indexId="fundershortname" label="" value="EC"/><field name="fundername" indexId="fundername" label="" value="European Commission"/></field>
|
694
|
*/
|
695
|
|
696
|
nl = (NodeList) xpath.compile("//field[@name='fundername']/@value").evaluate(document, XPathConstants.NODESET);
|
697
|
if (nl.getLength() > 0) {
|
698
|
project.setFunderName(nl.item(0).getNodeValue());
|
699
|
}
|
700
|
nl = (NodeList) xpath.compile("//field[@name='funderid']/@value").evaluate(document, XPathConstants.NODESET);
|
701
|
if (nl.getLength() > 0) {
|
702
|
project.setFunderId(nl.item(0).getNodeValue());
|
703
|
}
|
704
|
|
705
|
}else{
|
706
|
try{
|
707
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("projects_not_found.txt", true)));
|
708
|
Date date= new java.util.Date();
|
709
|
out.println(new Timestamp(date.getTime())+" - Projects Not Found: "+project.getOpenaireId()+ " in claim "+claimId);
|
710
|
out.close();
|
711
|
}catch (IOException e) {
|
712
|
e.printStackTrace();
|
713
|
System.err.println("Couldn't write to file " + "projects_not_found.txt");
|
714
|
}
|
715
|
//System.err.println("PROJECT Not Found " + project.getOpenaireId());
|
716
|
project.setFound(false);
|
717
|
|
718
|
}
|
719
|
|
720
|
} catch (Exception e) {
|
721
|
}
|
722
|
}
|
723
|
private static void getProjectFromAPI(Project project, String claimId){
|
724
|
|
725
|
|
726
|
SearchUtils s=new SearchUtils();
|
727
|
String searchUri=s.getProjectApiUrl(project.getOpenaireId());
|
728
|
searchUri="http://api.openaire.eu/search//projects?format=xml&openaireParticipantID=dedup_wf_001::82c87f641bb6219626a0ceca81e0d434";
|
729
|
if(searchUri==null){
|
730
|
return ;
|
731
|
}
|
732
|
//TODO the parsing from API
|
733
|
System.out.println("Project query: "+searchUri);
|
734
|
try {
|
735
|
URL obj =obj = new URL(searchUri);
|
736
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
737
|
int responseCode = con.getResponseCode();
|
738
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
739
|
StringBuffer response = new StringBuffer();
|
740
|
String inputLine;
|
741
|
while ((inputLine = in.readLine()) != null) {
|
742
|
response.append(inputLine);
|
743
|
}
|
744
|
in.close();
|
745
|
String xml = response.toString();
|
746
|
String size=null;
|
747
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
748
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
749
|
//Document document=dBuilder.parse(searchUri);
|
750
|
InputSource is = new InputSource(new StringReader(xml));
|
751
|
Document document= document=dBuilder.parse(is);
|
752
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
753
|
XPath xpath = xPathfactory.newXPath();
|
754
|
|
755
|
|
756
|
NodeList nl = (NodeList) xpath.compile("/response/header/total/text()").evaluate(document, XPathConstants.NODESET);
|
757
|
if (nl.getLength() > 0) {
|
758
|
size= nl.item(0).getNodeValue();
|
759
|
}
|
760
|
if(size!=null && Integer.parseInt(size)>0){
|
761
|
|
762
|
nl = (NodeList) xpath.compile("//title/text()").evaluate(document, XPathConstants.NODESET);
|
763
|
if (nl.getLength() > 0) {
|
764
|
project.setName(nl.item(0).getNodeValue());
|
765
|
}
|
766
|
nl = (NodeList) xpath.compile("//acronym/text()").evaluate(document, XPathConstants.NODESET);
|
767
|
if (nl.getLength() > 0) {
|
768
|
project.setAcronym(nl.item(0).getNodeValue());
|
769
|
}
|
770
|
nl = (NodeList) xpath.compile("//funder/name/text()").evaluate(document, XPathConstants.NODESET);
|
771
|
if (nl.getLength() > 0) {
|
772
|
project.setFunderName(nl.item(0).getNodeValue());
|
773
|
}
|
774
|
nl = (NodeList) xpath.compile("//funder/id/text()").evaluate(document, XPathConstants.NODESET);
|
775
|
if (nl.getLength() > 0) {
|
776
|
project.setFunderId(nl.item(0).getNodeValue());
|
777
|
}
|
778
|
|
779
|
project.setFound(true);
|
780
|
|
781
|
}else{
|
782
|
try{
|
783
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("projects_not_found.txt", true)));
|
784
|
Date date= new java.util.Date();
|
785
|
out.println(new Timestamp(date.getTime())+" - Projects Not Found: "+project.getOpenaireId()+ " in claim "+claimId);
|
786
|
out.close();
|
787
|
}catch (IOException e) {
|
788
|
e.printStackTrace();
|
789
|
System.err.println("Couldn't write to file " + "projects_not_found.txt");
|
790
|
}
|
791
|
//System.err.println("PROJECT Not Found " + project.getOpenaireId());
|
792
|
project.setFound(false);
|
793
|
}
|
794
|
|
795
|
} catch (Exception e) {
|
796
|
e.printStackTrace();
|
797
|
}
|
798
|
}
|
799
|
}
|