1
|
package eu.dnetlib.data.claimsDemo;
|
2
|
|
3
|
import eu.dnetlib.data.claims.migration.ClaimValidation;
|
4
|
import org.apache.log4j.Logger;
|
5
|
import org.w3c.dom.Document;
|
6
|
import org.w3c.dom.NodeList;
|
7
|
import org.xml.sax.InputSource;
|
8
|
|
9
|
import javax.xml.parsers.DocumentBuilder;
|
10
|
import javax.xml.parsers.DocumentBuilderFactory;
|
11
|
import javax.xml.xpath.XPath;
|
12
|
import javax.xml.xpath.XPathConstants;
|
13
|
import javax.xml.xpath.XPathFactory;
|
14
|
import java.io.*;
|
15
|
import java.net.HttpURLConnection;
|
16
|
import java.net.URL;
|
17
|
import java.net.URLEncoder;
|
18
|
import java.sql.Timestamp;
|
19
|
import java.util.Date;
|
20
|
|
21
|
/**
|
22
|
* Created by argirok on 20/11/2015.
|
23
|
*/
|
24
|
/*
|
25
|
*Search and Parsing xmls from Search Service
|
26
|
*Search for DOIs
|
27
|
*
|
28
|
*/
|
29
|
public class SearchUtils {
|
30
|
private static final Logger logger = Logger.getLogger(SearchUtils.class);
|
31
|
|
32
|
private static String searchUrl="http://beta.services.openaire.eu:8480/search/search?action=search";
|
33
|
private static String apiUrlForResults="http://beta.services.openaire.eu:8480/search/api";//"http://api.openaire.eu/search";
|
34
|
// private static String apiUrlForDedupResults="http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
|
35
|
private static String apiUrlForProjects="http://beta.services.openaire.eu:8480/search/api";//"http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
|
36
|
|
37
|
private static String searchUrlProduction="http://services.openaire.eu:8480/search/search?action=search";
|
38
|
private static String apiUrlForResultsProduction="http://services.openaire.eu:8480/search/api";//"http://api.openaire.eu/search";
|
39
|
private static String apiUrlForProjectsProduction="http://services.openaire.eu:8480/search/api";//"http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
|
40
|
|
41
|
|
42
|
private static String crossrefUrl="https://api.crossref.org/works?filter=doi:";
|
43
|
private static String dataciteUrl="https://data.datacite.org/application/rdf+xml/";
|
44
|
private static String dataciteNewAPIUrl="https://api.datacite.org/works/";
|
45
|
private static String orcidUrlPrefix="https://pub.orcid.org/v2.1/";
|
46
|
private static String orcidUrlSuffix="/works";
|
47
|
// private static String contextsAPIUrl="https://dev-openaire.d4science.org/openaire/context";
|
48
|
private static String contextsAPIUrl="http://beta.services.openaire.eu:8080/openaire/context";
|
49
|
private boolean useApi=true;
|
50
|
private ClaimValidation claimValidation = null;
|
51
|
|
52
|
public SearchUtils(){
|
53
|
|
54
|
}
|
55
|
|
56
|
public static String fetchProjectXmlFromIndex(String id, boolean production) throws Exception{
|
57
|
return getRequest(getProjectApiUrl(id, production));
|
58
|
}
|
59
|
|
60
|
/**
|
61
|
*Look up in API, if there is no result,
|
62
|
* consider that id is a dedup id and search for the objIdentifier id.
|
63
|
* Then look up again in the API with the objIdentifier
|
64
|
* @param id
|
65
|
* @return xml or null
|
66
|
* @throws Exception
|
67
|
*/
|
68
|
public static String fetchPublicationXmlFromIndex(String id, boolean production) throws Exception{
|
69
|
|
70
|
String xml=getRequest(getPublicationApiUrl(id, production));
|
71
|
if(getNumberOfResultsInAPIXML(xml)==0){
|
72
|
xml=null;
|
73
|
}
|
74
|
return xml;
|
75
|
}
|
76
|
public static String fetchDedupXmlFromIndex(String id, boolean production) throws Exception{
|
77
|
|
78
|
String xml= null;
|
79
|
String objId=getResultObjIdentifierFromSearch(id,production);
|
80
|
if(objId!=null){
|
81
|
xml=getRequest(getPublicationApiUrl(objId, production));
|
82
|
}
|
83
|
return xml;
|
84
|
}
|
85
|
|
86
|
public static String fetchSoftwareXmlFromIndex(String id, boolean production) throws Exception{
|
87
|
String xml=getRequest(getSoftwareApiUrl(id,production));
|
88
|
return xml;
|
89
|
}
|
90
|
|
91
|
|
92
|
/**
|
93
|
*Look up in API, if there is no result,
|
94
|
* consider that id is a dedup id and search for the objIdentifier id.
|
95
|
* Then look up again in the API with the objIdentifier
|
96
|
*
|
97
|
* @param id
|
98
|
* @return xml
|
99
|
* @throws Exception
|
100
|
*/
|
101
|
public static String fetchDatasetXmlFromIndex(String id, boolean production) throws Exception{
|
102
|
|
103
|
String xml=getRequest(getDatasetApiUrl(id, production));
|
104
|
if(getNumberOfResultsInAPIXML(xml)==0){
|
105
|
xml=null;
|
106
|
}
|
107
|
return xml;
|
108
|
|
109
|
}
|
110
|
|
111
|
public static String fetchResultXMLFromDatacite(String id) throws Exception{
|
112
|
return getRequest(getDataciteUrl(id));
|
113
|
}
|
114
|
public static String fetchResultXMLFromOrcid(String id) throws Exception{
|
115
|
|
116
|
return getRequest(getOrcidUrl(id));
|
117
|
}
|
118
|
public static String fetchContext(String suffix) throws Exception{
|
119
|
|
120
|
return getRequest(getContextsAPIUrl()+suffix);
|
121
|
}
|
122
|
|
123
|
public static String getProjectApiUrl(String id, boolean production) {
|
124
|
|
125
|
return ((production)?apiUrlForProjectsProduction:apiUrlForProjects)+"/projects?openaireProjectID="+id;
|
126
|
}
|
127
|
|
128
|
public static String getDatasetApiUrl(String id, boolean production) {
|
129
|
|
130
|
return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/datasets?openaireDatasetID="+id;
|
131
|
}
|
132
|
|
133
|
public static String getPublicationApiUrl(String id, boolean production) {
|
134
|
|
135
|
return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/publications?openairePublicationID="+id;
|
136
|
}
|
137
|
public static String getSoftwareApiUrl(String id, boolean production) {
|
138
|
//TODO change to beta when it's ready
|
139
|
return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/software?openaireSoftwareID="+id;
|
140
|
// return "http://rudie.di.uoa.gr:8080/dnet-functionality-services-2.0.0-SNAPSHOT/api"+"/software?openaireSoftwareID="+id;
|
141
|
}
|
142
|
// public static String getDedupPublicationApiUrl(String id) {
|
143
|
//
|
144
|
// return apiUrlForDedupResults+"/publications?openairePublicationID="+id;
|
145
|
// }
|
146
|
private static String getDataciteUrl(String id) {
|
147
|
|
148
|
return dataciteNewAPIUrl+id;
|
149
|
}
|
150
|
|
151
|
private static String getOrcidUrl(String id) {
|
152
|
|
153
|
return orcidUrlPrefix+id+orcidUrlSuffix;
|
154
|
}
|
155
|
private static String getContextsAPIUrl() {
|
156
|
|
157
|
return contextsAPIUrl;
|
158
|
}
|
159
|
|
160
|
/**
|
161
|
* Get result and objIdentifier form openaire Search
|
162
|
* @param resultdupid
|
163
|
* @return
|
164
|
*/
|
165
|
private static String getResultXMLByResultdupidSearchByUrl(String resultdupid, boolean production) {
|
166
|
String url= null;
|
167
|
try {
|
168
|
url = ((production)?searchUrlProduction:searchUrl)+"&sTransformer=results_openaire&query="+
|
169
|
URLEncoder.encode("(((deletedbyinference = false) AND (oaftype exact result)) )" +
|
170
|
" and (resultdupid exact " + resultdupid + ")", "UTF-8")
|
171
|
+"&size=10&locale=en_GB";
|
172
|
} catch (UnsupportedEncodingException e) {
|
173
|
logger.error("UnsupportedEncodingException",e);
|
174
|
}
|
175
|
return url;
|
176
|
}
|
177
|
// private static String getSoftwareXMLByIdSearchByUrl(String id, boolean production) {
|
178
|
// String url= null;
|
179
|
// try {
|
180
|
// url = ((production)?searchUrlProduction:searchUrl)+"&sTransformer=results_openaire&query="+
|
181
|
// URLEncoder.encode("(((deletedbyinference = false) AND (resulttypeid exact software) AND (oaftype exact result)) )" +
|
182
|
// " and (objIdentifier exact " + id + ")", "UTF-8")
|
183
|
// +"&size=1&locale=en_GB";
|
184
|
// } catch (UnsupportedEncodingException e) {
|
185
|
// logger.error("UnsupportedEncodingException",e);
|
186
|
// }
|
187
|
// return url;
|
188
|
// }
|
189
|
|
190
|
// HTTP GET request
|
191
|
private static String getRequest(String url) throws Exception {
|
192
|
URL obj = new URL(url);
|
193
|
logger.debug(url);
|
194
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
195
|
int responseCode = con.getResponseCode();
|
196
|
if(responseCode != 200){
|
197
|
return null;
|
198
|
}
|
199
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
200
|
StringBuffer response = new StringBuffer();
|
201
|
String inputLine;
|
202
|
while ((inputLine = in.readLine()) != null) {
|
203
|
response.append(inputLine).append("\n");
|
204
|
}
|
205
|
in.close();
|
206
|
return response.toString();
|
207
|
}
|
208
|
|
209
|
public static String getCrossrefJsonRecord(String doi){
|
210
|
String url=crossrefUrl+doi;
|
211
|
URL obj = null;
|
212
|
String responseStr=null;
|
213
|
try {
|
214
|
obj = new URL(url);
|
215
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
216
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
217
|
StringBuffer response = new StringBuffer();
|
218
|
String inputLine;
|
219
|
while ((inputLine = in.readLine()) != null) {
|
220
|
response.append(inputLine).append("\n");
|
221
|
}
|
222
|
in.close();
|
223
|
responseStr=response.toString();
|
224
|
if(responseStr==null||(!responseStr.contains("\"status\":\"ok\"")||!(responseStr.contains("\"DOI\":\"")))){
|
225
|
responseStr=null;
|
226
|
}
|
227
|
} catch (Exception e) {
|
228
|
try{
|
229
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("dois_malformed_urls.txt", true)));
|
230
|
Date date= new java.util.Date();
|
231
|
out.println(new Timestamp(date.getTime())+"Doi:"+doi+" Url:"+url);
|
232
|
out.close();
|
233
|
|
234
|
}catch (IOException e1) {
|
235
|
logger.error("Couldn't write to file " + "dois_malformed_urls.txt",e);
|
236
|
}
|
237
|
|
238
|
}
|
239
|
return responseStr;
|
240
|
}
|
241
|
|
242
|
/**
|
243
|
*
|
244
|
* @param doi
|
245
|
* @return true if the given doi request returns a valid answer otherwise false
|
246
|
* @throws IOException
|
247
|
*/
|
248
|
public static boolean isDoiValid(String doi) throws IOException {
|
249
|
boolean found=false;
|
250
|
String responseStr=getCrossrefJsonRecord(doi);
|
251
|
if(responseStr!=null){
|
252
|
found=true;
|
253
|
}
|
254
|
return found;
|
255
|
}
|
256
|
|
257
|
/**
|
258
|
* Search in index for result with resultdupid
|
259
|
* @param resultdupid Openaire Id
|
260
|
* @return objIdentifier
|
261
|
*/
|
262
|
private static String getResultObjIdentifierFromSearch(String resultdupid, boolean production) throws Exception {
|
263
|
String xml=getRequest(getResultXMLByResultdupidSearchByUrl(resultdupid,production));
|
264
|
String objIdentifier=null;
|
265
|
String size=null;
|
266
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
267
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
268
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
269
|
Document document=dBuilder.parse(inputSource);
|
270
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
271
|
XPath xpath = xPathfactory.newXPath();
|
272
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
273
|
if (nl.getLength() > 0) {
|
274
|
size= nl.item(0).getNodeValue();
|
275
|
}
|
276
|
if(size!=null && Integer.parseInt(size)>0){
|
277
|
nl = (NodeList) xpath.compile("//field[@indexId='objIdentifier']/@value").evaluate(document, XPathConstants.NODESET);
|
278
|
if (nl.getLength() > 0) {
|
279
|
objIdentifier=nl.item(0).getNodeValue();
|
280
|
}
|
281
|
}
|
282
|
return objIdentifier;
|
283
|
}
|
284
|
|
285
|
/**
|
286
|
*
|
287
|
* @param xml : API result xml
|
288
|
* @return number of results Found
|
289
|
* @throws Exception
|
290
|
*/
|
291
|
public static Integer getNumberOfResultsInAPIXML(String xml) throws Exception {
|
292
|
if(xml==null){
|
293
|
return 0;
|
294
|
}
|
295
|
String totalStr=null;
|
296
|
Integer total=0;
|
297
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
298
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
299
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
300
|
Document document=dBuilder.parse(inputSource);
|
301
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
302
|
XPath xpath = xPathfactory.newXPath();
|
303
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
304
|
if (nl.getLength() > 0) {
|
305
|
totalStr= nl.item(0).getNodeValue();
|
306
|
}
|
307
|
if(totalStr!=null){
|
308
|
total=Integer.parseInt(totalStr);
|
309
|
}
|
310
|
return total;
|
311
|
}
|
312
|
|
313
|
public ClaimValidation getClaimValidation() {
|
314
|
return claimValidation;
|
315
|
}
|
316
|
|
317
|
public void setClaimValidation(ClaimValidation claimValidation) {
|
318
|
this.claimValidation = claimValidation;
|
319
|
}
|
320
|
}
|
321
|
|
322
|
|