1
|
package eu.dnetlib.data.claimsDemo;
|
2
|
|
3
|
import eu.dnetlib.data.claims.migration.ClaimValidation;
|
4
|
import org.apache.log4j.Logger;
|
5
|
import org.w3c.dom.Document;
|
6
|
import org.w3c.dom.NodeList;
|
7
|
import org.xml.sax.InputSource;
|
8
|
|
9
|
import javax.xml.parsers.DocumentBuilder;
|
10
|
import javax.xml.parsers.DocumentBuilderFactory;
|
11
|
import javax.xml.xpath.XPath;
|
12
|
import javax.xml.xpath.XPathConstants;
|
13
|
import javax.xml.xpath.XPathFactory;
|
14
|
import java.io.*;
|
15
|
import java.net.HttpURLConnection;
|
16
|
import java.net.URL;
|
17
|
import java.net.URLEncoder;
|
18
|
import java.sql.Timestamp;
|
19
|
import java.util.Date;
|
20
|
|
21
|
/**
|
22
|
* Created by argirok on 20/11/2015.
|
23
|
*/
|
24
|
/*
|
25
|
* Searches the OpenAIRE Search Service and parses the XML it returns.
|
26
|
* Also looks up DOIs.
|
27
|
*
|
28
|
*/
|
29
|
public class SearchUtils {
|
30
|
private static final Logger logger = Logger.getLogger(SearchUtils.class);
|
31
|
|
32
|
private static String searchUrl="http://beta.services.openaire.eu:8480/search/search?action=search";
|
33
|
private static String apiUrlForResults="http://beta.services.openaire.eu:8480/search/api";//"http://api.openaire.eu/search";
|
34
|
// private static String apiUrlForDedupResults="http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
|
35
|
private static String apiUrlForProjects="http://beta.services.openaire.eu:8480/search/api";//"http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
|
36
|
|
37
|
private static String searchUrlProduction="http://services.openaire.eu:8480/search/search?action=search";
|
38
|
private static String apiUrlForResultsProduction="http://services.openaire.eu:8480/search/api";//"http://api.openaire.eu/search";
|
39
|
private static String apiUrlForProjectsProduction="http://services.openaire.eu:8480/search/api";//"http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
|
40
|
|
41
|
|
42
|
private static String crossrefUrl="https://api.crossref.org/works?filter=doi:";
|
43
|
private static String dataciteUrl="https://data.datacite.org/application/rdf+xml/";
|
44
|
private static String dataciteNewAPIUrl="https://api.datacite.org/works/";
|
45
|
private static String orcidUrlPrefix="https://pub.orcid.org/v2.1/";
|
46
|
private static String orcidUrlSuffix="/works";
|
47
|
// private static String contextsAPIUrl="https://dev-openaire.d4science.org/openaire/context";
|
48
|
private static String contextsAPIUrl="http://beta.services.openaire.eu:8080/openaire/context";
|
49
|
private static String contextsAPIUrlProduction="http://services.openaire.eu:8080/openaire/context";
|
50
|
private boolean useApi=true;
|
51
|
private ClaimValidation claimValidation = null;
|
52
|
|
53
|
public SearchUtils(){
|
54
|
|
55
|
}
|
56
|
|
57
|
public static String fetchProjectXmlFromIndex(String id, boolean production) throws Exception{
|
58
|
return getRequest(getProjectApiUrl(id, production));
|
59
|
}
|
60
|
|
61
|
/**
|
62
|
*Look up in API, if there is no result,
|
63
|
* consider that id is a dedup id and search for the objIdentifier id.
|
64
|
* Then look up again in the API with the objIdentifier
|
65
|
* @param id
|
66
|
* @return xml or null
|
67
|
* @throws Exception
|
68
|
*/
|
69
|
public static String fetchPublicationXmlFromIndex(String id, boolean production) throws Exception{
|
70
|
|
71
|
String xml=getRequest(getPublicationApiUrl(id, production));
|
72
|
if(getNumberOfResultsInAPIXML(xml)==0){
|
73
|
xml=null;
|
74
|
}
|
75
|
return xml;
|
76
|
}
|
77
|
public static String fetchDedupXmlFromIndex(String id, boolean production) throws Exception{
|
78
|
|
79
|
String xml= null;
|
80
|
String objId=getResultObjIdentifierFromSearch(id,production);
|
81
|
if(objId!=null){
|
82
|
xml=getRequest(getPublicationApiUrl(objId, production));
|
83
|
}
|
84
|
return xml;
|
85
|
}
|
86
|
|
87
|
public static String fetchSoftwareXmlFromIndex(String id, boolean production) throws Exception{
|
88
|
String xml=getRequest(getSoftwareApiUrl(id,production));
|
89
|
return xml;
|
90
|
}
|
91
|
|
92
|
public static String fetchOtherXmlFromIndex(String id, boolean production) throws Exception{
|
93
|
String xml=getRequest(getOtherApiUrl(id,production));
|
94
|
return xml;
|
95
|
}
|
96
|
/**
|
97
|
*Look up in API, if there is no result,
|
98
|
* consider that id is a dedup id and search for the objIdentifier id.
|
99
|
* Then look up again in the API with the objIdentifier
|
100
|
*
|
101
|
* @param id
|
102
|
* @return xml
|
103
|
* @throws Exception
|
104
|
*/
|
105
|
public static String fetchDatasetXmlFromIndex(String id, boolean production) throws Exception{
|
106
|
|
107
|
String xml=getRequest(getDatasetApiUrl(id, production));
|
108
|
if(getNumberOfResultsInAPIXML(xml)==0){
|
109
|
xml=null;
|
110
|
}
|
111
|
return xml;
|
112
|
|
113
|
}
|
114
|
|
115
|
public static String fetchResultXMLFromDatacite(String id) throws Exception{
|
116
|
return getRequest(getDataciteUrl(id));
|
117
|
}
|
118
|
public static String fetchResultXMLFromOrcid(String id) throws Exception{
|
119
|
|
120
|
return getRequest(getOrcidUrl(id));
|
121
|
}
|
122
|
public static String fetchContext(String suffix, boolean production) throws Exception{
|
123
|
|
124
|
return getRequest(getContextsAPIUrl(production)+suffix);
|
125
|
}
|
126
|
|
127
|
public static String getProjectApiUrl(String id, boolean production) {
|
128
|
|
129
|
return ((production)?apiUrlForProjectsProduction:apiUrlForProjects)+"/projects?openaireProjectID="+id;
|
130
|
}
|
131
|
|
132
|
public static String getDatasetApiUrl(String id, boolean production) {
|
133
|
|
134
|
return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/datasets?openaireDatasetID="+id;
|
135
|
}
|
136
|
|
137
|
public static String getPublicationApiUrl(String id, boolean production) {
|
138
|
|
139
|
return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/publications?openairePublicationID="+id;
|
140
|
}
|
141
|
public static String getSoftwareApiUrl(String id, boolean production) {
|
142
|
return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/software?openaireSoftwareID="+id;
|
143
|
}
|
144
|
public static String getOtherApiUrl(String id, boolean production) {
|
145
|
return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/other?openaireOtherID="+id;
|
146
|
}
|
147
|
// public static String getDedupPublicationApiUrl(String id) {
|
148
|
//
|
149
|
// return apiUrlForDedupResults+"/publications?openairePublicationID="+id;
|
150
|
// }
|
151
|
private static String getDataciteUrl(String id) {
|
152
|
|
153
|
return dataciteNewAPIUrl+id;
|
154
|
}
|
155
|
|
156
|
private static String getOrcidUrl(String id) {
|
157
|
|
158
|
return orcidUrlPrefix+id+orcidUrlSuffix;
|
159
|
}
|
160
|
private static String getContextsAPIUrl( boolean production) {
|
161
|
|
162
|
return ((production)?contextsAPIUrlProduction:contextsAPIUrl);
|
163
|
}
|
164
|
|
165
|
/**
|
166
|
* Get result and objIdentifier form openaire Search
|
167
|
* @param resultdupid
|
168
|
* @return
|
169
|
*/
|
170
|
private static String getResultXMLByResultdupidSearchByUrl(String resultdupid, boolean production) {
|
171
|
String url= null;
|
172
|
try {
|
173
|
url = ((production)?searchUrlProduction:searchUrl)+"&sTransformer=results_openaire&query="+
|
174
|
URLEncoder.encode("(((deletedbyinference = false) AND (oaftype exact result)) )" +
|
175
|
" and (resultdupid exact " + resultdupid + ")", "UTF-8")
|
176
|
+"&size=10&locale=en_GB";
|
177
|
} catch (UnsupportedEncodingException e) {
|
178
|
logger.error("UnsupportedEncodingException",e);
|
179
|
}
|
180
|
return url;
|
181
|
}
|
182
|
// private static String getSoftwareXMLByIdSearchByUrl(String id, boolean production) {
|
183
|
// String url= null;
|
184
|
// try {
|
185
|
// url = ((production)?searchUrlProduction:searchUrl)+"&sTransformer=results_openaire&query="+
|
186
|
// URLEncoder.encode("(((deletedbyinference = false) AND (resulttypeid exact software) AND (oaftype exact result)) )" +
|
187
|
// " and (objIdentifier exact " + id + ")", "UTF-8")
|
188
|
// +"&size=1&locale=en_GB";
|
189
|
// } catch (UnsupportedEncodingException e) {
|
190
|
// logger.error("UnsupportedEncodingException",e);
|
191
|
// }
|
192
|
// return url;
|
193
|
// }
|
194
|
|
195
|
// HTTP GET request
|
196
|
private static String getRequest(String url) throws Exception {
|
197
|
URL obj = new URL(url);
|
198
|
logger.debug(url);
|
199
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
200
|
int responseCode = con.getResponseCode();
|
201
|
if(responseCode != 200){
|
202
|
return null;
|
203
|
}
|
204
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
205
|
StringBuffer response = new StringBuffer();
|
206
|
String inputLine;
|
207
|
while ((inputLine = in.readLine()) != null) {
|
208
|
response.append(inputLine).append("\n");
|
209
|
}
|
210
|
in.close();
|
211
|
return response.toString();
|
212
|
}
|
213
|
|
214
|
public static String getCrossrefJsonRecord(String doi){
|
215
|
String url=crossrefUrl+doi;
|
216
|
URL obj = null;
|
217
|
String responseStr=null;
|
218
|
try {
|
219
|
obj = new URL(url);
|
220
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
221
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
222
|
StringBuffer response = new StringBuffer();
|
223
|
String inputLine;
|
224
|
while ((inputLine = in.readLine()) != null) {
|
225
|
response.append(inputLine).append("\n");
|
226
|
}
|
227
|
in.close();
|
228
|
responseStr=response.toString();
|
229
|
if(responseStr==null||(!responseStr.contains("\"status\":\"ok\"")||!(responseStr.contains("\"DOI\":\"")))){
|
230
|
responseStr=null;
|
231
|
}
|
232
|
} catch (Exception e) {
|
233
|
try{
|
234
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("dois_malformed_urls.txt", true)));
|
235
|
Date date= new java.util.Date();
|
236
|
out.println(new Timestamp(date.getTime())+"Doi:"+doi+" Url:"+url);
|
237
|
out.close();
|
238
|
|
239
|
}catch (IOException e1) {
|
240
|
logger.error("Couldn't write to file " + "dois_malformed_urls.txt",e);
|
241
|
}
|
242
|
|
243
|
}
|
244
|
return responseStr;
|
245
|
}
|
246
|
|
247
|
/**
|
248
|
*
|
249
|
* @param doi
|
250
|
* @return true if the given doi request returns a valid answer otherwise false
|
251
|
* @throws IOException
|
252
|
*/
|
253
|
public static boolean isDoiValid(String doi) throws IOException {
|
254
|
boolean found=false;
|
255
|
String responseStr=getCrossrefJsonRecord(doi);
|
256
|
if(responseStr!=null){
|
257
|
found=true;
|
258
|
}
|
259
|
return found;
|
260
|
}
|
261
|
|
262
|
/**
|
263
|
* Search in index for result with resultdupid
|
264
|
* @param resultdupid Openaire Id
|
265
|
* @return objIdentifier
|
266
|
*/
|
267
|
private static String getResultObjIdentifierFromSearch(String resultdupid, boolean production) throws Exception {
|
268
|
String xml=getRequest(getResultXMLByResultdupidSearchByUrl(resultdupid,production));
|
269
|
String objIdentifier=null;
|
270
|
String size=null;
|
271
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
272
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
273
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
274
|
Document document=dBuilder.parse(inputSource);
|
275
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
276
|
XPath xpath = xPathfactory.newXPath();
|
277
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
278
|
if (nl.getLength() > 0) {
|
279
|
size= nl.item(0).getNodeValue();
|
280
|
}
|
281
|
if(size!=null && Integer.parseInt(size)>0){
|
282
|
nl = (NodeList) xpath.compile("//field[@indexId='objIdentifier']/@value").evaluate(document, XPathConstants.NODESET);
|
283
|
if (nl.getLength() > 0) {
|
284
|
objIdentifier=nl.item(0).getNodeValue();
|
285
|
}
|
286
|
}
|
287
|
return objIdentifier;
|
288
|
}
|
289
|
|
290
|
/**
|
291
|
*
|
292
|
* @param xml : API result xml
|
293
|
* @return number of results Found
|
294
|
* @throws Exception
|
295
|
*/
|
296
|
public static Integer getNumberOfResultsInAPIXML(String xml) throws Exception {
|
297
|
if(xml==null){
|
298
|
return 0;
|
299
|
}
|
300
|
String totalStr=null;
|
301
|
Integer total=0;
|
302
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
303
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
304
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
305
|
Document document=dBuilder.parse(inputSource);
|
306
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
307
|
XPath xpath = xPathfactory.newXPath();
|
308
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
309
|
if (nl.getLength() > 0) {
|
310
|
totalStr= nl.item(0).getNodeValue();
|
311
|
}
|
312
|
if(totalStr!=null){
|
313
|
total=Integer.parseInt(totalStr);
|
314
|
}
|
315
|
return total;
|
316
|
}
|
317
|
|
318
|
public ClaimValidation getClaimValidation() {
|
319
|
return claimValidation;
|
320
|
}
|
321
|
|
322
|
public void setClaimValidation(ClaimValidation claimValidation) {
|
323
|
this.claimValidation = claimValidation;
|
324
|
}
|
325
|
}
|
326
|
|
327
|
|