1
|
package eu.dnetlib.data.claims.utils;
|
2
|
|
3
|
import org.apache.log4j.Logger;
|
4
|
import org.w3c.dom.Document;
|
5
|
import org.w3c.dom.NodeList;
|
6
|
import org.xml.sax.InputSource;
|
7
|
|
8
|
import javax.xml.parsers.DocumentBuilder;
|
9
|
import javax.xml.parsers.DocumentBuilderFactory;
|
10
|
import javax.xml.xpath.XPath;
|
11
|
import javax.xml.xpath.XPathConstants;
|
12
|
import javax.xml.xpath.XPathFactory;
|
13
|
import java.io.*;
|
14
|
import java.net.HttpURLConnection;
|
15
|
import java.net.URL;
|
16
|
import java.net.URLEncoder;
|
17
|
import java.sql.Timestamp;
|
18
|
import java.util.Date;
|
19
|
|
20
|
/**
|
21
|
* Created by argirok on 20/11/2015.
|
22
|
*/
|
23
|
/*
 * Searches the Search Service and parses the XML it returns.
 * Also searches for DOIs.
 */
|
28
|
public class SearchUtils {
|
29
|
private static final Logger logger = Logger.getLogger(SearchUtils.class);
|
30
|
public static String searchAPIUrl=null;//"http://beta.services.openaire.eu:8480/search/";
|
31
|
|
32
|
private static String crossrefUrl=null;
|
33
|
private static String dataciteUrl=null;
|
34
|
private static String orcidUrlPrefix=null;
|
35
|
private static String orcidUrlSuffix=null;
|
36
|
private static String contextsAPIUrl=null;
|
37
|
private ClaimValidation claimValidation = null;
|
38
|
|
39
|
public SearchUtils(){
|
40
|
|
41
|
}
|
42
|
|
43
|
public String fetchProjectXmlFromIndex(String id) throws Exception{
|
44
|
return getRequest(getProjectApiUrl(id));
|
45
|
}
|
46
|
|
47
|
/**
|
48
|
*Look up in API, if there is no result,
|
49
|
* consider that id is a dedup id and search for the objIdentifier id.
|
50
|
* Then look up again in the API with the objIdentifier
|
51
|
* @param id
|
52
|
* @return xml or null
|
53
|
* @throws Exception
|
54
|
*/
|
55
|
public static String fetchPublicationXmlFromIndex(String id) throws Exception{
|
56
|
|
57
|
String xml=getRequest(getPublicationApiUrl(id));
|
58
|
if(getNumberOfResultsInAPIXML(xml)==0){
|
59
|
xml=null;
|
60
|
}
|
61
|
return xml;
|
62
|
}
|
63
|
|
64
|
public static String fetchSoftwareXmlFromIndex(String id) throws Exception{
|
65
|
String xml=getRequest(getSoftwareApiUrl(id));
|
66
|
return xml;
|
67
|
}
|
68
|
|
69
|
public static String fetchOtherXmlFromIndex(String id) throws Exception{
|
70
|
String xml=getRequest(getOtherApiUrl(id));
|
71
|
return xml;
|
72
|
}
|
73
|
/**
|
74
|
*Look up in API, if there is no result,
|
75
|
* consider that id is a dedup id and search for the objIdentifier id.
|
76
|
* Then look up again in the API with the objIdentifier
|
77
|
*
|
78
|
* @param id
|
79
|
* @return xml
|
80
|
* @throws Exception
|
81
|
*/
|
82
|
public String fetchDatasetXmlFromIndex(String id) throws Exception{
|
83
|
|
84
|
String xml=getRequest(getDatasetApiUrl(id));
|
85
|
if(getNumberOfResultsInAPIXML(xml)==0){
|
86
|
xml=null;
|
87
|
}
|
88
|
return xml;
|
89
|
|
90
|
}
|
91
|
|
92
|
public String fetchResultXMLFromDatacite(String id) throws Exception{
|
93
|
return getRequest(getDataciteUrl(id));
|
94
|
}
|
95
|
public String fetchResultXMLFromOrcid(String id) throws Exception{
|
96
|
|
97
|
return getRequest(getOrcidUrl(id));
|
98
|
}
|
99
|
public String fetchContext(String suffix) throws Exception{
|
100
|
|
101
|
return getRequest(getContextsAPIUrl()+suffix);
|
102
|
}
|
103
|
|
104
|
public String getProjectApiUrl(String id) {
|
105
|
|
106
|
return searchAPIUrl+"api/projects?openaireProjectID="+id;
|
107
|
}
|
108
|
|
109
|
public String getDatasetApiUrl(String id) {
|
110
|
|
111
|
return searchAPIUrl+"api/datasets?openaireDatasetID="+id;
|
112
|
}
|
113
|
|
114
|
public static String getPublicationApiUrl(String id) {
|
115
|
|
116
|
return searchAPIUrl+"api/publications?openairePublicationID="+id;
|
117
|
}
|
118
|
public static String getSoftwareApiUrl(String id) {
|
119
|
return searchAPIUrl+"api/software?openaireSoftwareID="+id;
|
120
|
}
|
121
|
public static String getOtherApiUrl(String id) {
|
122
|
return searchAPIUrl+"api/other?openaireOtherID="+id;
|
123
|
}
|
124
|
|
125
|
private static String getDataciteUrl(String id) {
|
126
|
|
127
|
return dataciteUrl+id;
|
128
|
}
|
129
|
|
130
|
private static String getOrcidUrl(String id) {
|
131
|
|
132
|
return orcidUrlPrefix+id+orcidUrlSuffix;
|
133
|
}
|
134
|
private static String getContextsAPIUrl() {
|
135
|
|
136
|
return contextsAPIUrl;
|
137
|
}
|
138
|
|
139
|
|
140
|
// HTTP GET request
|
141
|
private static String getRequest(String url) throws Exception {
|
142
|
URL obj = new URL(url);
|
143
|
logger.debug(url);
|
144
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
145
|
int responseCode = con.getResponseCode();
|
146
|
if(responseCode != 200){
|
147
|
return null;
|
148
|
}
|
149
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
150
|
StringBuffer response = new StringBuffer();
|
151
|
String inputLine;
|
152
|
while ((inputLine = in.readLine()) != null) {
|
153
|
response.append(inputLine).append("\n");
|
154
|
}
|
155
|
in.close();
|
156
|
return response.toString();
|
157
|
}
|
158
|
|
159
|
public static String getCrossrefJsonRecord(String doi){
|
160
|
String url=crossrefUrl+doi;
|
161
|
URL obj = null;
|
162
|
String responseStr=null;
|
163
|
try {
|
164
|
obj = new URL(url);
|
165
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
166
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
167
|
StringBuffer response = new StringBuffer();
|
168
|
String inputLine;
|
169
|
while ((inputLine = in.readLine()) != null) {
|
170
|
response.append(inputLine).append("\n");
|
171
|
}
|
172
|
in.close();
|
173
|
responseStr=response.toString();
|
174
|
if(responseStr==null||(!responseStr.contains("\"status\":\"ok\"")||!(responseStr.contains("\"DOI\":\"")))){
|
175
|
responseStr=null;
|
176
|
}
|
177
|
} catch (Exception e) {
|
178
|
try{
|
179
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("dois_malformed_urls.txt", true)));
|
180
|
Date date= new java.util.Date();
|
181
|
out.println(new Timestamp(date.getTime())+"Doi:"+doi+" Url:"+url);
|
182
|
out.close();
|
183
|
|
184
|
}catch (IOException e1) {
|
185
|
logger.error("Couldn't write to file " + "dois_malformed_urls.txt",e);
|
186
|
}
|
187
|
|
188
|
}
|
189
|
return responseStr;
|
190
|
}
|
191
|
|
192
|
/**
|
193
|
*
|
194
|
* @param doi
|
195
|
* @return true if the given doi request returns a valid answer otherwise false
|
196
|
* @throws IOException
|
197
|
*/
|
198
|
public static boolean isDoiValid(String doi) throws IOException {
|
199
|
boolean found=false;
|
200
|
String responseStr=getCrossrefJsonRecord(doi);
|
201
|
if(responseStr!=null){
|
202
|
found=true;
|
203
|
}
|
204
|
return found;
|
205
|
}
|
206
|
|
207
|
/**
|
208
|
*
|
209
|
* @param xml : API result xml
|
210
|
* @return number of results Found
|
211
|
* @throws Exception
|
212
|
*/
|
213
|
public static Integer getNumberOfResultsInAPIXML(String xml) throws Exception {
|
214
|
if(xml==null){
|
215
|
return 0;
|
216
|
}
|
217
|
String totalStr=null;
|
218
|
Integer total=0;
|
219
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
220
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
221
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
222
|
Document document=dBuilder.parse(inputSource);
|
223
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
224
|
XPath xpath = xPathfactory.newXPath();
|
225
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
226
|
if (nl.getLength() > 0) {
|
227
|
totalStr= nl.item(0).getNodeValue();
|
228
|
}
|
229
|
if(totalStr!=null){
|
230
|
total=Integer.parseInt(totalStr);
|
231
|
}
|
232
|
return total;
|
233
|
}
|
234
|
|
235
|
public ClaimValidation getClaimValidation() {
|
236
|
return claimValidation;
|
237
|
}
|
238
|
|
239
|
public void setClaimValidation(ClaimValidation claimValidation) {
|
240
|
this.claimValidation = claimValidation;
|
241
|
}
|
242
|
|
243
|
public String getSearchAPIUrl() {
|
244
|
return searchAPIUrl;
|
245
|
}
|
246
|
|
247
|
public void setSearchAPIUrl(String searchAPIUrl) {
|
248
|
this.searchAPIUrl = searchAPIUrl;
|
249
|
}
|
250
|
|
251
|
public static void setContextsAPIUrl(String contextsAPIUrl) {
|
252
|
SearchUtils.contextsAPIUrl = contextsAPIUrl;
|
253
|
}
|
254
|
|
255
|
public static String getCrossrefUrl() {
|
256
|
return crossrefUrl;
|
257
|
}
|
258
|
|
259
|
public static void setCrossrefUrl(String crossrefUrl) {
|
260
|
SearchUtils.crossrefUrl = crossrefUrl;
|
261
|
}
|
262
|
|
263
|
public static String getDataciteUrl() {
|
264
|
return dataciteUrl;
|
265
|
}
|
266
|
|
267
|
public static void setDataciteUrl(String dataciteUrl) {
|
268
|
SearchUtils.dataciteUrl = dataciteUrl;
|
269
|
}
|
270
|
|
271
|
public static String getOrcidUrlPrefix() {
|
272
|
return orcidUrlPrefix;
|
273
|
}
|
274
|
|
275
|
public static void setOrcidUrlPrefix(String orcidUrlPrefix) {
|
276
|
SearchUtils.orcidUrlPrefix = orcidUrlPrefix;
|
277
|
}
|
278
|
|
279
|
public static String getOrcidUrlSuffix() {
|
280
|
return orcidUrlSuffix;
|
281
|
}
|
282
|
|
283
|
public static void setOrcidUrlSuffix(String orcidUrlSuffix) {
|
284
|
SearchUtils.orcidUrlSuffix = orcidUrlSuffix;
|
285
|
}
|
286
|
}
|
287
|
|
288
|
|