1
|
package eu.dnetlib.data.claims.utils;
|
2
|
|
3
|
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.util.Date;
|
19
|
|
20
|
/**
 * Created by argirok on 20/11/2015.
 */
|
23
|
/*
 * Searches the Search Service and parses the XML it returns.
 * Also looks up DOIs.
 */
|
28
|
public class SearchUtils {
|
29
|
private static final Logger logger = Logger.getLogger(SearchUtils.class);
|
30
|
public static String searchAPIUrl=null;//"http://beta.services.openaire.eu:8480/search/";
|
31
|
|
32
|
private static String crossrefUrl="https://api.crossref.org/works?filter=doi:";
|
33
|
private static String dataciteUrl="https://data.datacite.org/application/rdf+xml/";
|
34
|
private static String dataciteNewAPIUrl="https://api.datacite.org/works/";
|
35
|
private static String orcidUrlPrefix="https://pub.orcid.org/v2.1/";
|
36
|
private static String orcidUrlSuffix="/works";
|
37
|
// private static String contextsAPIUrl="https://dev-openaire.d4science.org/openaire/context";
|
38
|
private static String contextsAPIUrl="http://beta.services.openaire.eu:8080/openaire/context";
|
39
|
private static String contextsAPIUrlProduction="http://services.openaire.eu:8080/openaire/context";
|
40
|
private boolean useApi=true;
|
41
|
private ClaimValidation claimValidation = null;
|
42
|
|
43
|
public SearchUtils(){
|
44
|
|
45
|
}
|
46
|
|
47
|
public String fetchProjectXmlFromIndex(String id) throws Exception{
|
48
|
return getRequest(getProjectApiUrl(id));
|
49
|
}
|
50
|
|
51
|
/**
|
52
|
*Look up in API, if there is no result,
|
53
|
* consider that id is a dedup id and search for the objIdentifier id.
|
54
|
* Then look up again in the API with the objIdentifier
|
55
|
* @param id
|
56
|
* @return xml or null
|
57
|
* @throws Exception
|
58
|
*/
|
59
|
public static String fetchPublicationXmlFromIndex(String id) throws Exception{
|
60
|
|
61
|
String xml=getRequest(getPublicationApiUrl(id));
|
62
|
if(getNumberOfResultsInAPIXML(xml)==0){
|
63
|
xml=null;
|
64
|
}
|
65
|
return xml;
|
66
|
}
|
67
|
|
68
|
public static String fetchSoftwareXmlFromIndex(String id) throws Exception{
|
69
|
String xml=getRequest(getSoftwareApiUrl(id));
|
70
|
return xml;
|
71
|
}
|
72
|
|
73
|
public static String fetchOtherXmlFromIndex(String id) throws Exception{
|
74
|
String xml=getRequest(getOtherApiUrl(id));
|
75
|
return xml;
|
76
|
}
|
77
|
/**
|
78
|
*Look up in API, if there is no result,
|
79
|
* consider that id is a dedup id and search for the objIdentifier id.
|
80
|
* Then look up again in the API with the objIdentifier
|
81
|
*
|
82
|
* @param id
|
83
|
* @return xml
|
84
|
* @throws Exception
|
85
|
*/
|
86
|
public String fetchDatasetXmlFromIndex(String id) throws Exception{
|
87
|
|
88
|
String xml=getRequest(getDatasetApiUrl(id));
|
89
|
if(getNumberOfResultsInAPIXML(xml)==0){
|
90
|
xml=null;
|
91
|
}
|
92
|
return xml;
|
93
|
|
94
|
}
|
95
|
|
96
|
public static String fetchResultXMLFromDatacite(String id) throws Exception{
|
97
|
return getRequest(getDataciteUrl(id));
|
98
|
}
|
99
|
public static String fetchResultXMLFromOrcid(String id) throws Exception{
|
100
|
|
101
|
return getRequest(getOrcidUrl(id));
|
102
|
}
|
103
|
public static String fetchContext(String suffix, boolean production) throws Exception{
|
104
|
|
105
|
return getRequest(getContextsAPIUrl(production)+suffix);
|
106
|
}
|
107
|
|
108
|
public String getProjectApiUrl(String id) {
|
109
|
|
110
|
return searchAPIUrl+"api/projects?openaireProjectID="+id;
|
111
|
}
|
112
|
|
113
|
public String getDatasetApiUrl(String id) {
|
114
|
|
115
|
return searchAPIUrl+"api/datasets?openaireDatasetID="+id;
|
116
|
}
|
117
|
|
118
|
public static String getPublicationApiUrl(String id) {
|
119
|
|
120
|
return searchAPIUrl+"api/publications?openairePublicationID="+id;
|
121
|
}
|
122
|
public static String getSoftwareApiUrl(String id) {
|
123
|
return searchAPIUrl+"api/software?openaireSoftwareID="+id;
|
124
|
}
|
125
|
public static String getOtherApiUrl(String id) {
|
126
|
return searchAPIUrl+"api/other?openaireOtherID="+id;
|
127
|
}
|
128
|
|
129
|
private static String getDataciteUrl(String id) {
|
130
|
|
131
|
return dataciteNewAPIUrl+id;
|
132
|
}
|
133
|
|
134
|
private static String getOrcidUrl(String id) {
|
135
|
|
136
|
return orcidUrlPrefix+id+orcidUrlSuffix;
|
137
|
}
|
138
|
private static String getContextsAPIUrl( boolean production) {
|
139
|
|
140
|
return ((production)?contextsAPIUrlProduction:contextsAPIUrl);
|
141
|
}
|
142
|
|
143
|
|
144
|
// HTTP GET request
|
145
|
private static String getRequest(String url) throws Exception {
|
146
|
URL obj = new URL(url);
|
147
|
logger.debug(url);
|
148
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
149
|
int responseCode = con.getResponseCode();
|
150
|
if(responseCode != 200){
|
151
|
return null;
|
152
|
}
|
153
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
154
|
StringBuffer response = new StringBuffer();
|
155
|
String inputLine;
|
156
|
while ((inputLine = in.readLine()) != null) {
|
157
|
response.append(inputLine).append("\n");
|
158
|
}
|
159
|
in.close();
|
160
|
return response.toString();
|
161
|
}
|
162
|
|
163
|
public static String getCrossrefJsonRecord(String doi){
|
164
|
String url=crossrefUrl+doi;
|
165
|
URL obj = null;
|
166
|
String responseStr=null;
|
167
|
try {
|
168
|
obj = new URL(url);
|
169
|
HttpURLConnection con = (HttpURLConnection) obj.openConnection();
|
170
|
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
171
|
StringBuffer response = new StringBuffer();
|
172
|
String inputLine;
|
173
|
while ((inputLine = in.readLine()) != null) {
|
174
|
response.append(inputLine).append("\n");
|
175
|
}
|
176
|
in.close();
|
177
|
responseStr=response.toString();
|
178
|
if(responseStr==null||(!responseStr.contains("\"status\":\"ok\"")||!(responseStr.contains("\"DOI\":\"")))){
|
179
|
responseStr=null;
|
180
|
}
|
181
|
} catch (Exception e) {
|
182
|
try{
|
183
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("dois_malformed_urls.txt", true)));
|
184
|
Date date= new java.util.Date();
|
185
|
out.println(new Timestamp(date.getTime())+"Doi:"+doi+" Url:"+url);
|
186
|
out.close();
|
187
|
|
188
|
}catch (IOException e1) {
|
189
|
logger.error("Couldn't write to file " + "dois_malformed_urls.txt",e);
|
190
|
}
|
191
|
|
192
|
}
|
193
|
return responseStr;
|
194
|
}
|
195
|
|
196
|
/**
|
197
|
*
|
198
|
* @param doi
|
199
|
* @return true if the given doi request returns a valid answer otherwise false
|
200
|
* @throws IOException
|
201
|
*/
|
202
|
public static boolean isDoiValid(String doi) throws IOException {
|
203
|
boolean found=false;
|
204
|
String responseStr=getCrossrefJsonRecord(doi);
|
205
|
if(responseStr!=null){
|
206
|
found=true;
|
207
|
}
|
208
|
return found;
|
209
|
}
|
210
|
|
211
|
/**
|
212
|
*
|
213
|
* @param xml : API result xml
|
214
|
* @return number of results Found
|
215
|
* @throws Exception
|
216
|
*/
|
217
|
public static Integer getNumberOfResultsInAPIXML(String xml) throws Exception {
|
218
|
if(xml==null){
|
219
|
return 0;
|
220
|
}
|
221
|
String totalStr=null;
|
222
|
Integer total=0;
|
223
|
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
224
|
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
225
|
InputSource inputSource= new InputSource(new StringReader(xml));
|
226
|
Document document=dBuilder.parse(inputSource);
|
227
|
XPathFactory xPathfactory= XPathFactory.newInstance();
|
228
|
XPath xpath = xPathfactory.newXPath();
|
229
|
NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
|
230
|
if (nl.getLength() > 0) {
|
231
|
totalStr= nl.item(0).getNodeValue();
|
232
|
}
|
233
|
if(totalStr!=null){
|
234
|
total=Integer.parseInt(totalStr);
|
235
|
}
|
236
|
return total;
|
237
|
}
|
238
|
|
239
|
public ClaimValidation getClaimValidation() {
|
240
|
return claimValidation;
|
241
|
}
|
242
|
|
243
|
public void setClaimValidation(ClaimValidation claimValidation) {
|
244
|
this.claimValidation = claimValidation;
|
245
|
}
|
246
|
|
247
|
public String getSearchAPIUrl() {
|
248
|
return searchAPIUrl;
|
249
|
}
|
250
|
|
251
|
public void setSearchAPIUrl(String searchAPIUrl) {
|
252
|
this.searchAPIUrl = searchAPIUrl;
|
253
|
}
|
254
|
}
|
255
|
|
256
|
|