Project

General

Profile

1
package eu.dnetlib.data.claims.utils;
2

    
3
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.util.Date;
19

    
20
/**
21
 * Created by argirok on 20/11/2015.
22
 */
23
/*
24
*Search and Parsing xmls from Search Service
25
*Search for DOIs
26
*
27
 */
28
public class SearchUtils {
29
    private static final Logger logger = Logger.getLogger(SearchUtils.class);
30

    
31
    private static String searchUrl="http://beta.services.openaire.eu:8480/search/search?action=search";
32
    private static String apiUrlForResults="http://beta.services.openaire.eu:8480/search/api";//"http://api.openaire.eu/search";
33
//    private static String apiUrlForDedupResults="http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
34
    private static String apiUrlForProjects="http://beta.services.openaire.eu:8480/search/api";//"http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
35

    
36
    private static String searchUrlProduction="http://services.openaire.eu:8480/search/search?action=search";
37
    private static String apiUrlForResultsProduction="http://services.openaire.eu:8480/search/api";//"http://api.openaire.eu/search";
38
    private static String apiUrlForProjectsProduction="http://services.openaire.eu:8480/search/api";//"http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
39

    
40

    
41
    private static String crossrefUrl="https://api.crossref.org/works?filter=doi:";
42
    private static String dataciteUrl="https://data.datacite.org/application/rdf+xml/";
43
    private static String dataciteNewAPIUrl="https://api.datacite.org/works/";
44
    private static String orcidUrlPrefix="https://pub.orcid.org/v2.1/";
45
    private static String orcidUrlSuffix="/works";
46
//    private static String contextsAPIUrl="https://dev-openaire.d4science.org/openaire/context";
47
    private static String contextsAPIUrl="http://beta.services.openaire.eu:8080/openaire/context";
48
    private static String contextsAPIUrlProduction="http://services.openaire.eu:8080/openaire/context";
49
    private boolean useApi=true;
50
    private ClaimValidation claimValidation = null;
51

    
52
    public SearchUtils(){
53

    
54
    }
55

    
56
    public static String fetchProjectXmlFromIndex(String id, boolean production) throws Exception{
57
            return getRequest(getProjectApiUrl(id, production));
58
    }
59

    
60
    /**
61
     *Look up in API, if there is no result,
62
     * consider that id is a dedup id and search for the objIdentifier id.
63
     * Then look up again in the API with the objIdentifier
64
     * @param id
65
     * @return xml or null
66
     * @throws Exception
67
     */
68
    public static String fetchPublicationXmlFromIndex(String id, boolean production) throws Exception{
69

    
70
        String xml=getRequest(getPublicationApiUrl(id, production));
71
        if(getNumberOfResultsInAPIXML(xml)==0){
72
            xml=null;
73
        }
74
        return xml;
75
    }
76
    public static String fetchDedupXmlFromIndex(String id, boolean production) throws Exception{
77

    
78
        String xml= null;
79
            String objId=getResultObjIdentifierFromSearch(id,production);
80
            if(objId!=null){
81
                xml=getRequest(getPublicationApiUrl(objId, production));
82
            }
83
        return xml;
84
    }
85

    
86
    public static String fetchSoftwareXmlFromIndex(String id, boolean production) throws Exception{
87
        String xml=getRequest(getSoftwareApiUrl(id,production));
88
         return xml;
89
    }
90

    
91
    public static String fetchOtherXmlFromIndex(String id, boolean production) throws Exception{
92
        String xml=getRequest(getOtherApiUrl(id,production));
93
        return xml;
94
    }
95
    /**
96
     *Look up in API, if there is no result,
97
     * consider that id is a dedup id and search for the objIdentifier id.
98
     * Then look up again in the API with the objIdentifier
99
     *
100
     * @param id
101
     * @return xml
102
     * @throws Exception
103
     */
104
    public static String fetchDatasetXmlFromIndex(String id, boolean production) throws Exception{
105

    
106
        String xml=getRequest(getDatasetApiUrl(id, production));
107
        if(getNumberOfResultsInAPIXML(xml)==0){
108
            xml=null;
109
        }
110
        return xml;
111

    
112
    }
113

    
114
    public static String fetchResultXMLFromDatacite(String id) throws Exception{
115
            return getRequest(getDataciteUrl(id));
116
    }
117
    public static String fetchResultXMLFromOrcid(String id) throws Exception{
118

    
119
            return getRequest(getOrcidUrl(id));
120
    }
121
    public static String fetchContext(String suffix, boolean production) throws Exception{
122

    
123
        return getRequest(getContextsAPIUrl(production)+suffix);
124
    }
125

    
126
     public static String getProjectApiUrl(String id, boolean production)  {
127

    
128
        return ((production)?apiUrlForProjectsProduction:apiUrlForProjects)+"/projects?openaireProjectID="+id;
129
    }
130

    
131
    public static String getDatasetApiUrl(String id, boolean production)  {
132

    
133
        return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/datasets?openaireDatasetID="+id;
134
    }
135

    
136
    public static String getPublicationApiUrl(String id, boolean production)  {
137

    
138
        return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/publications?openairePublicationID="+id;
139
    }
140
    public static String  getSoftwareApiUrl(String id, boolean production)  {
141
        return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/software?openaireSoftwareID="+id;
142
    }
143
    public static String  getOtherApiUrl(String id, boolean production)  {
144
        return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/other?openaireOtherID="+id;
145
    }
146
//    public static String getDedupPublicationApiUrl(String id)  {
147
//
148
//        return apiUrlForDedupResults+"/publications?openairePublicationID="+id;
149
//    }
150
    private static String getDataciteUrl(String id)  {
151

    
152
        return dataciteNewAPIUrl+id;
153
    }
154

    
155
    private static String getOrcidUrl(String id)  {
156

    
157
        return orcidUrlPrefix+id+orcidUrlSuffix;
158
    }
159
    private static String getContextsAPIUrl( boolean production)  {
160

    
161
        return ((production)?contextsAPIUrlProduction:contextsAPIUrl);
162
    }
163

    
164
    /**
165
     * Get result and objIdentifier  form openaire Search
166
     * @param resultdupid
167
     * @return
168
     */
169
    private static String getResultXMLByResultdupidSearchByUrl(String resultdupid, boolean production)  {
170
        String url= null;
171
        try {
172
             url = ((production)?searchUrlProduction:searchUrl)+"&sTransformer=results_openaire&query="+
173
                    URLEncoder.encode("(((deletedbyinference = false) AND (oaftype exact result)) )" +
174
                            " and (resultdupid exact " + resultdupid + ")", "UTF-8")
175
                    +"&size=10&locale=en_GB";
176
        } catch (UnsupportedEncodingException e) {
177
            logger.error("UnsupportedEncodingException",e);
178
        }
179
        return url;
180
    }
181
//    private static String getSoftwareXMLByIdSearchByUrl(String id, boolean production)  {
182
//        String url= null;
183
//        try {
184
//            url = ((production)?searchUrlProduction:searchUrl)+"&sTransformer=results_openaire&query="+
185
//                    URLEncoder.encode("(((deletedbyinference = false) AND (resulttypeid exact software) AND (oaftype exact result)) )" +
186
//                            " and (objIdentifier exact " + id + ")", "UTF-8")
187
//                    +"&size=1&locale=en_GB";
188
//        } catch (UnsupportedEncodingException e) {
189
//            logger.error("UnsupportedEncodingException",e);
190
//        }
191
//        return url;
192
//    }
193

    
194
    // HTTP GET request
195
    private static String getRequest(String url) throws Exception {
196
        URL obj = new URL(url);
197
        logger.debug(url);
198
        HttpURLConnection con = (HttpURLConnection) obj.openConnection();
199
        int responseCode = con.getResponseCode();
200
        if(responseCode != 200){
201
            return null;
202
        }
203
        BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
204
        StringBuffer response = new StringBuffer();
205
        String inputLine;
206
        while ((inputLine = in.readLine()) != null) {
207
            response.append(inputLine).append("\n");
208
        }
209
        in.close();
210
        return response.toString();
211
    }
212

    
213
    public static String getCrossrefJsonRecord(String doi){
214
        String url=crossrefUrl+doi;
215
        URL obj = null;
216
        String responseStr=null;
217
        try {
218
            obj = new URL(url);
219
            HttpURLConnection con = (HttpURLConnection) obj.openConnection();
220
            BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
221
            StringBuffer response = new StringBuffer();
222
            String inputLine;
223
            while ((inputLine = in.readLine()) != null) {
224
                response.append(inputLine).append("\n");
225
            }
226
            in.close();
227
            responseStr=response.toString();
228
            if(responseStr==null||(!responseStr.contains("\"status\":\"ok\"")||!(responseStr.contains("\"DOI\":\"")))){
229
                responseStr=null;
230
            }
231
        } catch (Exception e) {
232
            try{
233
                PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("dois_malformed_urls.txt", true)));
234
                Date date= new java.util.Date();
235
                out.println(new Timestamp(date.getTime())+"Doi:"+doi+" Url:"+url);
236
                out.close();
237

    
238
            }catch (IOException e1) {
239
                logger.error("Couldn't write to file " + "dois_malformed_urls.txt",e);
240
            }
241

    
242
        }
243
        return responseStr;
244
    }
245

    
246
    /**
247
     *
248
     * @param doi
249
     * @return true  if the given doi request returns a valid answer otherwise false
250
     * @throws IOException
251
     */
252
     public static boolean isDoiValid(String doi) throws IOException {
253
            boolean found=false;
254
            String responseStr=getCrossrefJsonRecord(doi);
255
             if(responseStr!=null){
256
                found=true;
257
            }
258
            return found;
259
    }
260

    
261
    /**
262
     * Search in index for result with resultdupid
263
     * @param  resultdupid  Openaire Id
264
     * @return objIdentifier
265
     */
266
    private static String getResultObjIdentifierFromSearch(String resultdupid, boolean  production) throws Exception {
267
        String xml=getRequest(getResultXMLByResultdupidSearchByUrl(resultdupid,production));
268
        String objIdentifier=null;
269
            String size=null;
270
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
271
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
272
            InputSource inputSource= new InputSource(new StringReader(xml));
273
            Document document=dBuilder.parse(inputSource);
274
             XPathFactory xPathfactory= XPathFactory.newInstance();
275
            XPath xpath = xPathfactory.newXPath();
276
            NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
277
            if (nl.getLength() > 0) {
278
                size= nl.item(0).getNodeValue();
279
            }
280
            if(size!=null && Integer.parseInt(size)>0){
281
                nl = (NodeList) xpath.compile("//field[@indexId='objIdentifier']/@value").evaluate(document, XPathConstants.NODESET);
282
                if (nl.getLength() > 0) {
283
                    objIdentifier=nl.item(0).getNodeValue();
284
                }
285
            }
286
        return objIdentifier;
287
    }
288

    
289
    /**
290
     *
291
     * @param xml : API result xml
292
     * @return number of results Found
293
     * @throws Exception
294
     */
295
    public static Integer getNumberOfResultsInAPIXML(String xml) throws Exception {
296
        if(xml==null){
297
            return 0;
298
        }
299
        String totalStr=null;
300
        Integer total=0;
301
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
302
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
303
        InputSource inputSource= new InputSource(new StringReader(xml));
304
        Document document=dBuilder.parse(inputSource);
305
        XPathFactory xPathfactory= XPathFactory.newInstance();
306
        XPath xpath = xPathfactory.newXPath();
307
        NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
308
        if (nl.getLength() > 0) {
309
            totalStr= nl.item(0).getNodeValue();
310
        }
311
        if(totalStr!=null){
312
            total=Integer.parseInt(totalStr);
313
        }
314
        return total;
315
    }
316

    
317
    public ClaimValidation getClaimValidation() {
318
        return claimValidation;
319
    }
320

    
321
    public void setClaimValidation(ClaimValidation claimValidation) {
322
        this.claimValidation = claimValidation;
323
    }
324
}
325

    
326

    
(9-9/9)