Project

General

Profile

1
package eu.dnetlib.data.claimsDemo;
2

    
3
import eu.dnetlib.data.claims.migration.ClaimValidation;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.util.Date;
20

    
21
/**
22
 * Created by argirok on 20/11/2015.
23
 */
24
/*
25
*Search and Parsing xmls from Search Service
26
*Search for DOIs
27
*
28
 */
29
public class SearchUtils {
30
    private static final Logger logger = Logger.getLogger(SearchUtils.class);
31

    
32
    private static String searchUrl="http://beta.services.openaire.eu:8480/search/search?action=search";
33
    private static String apiUrlForResults="http://beta.services.openaire.eu:8480/search/api";//"http://api.openaire.eu/search";
34
//    private static String apiUrlForDedupResults="http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
35
    private static String apiUrlForProjects="http://beta.services.openaire.eu:8480/search/api";//"http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
36

    
37
    private static String searchUrlProduction="http://services.openaire.eu:8480/search/search?action=search";
38
    private static String apiUrlForResultsProduction="http://services.openaire.eu:8480/search/api";//"http://api.openaire.eu/search";
39
    private static String apiUrlForProjectsProduction="http://services.openaire.eu:8480/search/api";//"http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
40

    
41

    
42
    private static String crossrefUrl="https://api.crossref.org/works?filter=doi:";
43
    private static String dataciteUrl="https://data.datacite.org/application/rdf+xml/";
44
    private static String dataciteNewAPIUrl="https://api.datacite.org/works/";
45
    private static String orcidUrlPrefix="https://pub.orcid.org/v2.1/";
46
    private static String orcidUrlSuffix="/works";
47
//    private static String contextsAPIUrl="https://dev-openaire.d4science.org/openaire/context";
48
    private static String contextsAPIUrl="http://beta.services.openaire.eu:8080/openaire/context";
49
    private static String contextsAPIUrlProduction="http://services.openaire.eu:8080/openaire/context";
50
    private boolean useApi=true;
51
    private ClaimValidation claimValidation = null;
52

    
53
    public SearchUtils(){
54

    
55
    }
56

    
57
    public static String fetchProjectXmlFromIndex(String id, boolean production) throws Exception{
58
            return getRequest(getProjectApiUrl(id, production));
59
    }
60

    
61
    /**
62
     *Look up in API, if there is no result,
63
     * consider that id is a dedup id and search for the objIdentifier id.
64
     * Then look up again in the API with the objIdentifier
65
     * @param id
66
     * @return xml or null
67
     * @throws Exception
68
     */
69
    public static String fetchPublicationXmlFromIndex(String id, boolean production) throws Exception{
70

    
71
        String xml=getRequest(getPublicationApiUrl(id, production));
72
        if(getNumberOfResultsInAPIXML(xml)==0){
73
            xml=null;
74
        }
75
        return xml;
76
    }
77
    public static String fetchDedupXmlFromIndex(String id, boolean production) throws Exception{
78

    
79
        String xml= null;
80
            String objId=getResultObjIdentifierFromSearch(id,production);
81
            if(objId!=null){
82
                xml=getRequest(getPublicationApiUrl(objId, production));
83
            }
84
        return xml;
85
    }
86

    
87
    public static String fetchSoftwareXmlFromIndex(String id, boolean production) throws Exception{
88
        String xml=getRequest(getSoftwareApiUrl(id,production));
89
         return xml;
90
    }
91

    
92
    public static String fetchOtherXmlFromIndex(String id, boolean production) throws Exception{
93
        String xml=getRequest(getOtherApiUrl(id,production));
94
        return xml;
95
    }
96
    /**
97
     *Look up in API, if there is no result,
98
     * consider that id is a dedup id and search for the objIdentifier id.
99
     * Then look up again in the API with the objIdentifier
100
     *
101
     * @param id
102
     * @return xml
103
     * @throws Exception
104
     */
105
    public static String fetchDatasetXmlFromIndex(String id, boolean production) throws Exception{
106

    
107
        String xml=getRequest(getDatasetApiUrl(id, production));
108
        if(getNumberOfResultsInAPIXML(xml)==0){
109
            xml=null;
110
        }
111
        return xml;
112

    
113
    }
114

    
115
    public static String fetchResultXMLFromDatacite(String id) throws Exception{
116
            return getRequest(getDataciteUrl(id));
117
    }
118
    public static String fetchResultXMLFromOrcid(String id) throws Exception{
119

    
120
            return getRequest(getOrcidUrl(id));
121
    }
122
    public static String fetchContext(String suffix, boolean production) throws Exception{
123

    
124
        return getRequest(getContextsAPIUrl(production)+suffix);
125
    }
126

    
127
     public static String getProjectApiUrl(String id, boolean production)  {
128

    
129
        return ((production)?apiUrlForProjectsProduction:apiUrlForProjects)+"/projects?openaireProjectID="+id;
130
    }
131

    
132
    public static String getDatasetApiUrl(String id, boolean production)  {
133

    
134
        return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/datasets?openaireDatasetID="+id;
135
    }
136

    
137
    public static String getPublicationApiUrl(String id, boolean production)  {
138

    
139
        return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/publications?openairePublicationID="+id;
140
    }
141
    public static String  getSoftwareApiUrl(String id, boolean production)  {
142
        return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/software?openaireSoftwareID="+id;
143
    }
144
    public static String  getOtherApiUrl(String id, boolean production)  {
145
        return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/other?openaireOtherID="+id;
146
    }
147
//    public static String getDedupPublicationApiUrl(String id)  {
148
//
149
//        return apiUrlForDedupResults+"/publications?openairePublicationID="+id;
150
//    }
151
    private static String getDataciteUrl(String id)  {
152

    
153
        return dataciteNewAPIUrl+id;
154
    }
155

    
156
    private static String getOrcidUrl(String id)  {
157

    
158
        return orcidUrlPrefix+id+orcidUrlSuffix;
159
    }
160
    private static String getContextsAPIUrl( boolean production)  {
161

    
162
        return ((production)?contextsAPIUrlProduction:contextsAPIUrl);
163
    }
164

    
165
    /**
166
     * Get result and objIdentifier  form openaire Search
167
     * @param resultdupid
168
     * @return
169
     */
170
    private static String getResultXMLByResultdupidSearchByUrl(String resultdupid, boolean production)  {
171
        String url= null;
172
        try {
173
             url = ((production)?searchUrlProduction:searchUrl)+"&sTransformer=results_openaire&query="+
174
                    URLEncoder.encode("(((deletedbyinference = false) AND (oaftype exact result)) )" +
175
                            " and (resultdupid exact " + resultdupid + ")", "UTF-8")
176
                    +"&size=10&locale=en_GB";
177
        } catch (UnsupportedEncodingException e) {
178
            logger.error("UnsupportedEncodingException",e);
179
        }
180
        return url;
181
    }
182
//    private static String getSoftwareXMLByIdSearchByUrl(String id, boolean production)  {
183
//        String url= null;
184
//        try {
185
//            url = ((production)?searchUrlProduction:searchUrl)+"&sTransformer=results_openaire&query="+
186
//                    URLEncoder.encode("(((deletedbyinference = false) AND (resulttypeid exact software) AND (oaftype exact result)) )" +
187
//                            " and (objIdentifier exact " + id + ")", "UTF-8")
188
//                    +"&size=1&locale=en_GB";
189
//        } catch (UnsupportedEncodingException e) {
190
//            logger.error("UnsupportedEncodingException",e);
191
//        }
192
//        return url;
193
//    }
194

    
195
    // HTTP GET request
196
    private static String getRequest(String url) throws Exception {
197
        URL obj = new URL(url);
198
        logger.debug(url);
199
        HttpURLConnection con = (HttpURLConnection) obj.openConnection();
200
        int responseCode = con.getResponseCode();
201
        if(responseCode != 200){
202
            return null;
203
        }
204
        BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
205
        StringBuffer response = new StringBuffer();
206
        String inputLine;
207
        while ((inputLine = in.readLine()) != null) {
208
            response.append(inputLine).append("\n");
209
        }
210
        in.close();
211
        return response.toString();
212
    }
213

    
214
    public static String getCrossrefJsonRecord(String doi){
215
        String url=crossrefUrl+doi;
216
        URL obj = null;
217
        String responseStr=null;
218
        try {
219
            obj = new URL(url);
220
            HttpURLConnection con = (HttpURLConnection) obj.openConnection();
221
            BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
222
            StringBuffer response = new StringBuffer();
223
            String inputLine;
224
            while ((inputLine = in.readLine()) != null) {
225
                response.append(inputLine).append("\n");
226
            }
227
            in.close();
228
            responseStr=response.toString();
229
            if(responseStr==null||(!responseStr.contains("\"status\":\"ok\"")||!(responseStr.contains("\"DOI\":\"")))){
230
                responseStr=null;
231
            }
232
        } catch (Exception e) {
233
            try{
234
                PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("dois_malformed_urls.txt", true)));
235
                Date date= new java.util.Date();
236
                out.println(new Timestamp(date.getTime())+"Doi:"+doi+" Url:"+url);
237
                out.close();
238

    
239
            }catch (IOException e1) {
240
                logger.error("Couldn't write to file " + "dois_malformed_urls.txt",e);
241
            }
242

    
243
        }
244
        return responseStr;
245
    }
246

    
247
    /**
248
     *
249
     * @param doi
250
     * @return true  if the given doi request returns a valid answer otherwise false
251
     * @throws IOException
252
     */
253
     public static boolean isDoiValid(String doi) throws IOException {
254
            boolean found=false;
255
            String responseStr=getCrossrefJsonRecord(doi);
256
             if(responseStr!=null){
257
                found=true;
258
            }
259
            return found;
260
    }
261

    
262
    /**
263
     * Search in index for result with resultdupid
264
     * @param  resultdupid  Openaire Id
265
     * @return objIdentifier
266
     */
267
    private static String getResultObjIdentifierFromSearch(String resultdupid, boolean  production) throws Exception {
268
        String xml=getRequest(getResultXMLByResultdupidSearchByUrl(resultdupid,production));
269
        String objIdentifier=null;
270
            String size=null;
271
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
272
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
273
            InputSource inputSource= new InputSource(new StringReader(xml));
274
            Document document=dBuilder.parse(inputSource);
275
             XPathFactory xPathfactory= XPathFactory.newInstance();
276
            XPath xpath = xPathfactory.newXPath();
277
            NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
278
            if (nl.getLength() > 0) {
279
                size= nl.item(0).getNodeValue();
280
            }
281
            if(size!=null && Integer.parseInt(size)>0){
282
                nl = (NodeList) xpath.compile("//field[@indexId='objIdentifier']/@value").evaluate(document, XPathConstants.NODESET);
283
                if (nl.getLength() > 0) {
284
                    objIdentifier=nl.item(0).getNodeValue();
285
                }
286
            }
287
        return objIdentifier;
288
    }
289

    
290
    /**
291
     *
292
     * @param xml : API result xml
293
     * @return number of results Found
294
     * @throws Exception
295
     */
296
    public static Integer getNumberOfResultsInAPIXML(String xml) throws Exception {
297
        if(xml==null){
298
            return 0;
299
        }
300
        String totalStr=null;
301
        Integer total=0;
302
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
303
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
304
        InputSource inputSource= new InputSource(new StringReader(xml));
305
        Document document=dBuilder.parse(inputSource);
306
        XPathFactory xPathfactory= XPathFactory.newInstance();
307
        XPath xpath = xPathfactory.newXPath();
308
        NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
309
        if (nl.getLength() > 0) {
310
            totalStr= nl.item(0).getNodeValue();
311
        }
312
        if(totalStr!=null){
313
            total=Integer.parseInt(totalStr);
314
        }
315
        return total;
316
    }
317

    
318
    public ClaimValidation getClaimValidation() {
319
        return claimValidation;
320
    }
321

    
322
    public void setClaimValidation(ClaimValidation claimValidation) {
323
        this.claimValidation = claimValidation;
324
    }
325
}
326

    
327

    
(6-6/9)