Project

General

Profile

1
package eu.dnetlib.data.claimsDemo;
2

    
3
import eu.dnetlib.data.claims.migration.ClaimValidation;
4
import org.apache.log4j.Logger;
5
import org.w3c.dom.Document;
6
import org.w3c.dom.NodeList;
7
import org.xml.sax.InputSource;
8

    
9
import javax.xml.parsers.DocumentBuilder;
10
import javax.xml.parsers.DocumentBuilderFactory;
11
import javax.xml.xpath.XPath;
12
import javax.xml.xpath.XPathConstants;
13
import javax.xml.xpath.XPathFactory;
14
import java.io.*;
15
import java.net.HttpURLConnection;
16
import java.net.URL;
17
import java.net.URLEncoder;
18
import java.sql.Timestamp;
19
import java.util.Date;
20

    
21
/**
22
 * Created by argirok on 20/11/2015.
23
 */
24
/*
25
*Search and Parsing xmls from Search Service
26
*Search for DOIs
27
*
28
 */
29
public class SearchUtils {
30
    private static final Logger logger = Logger.getLogger(SearchUtils.class);
31

    
32
    private static String searchUrl="http://beta.services.openaire.eu:8480/search/search?action=search";
33
    private static String apiUrlForResults="http://beta.services.openaire.eu:8480/search/api";//"http://api.openaire.eu/search";
34
//    private static String apiUrlForDedupResults="http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
35
    private static String apiUrlForProjects="http://beta.services.openaire.eu:8480/search/api";//"http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
36

    
37
    private static String searchUrlProduction="http://services.openaire.eu:8480/search/search?action=search";
38
    private static String apiUrlForResultsProduction="http://services.openaire.eu:8480/search/api";//"http://api.openaire.eu/search";
39
    private static String apiUrlForProjectsProduction="http://services.openaire.eu:8480/search/api";//"http://rudie.di.uoa.gr:8080/dnet-functionality-services-1.2.0-SNAPSHOT/api/";
40

    
41

    
42
    private static String crossrefUrl="https://api.crossref.org/works?filter=doi:";
43
    private static String dataciteUrl="https://data.datacite.org/application/rdf+xml/";
44
    private static String dataciteNewAPIUrl="https://api.datacite.org/works/";
45
    private static String orcidUrlPrefix="https://pub.orcid.org/";
46
    private static String orcidUrlSuffix="/orcid-works";
47
    private boolean useApi=true;
48
    private ClaimValidation claimValidation = null;
49

    
50
    public SearchUtils(){
51

    
52
    }
53

    
54
    public static String fetchProjectXmlFromIndex(String id, boolean production) throws Exception{
55
            return getRequest(getProjectApiUrl(id, production));
56
    }
57

    
58
    /**
59
     *Look up in API, if there is no result,
60
     * consider that id is a dedup id and search for the objIdentifier id.
61
     * Then look up again in the API with the objIdentifier
62
     * @param id
63
     * @return xml or null
64
     * @throws Exception
65
     */
66
    public static String fetchPublicationXmlFromIndex(String id, boolean production) throws Exception{
67

    
68
        String xml=getRequest(getPublicationApiUrl(id, production));
69
        if(getNumberOfResultsInAPIXML(xml)==0){
70
            xml=null;
71
        }
72
        return xml;
73
    }
74
    public static String fetchDedupXmlFromIndex(String id, boolean production) throws Exception{
75

    
76
        String xml= null;
77
            String objId=getResultObjIdentifierFromSearch(id,production);
78
            if(objId!=null){
79
                xml=getRequest(getPublicationApiUrl(objId, production));
80
            }
81
        return xml;
82
    }
83

    
84
    public static String fetchSoftwareXmlFromIndex(String id, boolean production) throws Exception{
85
        String xml=getRequest(getSoftwareApiUrl(id,production));
86
         return xml;
87
    }
88

    
89

    
90
    /**
91
     *Look up in API, if there is no result,
92
     * consider that id is a dedup id and search for the objIdentifier id.
93
     * Then look up again in the API with the objIdentifier
94
     *
95
     * @param id
96
     * @return xml
97
     * @throws Exception
98
     */
99
    public static String fetchDatasetXmlFromIndex(String id, boolean production) throws Exception{
100

    
101
        String xml=getRequest(getDatasetApiUrl(id, production));
102
        if(getNumberOfResultsInAPIXML(xml)==0){
103
            xml=null;
104
        }
105
        return xml;
106

    
107
    }
108

    
109
    public static String fetchResultXMLFromDatacite(String id) throws Exception{
110
            return getRequest(getDataciteUrl(id));
111
    }
112
    public static String fetchResultXMLFromOrcid(String id) throws Exception{
113

    
114
            return getRequest(getOrcidUrl(id));
115
    }
116

    
117
     public static String getProjectApiUrl(String id, boolean production)  {
118

    
119
        return ((production)?apiUrlForProjectsProduction:apiUrlForProjects)+"/projects?openaireProjectID="+id;
120
    }
121

    
122
    public static String getDatasetApiUrl(String id, boolean production)  {
123

    
124
        return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/datasets?openaireDatasetID="+id;
125
    }
126

    
127
    public static String getPublicationApiUrl(String id, boolean production)  {
128

    
129
        return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/publications?openairePublicationID="+id;
130
    }
131
    public static String  getSoftwareApiUrl(String id, boolean production)  {
132
        //TODO change to beta when it's ready
133
        return ((production)?apiUrlForResultsProduction:apiUrlForResults)+"/software?openaireSoftwareID="+id;
134
//        return "http://rudie.di.uoa.gr:8080/dnet-functionality-services-2.0.0-SNAPSHOT/api"+"/software?openaireSoftwareID="+id;
135
    }
136
//    public static String getDedupPublicationApiUrl(String id)  {
137
//
138
//        return apiUrlForDedupResults+"/publications?openairePublicationID="+id;
139
//    }
140
    private static String getDataciteUrl(String id)  {
141

    
142
        return dataciteNewAPIUrl+id;
143
    }
144

    
145
    private static String getOrcidUrl(String id)  {
146

    
147
        return orcidUrlPrefix+id+orcidUrlSuffix;
148
    }
149

    
150

    
151
    /**
152
     * Get result and objIdentifier  form openaire Search
153
     * @param resultdupid
154
     * @return
155
     */
156
    private static String getResultXMLByResultdupidSearchByUrl(String resultdupid, boolean production)  {
157
        String url= null;
158
        try {
159
             url = ((production)?searchUrlProduction:searchUrl)+"&sTransformer=results_openaire&query="+
160
                    URLEncoder.encode("(((deletedbyinference = false) AND (oaftype exact result)) )" +
161
                            " and (resultdupid exact " + resultdupid + ")", "UTF-8")
162
                    +"&size=10&locale=en_GB";
163
        } catch (UnsupportedEncodingException e) {
164
            logger.error("UnsupportedEncodingException",e);
165
        }
166
        return url;
167
    }
168
//    private static String getSoftwareXMLByIdSearchByUrl(String id, boolean production)  {
169
//        String url= null;
170
//        try {
171
//            url = ((production)?searchUrlProduction:searchUrl)+"&sTransformer=results_openaire&query="+
172
//                    URLEncoder.encode("(((deletedbyinference = false) AND (resulttypeid exact software) AND (oaftype exact result)) )" +
173
//                            " and (objIdentifier exact " + id + ")", "UTF-8")
174
//                    +"&size=1&locale=en_GB";
175
//        } catch (UnsupportedEncodingException e) {
176
//            logger.error("UnsupportedEncodingException",e);
177
//        }
178
//        return url;
179
//    }
180

    
181
    // HTTP GET request
182
    private static String getRequest(String url) throws Exception {
183
        URL obj = new URL(url);
184
        logger.debug(url);
185
        HttpURLConnection con = (HttpURLConnection) obj.openConnection();
186
        int responseCode = con.getResponseCode();
187
        if(responseCode != 200){
188
            return null;
189
        }
190
        BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
191
        StringBuffer response = new StringBuffer();
192
        String inputLine;
193
        while ((inputLine = in.readLine()) != null) {
194
            response.append(inputLine).append("\n");
195
        }
196
        in.close();
197
        return response.toString();
198
    }
199

    
200
    public static String getCrossrefJsonRecord(String doi){
201
        String url=crossrefUrl+doi;
202
        URL obj = null;
203
        String responseStr=null;
204
        try {
205
            obj = new URL(url);
206
            HttpURLConnection con = (HttpURLConnection) obj.openConnection();
207
            BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
208
            StringBuffer response = new StringBuffer();
209
            String inputLine;
210
            while ((inputLine = in.readLine()) != null) {
211
                response.append(inputLine).append("\n");
212
            }
213
            in.close();
214
            responseStr=response.toString();
215
            if(responseStr==null||(!responseStr.contains("\"status\":\"ok\"")||!(responseStr.contains("\"DOI\":\"")))){
216
                responseStr=null;
217
            }
218
        } catch (Exception e) {
219
            try{
220
                PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("dois_malformed_urls.txt", true)));
221
                Date date= new java.util.Date();
222
                out.println(new Timestamp(date.getTime())+"Doi:"+doi+" Url:"+url);
223
                out.close();
224

    
225
            }catch (IOException e1) {
226
                logger.error("Couldn't write to file " + "dois_malformed_urls.txt",e);
227
            }
228

    
229
        }
230
        return responseStr;
231
    }
232

    
233
    /**
234
     *
235
     * @param doi
236
     * @return true  if the given doi request returns a valid answer otherwise false
237
     * @throws IOException
238
     */
239
     public static boolean isDoiValid(String doi) throws IOException {
240
            boolean found=false;
241
            String responseStr=getCrossrefJsonRecord(doi);
242
             if(responseStr!=null){
243
                found=true;
244
            }
245
            return found;
246
    }
247

    
248
    /**
249
     * Search in index for result with resultdupid
250
     * @param  resultdupid  Openaire Id
251
     * @return objIdentifier
252
     */
253
    private static String getResultObjIdentifierFromSearch(String resultdupid, boolean  production) throws Exception {
254
        String xml=getRequest(getResultXMLByResultdupidSearchByUrl(resultdupid,production));
255
        String objIdentifier=null;
256
            String size=null;
257
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
258
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
259
            InputSource inputSource= new InputSource(new StringReader(xml));
260
            Document document=dBuilder.parse(inputSource);
261
             XPathFactory xPathfactory= XPathFactory.newInstance();
262
            XPath xpath = xPathfactory.newXPath();
263
            NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
264
            if (nl.getLength() > 0) {
265
                size= nl.item(0).getNodeValue();
266
            }
267
            if(size!=null && Integer.parseInt(size)>0){
268
                nl = (NodeList) xpath.compile("//field[@indexId='objIdentifier']/@value").evaluate(document, XPathConstants.NODESET);
269
                if (nl.getLength() > 0) {
270
                    objIdentifier=nl.item(0).getNodeValue();
271
                }
272
            }
273
        return objIdentifier;
274
    }
275

    
276
    /**
277
     *
278
     * @param xml : API result xml
279
     * @return number of results Found
280
     * @throws Exception
281
     */
282
    public static Integer getNumberOfResultsInAPIXML(String xml) throws Exception {
283
        if(xml==null){
284
            return 0;
285
        }
286
        String totalStr=null;
287
        Integer total=0;
288
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
289
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
290
        InputSource inputSource= new InputSource(new StringReader(xml));
291
        Document document=dBuilder.parse(inputSource);
292
        XPathFactory xPathfactory= XPathFactory.newInstance();
293
        XPath xpath = xPathfactory.newXPath();
294
        NodeList nl = (NodeList) xpath.compile("//total/text()").evaluate(document, XPathConstants.NODESET);
295
        if (nl.getLength() > 0) {
296
            totalStr= nl.item(0).getNodeValue();
297
        }
298
        if(totalStr!=null){
299
            total=Integer.parseInt(totalStr);
300
        }
301
        return total;
302
    }
303

    
304
    public ClaimValidation getClaimValidation() {
305
        return claimValidation;
306
    }
307

    
308
    public void setClaimValidation(ClaimValidation claimValidation) {
309
        this.claimValidation = claimValidation;
310
    }
311
}
312

    
313

    
(6-6/9)