Project

General

Profile

« Previous | Next » 

Revision 47316

MERGE newAPI

View differences:

SolrResultSet.java
1 1
package eu.dnetlib.data.search.solr;
2 2

  
3
import com.google.gson.Gson;
4
import eu.dnetlib.data.search.transform.Transformer;
5
import eu.dnetlib.data.search.transform.TransformerException;
6
import eu.dnetlib.data.search.utils.solr.SolrResultSetOptionsUtil;
7
import eu.dnetlib.data.search.utils.solr.SolrResultsFormatter;
3 8
import eu.dnetlib.domain.EPR;
4
import eu.dnetlib.functionality.cql.CqlTranslator;
5
import eu.dnetlib.functionality.cql.CqlTranslatorImpl;
6
import eu.dnetlib.functionality.cql.lucene.TranslatedQuery;
7 9
import gr.uoa.di.driver.enabling.resultset.ResultSet;
8 10
import org.apache.commons.lang.StringEscapeUtils;
9 11
import org.apache.log4j.Logger;
......
16 18
import org.apache.solr.common.util.NamedList;
17 19
import org.z3950.zing.cql.CQLParseException;
18 20

  
21
import javax.ws.rs.core.MediaType;
19 22
import java.io.IOException;
20 23
import java.util.ArrayList;
21
import java.util.Collections;
24
import java.util.HashMap;
22 25
import java.util.List;
26
import java.util.Map;
23 27

  
24 28
/**
25 29
 * Created by antleb on 2/4/14.
......
32 36
    private CloudSolrServer solrClient = null;
33 37

  
34 38
    private NamedList<String> queryOpts = new NamedList<String>();
35
    int size = -1;
39
    long size = -1;
36 40

  
37 41

  
38 42
    public SolrResultSet(EPR epr, CloudSolrServer solrClient) throws IOException, CQLParseException {
39

  
40 43
        this.epr = epr;
41 44
        this.solrClient = solrClient;
42
        //solrClient.getLbServer().setConnectionTimeout(10000);
45
        this.queryOpts = SolrResultSetOptionsUtil.extractQueryOptions(epr.getParameter("query"));
43 46

  
44

  
45
        CqlTranslator translator = new CqlTranslatorImpl();
46
        String cqlQuery = epr.getParameter("query");
47
        //logger.debug("epr query param: " + cqlQuery);
48
        String[] queryParts = cqlQuery.split("&groupby=");
49

  
50
        //logger.debug("Got query " + cqlQuery);
51
        //for (int i = 0; i < queryParts.length; i++) {
52
        //    logger.debug("queryParts [" + i + "] = '" + queryParts[i] +"'");
53
        //}
54

  
55
        TranslatedQuery translatedQuery = translator.getTranslatedQuery(queryParts[0].replace("query=",""));
56

  
57
        if (epr.getParameter("action").equals("lookup")) {
58
            queryOpts.add("q", translatedQuery.asLucene());
59
            queryOpts.add("fl", "__result");
60

  
61
          if (translatedQuery.getOptions() != null && translatedQuery.getOptions().getSort()!= null  ) {
62
                queryOpts.add("sort", translatedQuery.getOptions().getSort().getField() + " " + translatedQuery.getOptions().getSort().getMode());
63
          }
64

  
65
        } else if (epr.getParameter("action").equals("browse")) {
66
            logger.debug("Browse query: " + translatedQuery.asLucene());
67

  
68
            queryOpts.add("q", translatedQuery.asLucene());
69
            queryOpts.add("facet", "true");
70
            queryOpts.add("facet.mincount", "1");
71

  
72
            if (queryParts.length > 1) {
73

  
74
                queryOpts.add("facet.threads", queryParts[1].split(",").length + "");
75

  
76
                for (String field:queryParts[1].split(","))
77
                    queryOpts.add("facet.field", field);
78
            }
79
        }
80

  
81

  
82
        queryOpts.add("shards.tolerant","true");
83

  
84 47
        String layout = epr.getParameter("layout");
85 48
        String mdformat = epr.getParameter("mdformat");
86 49
        String interpretation = epr.getParameter("interpretation");
......
88 51
        solrClient.setDefaultCollection(mdformat + "-" + layout + "-" + interpretation);
89 52
    }
90 53

  
54

  
55

  
91 56
    @Override
92 57
    public boolean isOpen() {
93 58
        return true;
......
105 70

  
106 71
    @Override
107 72
    public int size() {
108
        if (size == -1) {
109
            try {
110
                size = getSize();
111
            } catch (SolrServerException sse) {
112
               logger.error("Fail to get size", sse);
113
            }
114
        }
73
        return (int) size;
74
    }
115 75

  
116
        return size;
76
    @Override
77
    @Deprecated
78
    public List<String> getElements(int from, int to) {
79
        return get(from, to);
117 80
    }
118 81

  
119
    private int getSize() throws SolrServerException {
120
        //logger.debug("Query opts" + queryOpts);
82
    List<FacetField> facetFields = null;
83

  
84
    @Override
85
    @Deprecated
86
    public List<String> get(int from, int to) {
87
        List<String> res = new ArrayList<String>();
88

  
121 89
        QueryResponse rsp = null;
122
        //synchronized (solrClient) {
90

  
91
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
92

  
93
        logger.debug("from: " + from);
94
        logger.debug("to: " + to);
95

  
96

  
97
        queryOpts.add("start", from+1 + "");
98
        queryOpts.add("rows", to + 1+"");
99

  
100
        try {
123 101
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
124
        //}
102
            facetFields = rsp.getFacetFields();
103
            SolrDocumentList docs = rsp.getResults();
125 104

  
126
        //logger.debug(queryOpts);
105
            if (facetFields!=null && !facetFields.isEmpty()) {
106
                for (int i = from - 1; i < to; i++) {
107
                    for (FacetField field : facetFields) {
108
                        if (field.getValueCount() > i) {
109
                            BrowseField bf = new BrowseField();
110
                            bf.setId(field.getValues().get(i).getName());
111
                            bf.setName(field.getValues().get(i).getName());
112
                            bf.setCount(field.getValues().get(i).getCount() + "");
113
                            if (map.get(field.getName()) == null) {
114
                                map.put(field.getName(), new ArrayList<String>());
115
                            }
127 116

  
128
        if (epr.getParameter("action").equals("lookup")) {
129
            return (int) rsp.getResults().getNumFound();
130
        } else if (epr.getParameter("action").equals("browse")) {
131
            int max = -12;
117
                            map.get(field.getName()).add(new Gson().toJson(bf));
118
                        }
119
                    }
120
                }
132 121

  
133
            for (FacetField field:rsp.getFacetFields()) {
134
                if (field.getValueCount() > max)
135
                    max = field.getValueCount();
122
                for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
123
                    StringBuilder builder = new StringBuilder();
124
                    builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
125
                    builder.append(facetEntry.getValue());
126
                    res.add(builder.toString());
127
                }
136 128
            }
137 129

  
138
            return max;
130
            logger.debug("time: " + rsp.getElapsedTime());
131
            logger.debug("found: " + docs.getNumFound());
132
            logger.debug("docs: " + docs.size());
133

  
134
            for (int i = 0; i < docs.size(); i++) {
135
                String result = ((ArrayList<String>) docs.get(i).get("__result")).get(0);
136
                res.add(result);
137
            }
138

  
139
            return res;
140

  
141
        } catch (SolrServerException sse) {
142
            logger.error("Fail to get results from Solr. ", sse);
139 143
        }
140 144

  
141
        return 0;
145
        return null;
142 146
    }
143 147

  
144
    @Override
145
    public List<String> getElements(int from, int to) {
146
        return get(from, to);
147
    }
148
    public Map<String,List<String>> newGet(int from, int to, String format, Transformer transformer, Transformer oldRefineTransformer) {
149
        List<String> refineSolrResults = new ArrayList<String>();
150
        List<String> searchSolrResults = new ArrayList<String>();
148 151

  
149
    @Override
150
    public List<String> get(int from, int to) {
152
        logger.debug("format: " + format);
151 153

  
152
        logger.debug("Getting records from " + from + "  to " + to);
154
        QueryResponse rsp = null;
155
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
153 156

  
154
        if ("lookup".equals(epr.getParameter("action")))
157
        //logger.info("from: " + from*to);
158
        //logger.info("to: " + to);
155 159

  
156
            return getDocumentResults(from, to);
157
        else if ("browse".equals(epr.getParameter("action"))) {
160
        queryOpts.add("start", from*to + "");
161
        queryOpts.add("rows", to +"");
158 162

  
159
            return getBrowseResults(from, to);
160
        }
163
        try {
164
            long startTime = System.nanoTime();
165
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
166
            long estimatedTime = System.nanoTime() - startTime;
167
            logger.info("Solrj time " + estimatedTime/1000000 +  " milliseconds for query:" + queryOpts.get("q") +
168
                    " and facets " + queryOpts.getAll("facet.field") + " and fq " + queryOpts.getAll("fq") + " from: "
169
                    + from + " and size " + to);
161 170

  
162
        return null;
163
    }
171
            facetFields = rsp.getFacetFields();
164 172

  
165
    List<FacetField> facetFields = null;
173
            SolrDocumentList docs = rsp.getResults();
166 174

  
167
    private List<String> getBrowseResults(int from, int to) {
168
        List<String> res = new ArrayList<String>();
169
        //TODO add here logs
175
            this.size = docs.getNumFound();
170 176

  
171
        if (facetFields == null) {
172
            try {
173
                QueryResponse rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
174
                facetFields = rsp.getFacetFields();
177
            if (facetFields!=null && !facetFields.isEmpty()) {
175 178

  
176
            } catch (SolrServerException sse) {
177
                logger.error("Faceted fields caused exception. ", sse);
178
            }
179
                logger.debug("Checking " + (format != null && format.equals(MediaType.APPLICATION_JSON)));
179 180

  
180
            for (int i = from - 1; i < to; i++) {
181
                StringBuilder sb = new StringBuilder();
181
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
182
                    for (FacetField field : facetFields) {
183
                        map.put(field.getName(), new ArrayList<String>());
184
                        BrowseField bf = null;
185
                        for (int i = 0; i < field.getValueCount(); i++) {
186
                            bf = new BrowseField();
187
                            logger.debug(field.getValues().get(i).getName());
188
                            logger.debug(StringEscapeUtils.unescapeJava(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName())));
182 189

  
183
                sb.append("<row>");
184
                for (FacetField field:facetFields) {
185
                    if (field.getValueCount() > i) {
186
                        sb.append("<groupresult field=\"").append(field.getName()).append("\">");
187
                        sb.append("<count>").append(field.getValues().get(i).getCount()).append("</count>");
188
                        sb.append("<originalValue>").append(StringEscapeUtils.escapeXml(field.getValues().get(i).getName())).append("</originalValue>");
190
                            bf.setId(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
189 191

  
190
                        String[] facetValues = field.getValues().get(i).getName().split("\\|\\|");
191
                        if(facetValues.length > 1) {
192
                            sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[1])).append("</value>");
193
                        } else {
194
                            sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[0])).append("</value>");
192
                            String[] facetedValues = field.getValues().get(i).getName().split("\\|\\|",2);
193
                            if (facetedValues.length > 1) {
194
                                bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(facetedValues[1]));
195

  
196
                            } else if (field.getValues().get(i).getName().split("_\\:\\:",2).length > 1) {
197
                                bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName().split("\\:\\:",2)[1]).replaceAll("\\:\\:", "\\|"));
198

  
199
                            } else {
200
                                bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
201
                            }
202

  
203
                            bf.setCount(field.getValues().get(i).getCount() + "");
204
                            map.get(field.getName()).add(new Gson().toJson(bf));
195 205
                        }
196
                        sb.append("</groupresult>");
206

  
197 207
                    }
208

  
209
                    StringBuilder builder = null;
210

  
211
                    for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
212
                        builder = new StringBuilder();
213
                        builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
214
                        builder.append(facetEntry.getValue());
215
                        refineSolrResults.add(builder.toString());
216
                    }
217

  
218
                } else { //the old implementation & xml as default //TODO check compatibility
219
                    logger.debug("Creating old browse results.");
220
                    createXmlRefineFields(refineSolrResults, oldRefineTransformer);
198 221
                }
199
                sb.append("</row>");
222
            }
200 223

  
201
                logger.debug("row: " + sb.toString());
202
                res.add(sb.toString());
224
            for (int i = 0; i < docs.size(); i++) {
225
                String result = ((ArrayList<String>) docs.get(i).get("__result")).get(0);
226
                try {
227
                    if (transformer != null) {
228
                        //logger.debug("1 >>>>>>" + result);
229
                        String xml = result.replaceAll("<em>","").replaceAll("</em>","");
230
                        result = transformer.transform(xml);
231
                        //logger.debug("2 >>>>>>" + result);
232
                    }
233
                } catch (TransformerException te) {
234
                    logger.warn("Error transforming " + result, te);
235
                    continue;
236
                }
237

  
238
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
239
                    searchSolrResults.add(SolrResultsFormatter.xml2Json(result));
240
                } else { // default xml
241
                    searchSolrResults.add(result);
242
                }
203 243
            }
204
        }
205 244

  
206
        logger.debug("res: " + res);
245
            Map<String,List<String>> response = new HashMap<String, List<String>>();
207 246

  
208
        return res;
247
            logger.debug("refine results " + refineSolrResults);
248
            //logger.debug("search results " + searchSolrResults);
249

  
250
            response.put("refine",refineSolrResults);
251
            response.put("search", searchSolrResults);
252

  
253
            return response;
254

  
255
        } catch (SolrServerException sse) {
256
            logger.error("Error calling Solr.", sse);
257
        }
258
        return null;
209 259
    }
210 260

  
211
    private List<String> getDocumentResults(int from, int to) {
212
        try {
213
            QueryResponse rsp = null;
214
            NamedList<String> extraOpts = new NamedList<String>();
215 261

  
216
            extraOpts.add("start", (from - 1) + "");
217
            extraOpts.add("rows", (to - from) + 1 + "");
218
            extraOpts.addAll(queryOpts);
262
    //TODO get rid of this as soon as Joomla portal is out
263
    //Just copied and refactored the old one...
264
    @Deprecated
265
    private void createXmlRefineFields(List<String> res, Transformer oldRefineTransformer) {
266
        int max = -12;
219 267

  
220
            //solrClient.setZkClientTimeout(20000);
221
            //synchronized (solrClient) {
222
                rsp = solrClient.query(SolrParams.toSolrParams(extraOpts));
223
            //}
268
        for (FacetField field:facetFields) {
269
            logger.debug("field " + field.getName() + " has count " + field.getValueCount());
224 270

  
225
            SolrDocumentList docs = rsp.getResults();
271
            if (field.getValueCount() > max) {
272
                max = field.getValueCount();
273
            }
274
        }
226 275

  
227
            logger.debug("time: " + rsp.getElapsedTime());
228
            logger.debug("found: " + docs.getNumFound());
276
        logger.debug("max " + max);
229 277

  
230
            List<String> res = new ArrayList<String>();
278
        for (int i = 0; i < max; i++) {
279
            StringBuilder sb = new StringBuilder();
231 280

  
232
            for (int i = 0; i < (to - from) + 1; i++) {
233
                String result = ((ArrayList<String>) docs.get(i).get("__result")).get(0);
281
            sb.append("<row>");
282
            for (FacetField field:facetFields) {
283
                if (field.getValueCount() > i) {
284
                    sb.append("<groupresult field=\"").append(field.getName()).append("\">");
285
                    sb.append("<count>").append(field.getValues().get(i).getCount()).append("</count>");
286
                    sb.append("<originalValue>").append(StringEscapeUtils.escapeXml(field.getValues().get(i).getName())).append("</originalValue>");
234 287

  
235
                res.add(result);
288
                    String[] facetValues = field.getValues().get(i).getName().split("\\|\\|");
289
                    if(facetValues.length > 1) {
290
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[1])).append("</value>");
291
                    } else {
292
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[0])).append("</value>");
293
                    }
294
                    sb.append("</groupresult>");
295
                }
236 296
            }
297
            sb.append("</row>");
237 298

  
238
            return res;
239
        } catch (SolrServerException e) {
240
            e.printStackTrace();
299
            try {
300
                //logger.debug("row: " + sb.toString());
301
                //logger.debug("row2: " + oldRefineTransformer.transform(sb.toString()));
302

  
303
                //TODO remove
304
                res.add(oldRefineTransformer.transform(sb.toString()));
305

  
306
            } catch (TransformerException te) {
307
                logger.error("Cannot transform refine for: " + sb.toString(), te);
308
            }
241 309
        }
242

  
243
        return Collections.EMPTY_LIST;
244 310
    }
245 311

  
312
/*
246 313
    public static void main(String[] args) throws IOException, CQLParseException, SolrServerException {
247
        try {
314
        CloudSolrServer solrClient = new CloudSolrServer("beta.solr.openaire.eu:9983");
315
        NamedList<String> queryOpts = new NamedList<String>();
248 316

  
317
     //   queryOpts.add("q", new CqlTranslatorImpl().getTranslatedQuery("oaftype exact project").asLucene());
318
        NamedList<String> extraOpts = new NamedList<String>();
249 319

  
250
            CloudSolrServer solrClient = new CloudSolrServer("beta.solr.openaire.eu:8983");
251
            //solrClient.getLbServer().setConnectionTimeout(1);
320
        //extraOpts.add("start", "1");
321
       // extraOpts.add("rows", "10");
322
       // extraOpts.addAll(queryOpts);
252 323

  
253
            NamedList<String> queryOpts = new NamedList<String>();
324
        queryOpts.add("facet", "true");
325
        //TranslatedQuery translatedQuery = new CqlTranslatorImpl().getTranslatedQuery("oaftype=result sortBy resultdateofacceptance/sort.descending");
254 326

  
255
            //queryOpts.put("q", new CqlTranslatorImpl().getTranslatedQuery("oaftype exact result").asLucene());
256
            // queryOpts.add("facet", "true");
257
            TranslatedQuery translatedQuery = new CqlTranslatorImpl().getTranslatedQuery("oaftype=result sortBy resultdateofacceptance/sort.descending");
327
     //   queryOpts.add("q", "oaftype=project");
328
        queryOpts.add("facet", "true");
329
        queryOpts.add("facet.mincount", "1");
330
        queryOpts.add("fq", "popularity");
258 331

  
259
            queryOpts.add("q", translatedQuery.asLucene());
260
//        queryOpts.put("fq", new CqlTranslatorImpl().getTranslatedQuery("").asLucene());
261
            // queryOpts.add("facet.field", "contextid");
262
            //  queryOpts.add("facet.field", "contextname");
263
            //  queryOpts.add("facet.mincount", "1");
264
            //  queryOpts.add("facet.threads", "10");
265
            System.out.println(translatedQuery.getOptions().getSort().getMode());
266
            System.out.println(translatedQuery.getOptions().getSort().getField());
267 332

  
268
            queryOpts.add("sort", translatedQuery.getOptions().getSort().getField() + " " + translatedQuery.getOptions().getSort().getMode());
269 333

  
270
            solrClient.setDefaultCollection("DMF-index-openaire");
334
//        queryOpts.put("fq", new CqlTranslatorImpl().getTranslatedQuery("").asLucene());
335
       // queryOpts.add("facet.field", "contextid");
336
       //  queryOpts.add("facet.field", "contextname");
337
       //  queryOpts.add("facet.mincount", "1");
338
       //  queryOpts.add("facet.threads", "10");
339
       // System.out.println(translatedQuery.getOptions().getSort().getMode());
340
       // System.out.println(translatedQuery.getOptions().getSort().getField());
271 341

  
272
            QueryResponse resp = null;
273
            synchronized (solrClient) {
274
                resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
275
            }
276
            System.out.println("time: " + resp.getElapsedTime());
277
            System.out.println("time: " + resp.getResults());
342
        //queryOpts.add("sort", translatedQuery.getOptions().getSort().getField() + " " + translatedQuery.getOptions().getSort().getMode() );
278 343

  
279
            System.out.println(resp.getFacetField("contextname").getValueCount());
344
        solrClient.setDefaultCollection("DMF-index-openaire");
280 345

  
281
            for (FacetField.Count count : resp.getFacetField("contextname").getValues())
282
                System.out.println(count.getName() + " : " + count.getCount());
346
/*        QueryResponse resp = null;
347
        synchronized (solrClient) {
348
            resp = solrClient.query(SolrParams.toSolrParams(extraOpts));
349
        }*/
350
//        System.out.println("time: " + resp.getElapsedTime());
351
    //System.out.println("results: " + resp.getResults());
283 352

  
353
/*      System.out.println(resp.getFacetField("contextname").getValueCount());
284 354

  
285
            int max = -12;
355
        for (FacetField.Count count:resp.getFacetField("contextname").getValues())
356
            System.out.println(count.getName() + " : " +  count.getCount());
286 357

  
287
            for (FacetField field : resp.getFacetFields()) {
288
                if (field.getValueCount() > max)
289
                    max = field.getValueCount();
290 358

  
291
            }
359
        int max = -12;
292 360

  
293
            System.out.println("max: " + max);
361
        for (FacetField field:resp.getFacetFields()) {
362
            if (field.getValueCount() > max)
363
                max = field.getValueCount();
294 364

  
295
        } catch (org.apache.solr.client.solrj.SolrServerException sse) {
296
            System.out.println("NNNOOOOOO "+ sse);
297 365
        }
298 366

  
299
    }
367
        System.out.println("max: " + max);
368
*/
369
//    }
300 370

  
301 371
    @Override
302 372
    public EPR getEpr() {
303 373
        return epr;
304 374
    }
375
}
305 376

  
306
    /*
307
    public static void main(String[] args) {
308
        String s = "ec__________::EC::FP7::SP4::REGIONS";
309
        String[] k = s.split("_\\:\\:");
377
class BrowseField {
378
    String name;
310 379

  
311
        System.out.println(k.length);
312
        System.out.println(s.split("_\\:\\:")[1]);
313
        System.out.println(s.split("_\\:\\:")[0]);
314
    }*/
380
    public String getName() {
381
        return name;
382
    }
383

  
384
    public void setName(String name) {
385
        this.name = name;
386
    }
387

  
388
    public String getId() {
389
        return id;
390
    }
391

  
392
    public void setId(String id) {
393
        this.id = id;
394
    }
395

  
396
    public String getCount() {
397
        return count;
398
    }
399

  
400
    public void setCount(String count) {
401
        this.count = count;
402
    }
403

  
404
    String id;
405
    String count;
406

  
407

  
315 408
}

Also available in: Unified diff