Project

General

Profile

1
package eu.dnetlib.data.search.solr;
2

    
3
import com.google.gson.Gson;
4
import eu.dnetlib.api.data.SearchServiceException;
5
import eu.dnetlib.data.search.transform.Transformer;
6
import eu.dnetlib.data.search.transform.TransformerException;
7
import eu.dnetlib.data.search.utils.solr.SolrResultSetOptionsUtil;
8
import eu.dnetlib.data.search.utils.solr.SolrResultsFormatter;
9
import eu.dnetlib.domain.EPR;
10
import eu.dnetlib.functionality.cql.CqlTranslatorImpl;
11
import gr.uoa.di.driver.enabling.resultset.ResultSet;
12
import org.apache.commons.lang.StringEscapeUtils;
13
import org.apache.log4j.Logger;
14
import org.apache.solr.client.solrj.SolrClient;
15
import org.apache.solr.client.solrj.SolrServerException;
16
import org.apache.solr.client.solrj.impl.CloudSolrClient;
17
import org.apache.solr.client.solrj.response.FacetField;
18
import org.apache.solr.client.solrj.response.QueryResponse;
19
import org.apache.solr.common.SolrDocumentList;
20
import org.apache.solr.common.params.SolrParams;
21
import org.apache.solr.common.util.NamedList;
22
import org.z3950.zing.cql.CQLParseException;
23

    
24
import javax.ws.rs.core.MediaType;
25
import java.io.IOException;
26
import java.io.OutputStream;
27
import java.util.*;
28

    
29
/**
30
 * Created by antleb on 2/4/14.
31
 */
32
public class SolrResultSet implements ResultSet<String> {
33

    
34
    private Logger logger = Logger.getLogger(getClass());
35

    
36
    private EPR epr = null;
37
    public SolrClient solrClient = null;
38

    
39
    private NamedList<String> queryOpts = new NamedList<String>();
40
    long size = -1;
41

    
42

    
43
    public SolrResultSet(EPR epr, CloudSolrClient cloudSolrClient) throws IOException, CQLParseException {
44
        logger.debug("Setting solr client " + cloudSolrClient);
45
        this.epr = epr;
46
        this.solrClient = cloudSolrClient;
47
        this.queryOpts = SolrResultSetOptionsUtil.extractQueryOptions(epr.getParameter("query"));
48

    
49
        String layout = epr.getParameter("layout");
50
        String mdformat = epr.getParameter("mdformat");
51
        String interpretation = epr.getParameter("interpretation");
52

    
53
        ((CloudSolrClient)solrClient).setDefaultCollection(mdformat + "-" + layout + "-" + interpretation);
54
    }
55

    
56
    @Override
57
    public boolean isOpen() {
58
        return true;
59
    }
60

    
61
    @Override
62
    public boolean isAlive() {
63
        return true;
64
    }
65

    
66
    @Override
67
    public void close() {
68
/*
69
        try {
70
            logger.debug("!!!!!!!!! !!!!!!! CLOSING !!!!!!!!! !!!!!!!!!! ");
71
            solrClient.close();
72

    
73
        } catch (IOException e) {
74
            logger.error("Error closing result set.", e);
75
        }
76
*/
77
    }
78

    
79
    @Override
80
    public int size() {
81
        return (int) size;
82
    }
83

    
84
    @Override
85
    @Deprecated
86
    public List<String> getElements(int from, int to) {
87
        return get(from, to);
88
    }
89

    
90
    List<FacetField> facetFields = null;
91

    
92
    @Override
93
    @Deprecated
94
    public List<String> get(int from, int to) {
95
        List<String> res = new ArrayList<String>();
96

    
97
        QueryResponse rsp = null;
98

    
99
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
100

    
101
        logger.debug("from: " + from);
102
        logger.debug("to: " + to);
103

    
104

    
105
        queryOpts.add("start", from+1 + "");
106
        queryOpts.add("rows", to + 1+"");
107

    
108
        try {
109
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
110
            facetFields = rsp.getFacetFields();
111
            SolrDocumentList docs = rsp.getResults();
112

    
113
            if (facetFields!=null && !facetFields.isEmpty()) {
114
                for (int i = from - 1; i < to; i++) {
115
                    for (FacetField field : facetFields) {
116
                        if (field.getValueCount() > i) {
117
                            BrowseField bf = new BrowseField();
118
                            bf.setId(field.getValues().get(i).getName());
119
                            bf.setName(field.getValues().get(i).getName());
120
                            bf.setCount(field.getValues().get(i).getCount() + "");
121
                            if (map.get(field.getName()) == null) {
122
                                map.put(field.getName(), new ArrayList<String>());
123
                            }
124

    
125
                            map.get(field.getName()).add(new Gson().toJson(bf));
126
                        }
127
                    }
128
                }
129

    
130
                for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
131
                    StringBuilder builder = new StringBuilder();
132
                    builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
133
                    builder.append(facetEntry.getValue());
134
                    res.add(builder.toString());
135
                }
136
            }
137

    
138
            logger.debug("time: " + rsp.getElapsedTime());
139
            logger.debug("found: " + docs.getNumFound());
140
            logger.debug("docs: " + docs.size());
141

    
142
            for (int i = 0; i < docs.size(); i++) {
143
                String result = (String) docs.get(i).get("__result");
144
                res.add(result);
145
            }
146

    
147
            return res;
148

    
149
        } catch (SolrServerException sse) {
150
            logger.error("Fail to get results from Solr. ", sse);
151

    
152
        } catch (IOException ioe) {
153
            logger.error("Fail to get results from Solr. ", ioe);
154
        }
155

    
156
        return null;
157
    }
158

    
159
    @Override
160
    public EPR getEpr() {
161
        return null;
162
    }
163

    
164
    public Map<String,List<String>> newGet(int from, int to, String format, Transformer transformer, Transformer oldRefineTransformer) {
165
        List<String> refineSolrResults = new ArrayList<String>();
166
        List<String> searchSolrResults = new ArrayList<String>();
167

    
168
        QueryResponse rsp = null;
169
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
170

    
171
        queryOpts.add("start", from*to + "");
172
        queryOpts.add("rows", to +"");
173

    
174
        try {
175
            long startTime = System.nanoTime();
176

    
177
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
178
            long estimatedTime = System.nanoTime() - startTime;
179
            logger.info("Solrj time " + estimatedTime/1000000 +  " milliseconds for query:" + queryOpts.get("q") +
180
                    " and facets " + queryOpts.getAll("facet.field") + " and fq " + queryOpts.getAll("fq") + " from: "
181
                    + from + " and size " + to);
182

    
183
            facetFields = rsp.getFacetFields();
184

    
185
            SolrDocumentList docs = rsp.getResults();
186

    
187
            this.size = docs.getNumFound();
188

    
189
            if (facetFields!=null && !facetFields.isEmpty()) {
190
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
191
                    for (FacetField field : facetFields) {
192
                        map.put(field.getName(), new ArrayList<String>());
193
                        BrowseField bf = null;
194
                        for (int i = 0; i < field.getValueCount(); i++) {
195
                            bf = new BrowseField();
196
                            //bf.setId(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
197
                            bf.setId(field.getValues().get(i).getName());
198
                            String[] facetedValues = field.getValues().get(i).getName().split("\\|\\|",2);
199

    
200

    
201
                            logger.debug("faceted values " + Arrays.toString(facetedValues));
202

    
203
                            if (facetedValues.length > 1) {
204
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(facetedValues[1]));
205
                                bf.setName(facetedValues[1]);
206
                                logger.debug("faceted values [1] " + facetedValues[1]);
207

    
208
                            } else if (field.getValues().get(i).getName().split("_\\:\\:",2).length > 1) {
209
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName().split("\\:\\:",2)[1]).replaceAll("\\:\\:", "\\|"));
210
                                bf.setName(field.getValues().get(i).getName().split("\\:\\:",2)[1].replaceAll("\\:\\:", "\\|"));
211

    
212
                            } else {
213
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
214
                                bf.setName(field.getValues().get(i).getName());
215
                            }
216

    
217
                            bf.setCount(field.getValues().get(i).getCount() + "");
218
                            map.get(field.getName()).add(new Gson().toJson(bf));
219
                        }
220

    
221
                    }
222

    
223
                    StringBuilder builder = null;
224

    
225
                    for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
226
                        builder = new StringBuilder();
227
                        builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
228
                        builder.append(facetEntry.getValue());
229
                        refineSolrResults.add(builder.toString());
230
                    }
231

    
232
                } else { //the old implementation & xml as default
233
                    logger.debug("Creating old browse results.");
234
                    createXmlRefineFields(refineSolrResults, oldRefineTransformer);
235
                }
236
            }
237

    
238
            for (int i = 0; i < docs.size(); i++) {
239
                String result = (String) docs.get(i).get("__result");
240

    
241
                logger.debug("["+ i +"]: " + docs.get(i).get("__result"));
242

    
243
                try {
244
                    if (transformer != null) {
245
                        logger.debug("1 >>>>>>" + result);
246
                        String xml = result.replaceAll("<em>","").replaceAll("</em>","");
247
                        result = transformer.transform(xml);
248
                        logger.debug("2 >>>>>>" + result);
249
                    }
250

    
251
                } catch (TransformerException te) {
252
                    logger.warn("Error transforming " + result, te);
253
                    continue;
254
                }
255

    
256
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
257
                    searchSolrResults.add(SolrResultsFormatter.xml2Json(result));
258
                } else { // default xml
259
                    searchSolrResults.add(result);
260
                }
261
            }
262

    
263
            Map<String,List<String>> response = new HashMap<String, List<String>>();
264

    
265
            //logger.debug("refine results " + refineSolrResults);
266
            //logger.debug("search results " + searchSolrResults);
267

    
268
            response.put("refine",refineSolrResults);
269
            response.put("search", searchSolrResults);
270

    
271
            return response;
272

    
273
        } catch (SolrServerException sse) {
274
            logger.error("Error calling Solr.", sse);
275

    
276
        } catch (IOException ioe) {
277
            logger.error("Error calling Solr.", ioe);
278
        }
279
        return null;
280
    }
281

    
282
    public void cursorGet(Transformer transformer, OutputStream os) throws SolrServerException, SearchServiceException {
283
        queryOpts.add("start", "0");
284
        queryOpts.add("rows", "0");
285
        queryOpts.remove("rows");
286
        queryOpts.add("rows", "500");
287
        queryOpts.add("fl", "__result");
288
        queryOpts.add("shards.tolerant","true");
289
        queryOpts.add("cursorMark", "*");
290
        queryOpts.add("sort", "__indexrecordidentifier asc");
291

    
292
        String cursorMark = "*";
293
        String nextCursorMark = "";
294

    
295
        int curs = 0;
296
        try {
297
            QueryResponse resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
298

    
299
            while (!cursorMark.equals(nextCursorMark)) {
300
                resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
301
                cursorMark = nextCursorMark;
302
                nextCursorMark = resp.getNextCursorMark();
303

    
304
                for (int i = 0; i < resp.getResults().size(); i++) {
305
                    if (transformer != null) {
306
                        String result = null;
307
                        try {
308
                            result = transformer.transform((String) resp.getResults().get(i).get("__result"));
309
                            logger.debug("RESULT " + result);
310

    
311
                        } catch (TransformerException te) {
312
                            logger.warn("Error transforming " + result, te);
313
                            continue;
314
                        }
315

    
316
                        try {
317
                            os.write(result.getBytes());
318
                            os.flush();
319
                        } catch (IOException e) {
320
                            logger.error("Cursor get... ", e);
321
                            continue;
322
                        }
323
                    }
324
                }
325

    
326
                queryOpts.remove("cursorMark");
327
                queryOpts.add("cursorMark", nextCursorMark);
328
                curs++;
329
            }
330

    
331
        } catch (IOException ioe) {
332
            logger.error("Error executing solr query. ", ioe);
333
        }
334

    
335
        logger.debug("CURS " + curs);
336
    }
337

    
338

    
339
    //TODO get rid of this as soon as Joomla portal is out
340
    //Just copied and refactored the old one...
341
    @Deprecated
342
    private void createXmlRefineFields(List<String> res, Transformer oldRefineTransformer) {
343
        int max = -12;
344

    
345
        for (FacetField field:facetFields) {
346
            logger.debug("field " + field.getName() + " has count " + field.getValueCount());
347

    
348
            if (field.getValueCount() > max) {
349
                max = field.getValueCount();
350
            }
351
        }
352

    
353
        logger.debug("max " + max);
354

    
355
        for (int i = 0; i < max; i++) {
356
            StringBuilder sb = new StringBuilder();
357

    
358
            sb.append("<row>");
359
            for (FacetField field:facetFields) {
360
                if (field.getValueCount() > i) {
361
                    sb.append("<groupresult field=\"").append(field.getName()).append("\">");
362
                    sb.append("<count>").append(field.getValues().get(i).getCount()).append("</count>");
363
                    sb.append("<originalValue>").append(StringEscapeUtils.escapeXml(field.getValues().get(i).getName())).append("</originalValue>");
364

    
365
                    String[] facetValues = field.getValues().get(i).getName().split("\\|\\|");
366
                    if(facetValues.length > 1) {
367
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[1])).append("</value>");
368
                    } else {
369
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[0])).append("</value>");
370
                    }
371
                    sb.append("</groupresult>");
372
                }
373
            }
374
            sb.append("</row>");
375

    
376
            try {
377
                //logger.debug("row: " + sb.toString());
378
                //logger.debug("row2: " + oldRefineTransformer.transform(sb.toString()));
379

    
380
                //TODO remove
381
                res.add(oldRefineTransformer.transform(sb.toString()));
382

    
383
            } catch (TransformerException te) {
384
                logger.error("Cannot transform refine for: " + sb.toString(), te);
385
            }
386
        }
387
    }
388

    
389
   public static void main(String[] args) throws IOException, CQLParseException, SolrServerException {
390
        CloudSolrServer solrClient = new CloudSolrServer("beta.solr.openaire.eu:9983");
391
        solrClient.setDefaultCollection("TMF-index-openaire");
392

    
393
        NamedList<String> queryOpts = new NamedList<String>();
394

    
395
        //q=*:*&start=0&rows=10&cursorMark=*&sort=dateofcollection asc
396
        queryOpts.add("q", new CqlTranslatorImpl().getTranslatedQuery("oaftype=result").asLucene());
397
        queryOpts.add("start", "0");
398
        queryOpts.add("rows", "2");
399
        //queryOpts.add("fl", "__result");
400
        queryOpts.add("shards.tolerant","true");
401
       // queryOpts.add("cursorMark", "*");
402
        queryOpts.add("sort", "resultdateofacceptance desc");
403

    
404

    
405
        //queryOpts.add("q", new CqlTranslatorImpl().getTranslatedQuery("oaftype exact project").asLucene());
406
        NamedList<String> extraOpts = new NamedList<String>();
407

    
408
        QueryResponse resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
409

    
410
        System.out.println("results " + resp.getResults().size());
411

    
412

    
413
        /*String cursorMark = "*";
414
        String nextCursorMark = "";
415

    
416
        int curs = 0;
417
        while (!cursorMark.equals(nextCursorMark)) {
418
            System.out.println("cursor " + cursorMark);
419
            System.out.println("next cursor " + nextCursorMark);
420
            cursorMark = nextCursorMark;
421
            for (int i = 0; i < resp.getResults().size(); i++) {
422
                String result = ((ArrayList<String>) resp.getResults().get(i).get("__result")).get(0);
423
                //System.out.println(result);
424
                resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
425
            }
426
            nextCursorMark = resp.getNextCursorMark();
427
            queryOpts.add("cursorMark", nextCursorMark);
428

    
429
            System.out.println("CURS " + curs);
430
            curs ++;
431

    
432
        }
433

    
434

    
435
        //System.out.println((new CqlTranslatorImpl().getTranslatedQuery("objIdentifier = acm_________::0002c24f82c295e925a2bdf7bbf49bfc").asLucene()));
436

    
437

    
438

    
439
        //extraOpts.add("start", "1");
440
       // extraOpts.add("rows", "10");
441
       // extraOpts.addAll(queryOpts);
442

    
443
        //queryOpts.add("facet", "true");
444
        //TranslatedQuery translatedQuery = new CqlTranslatorImpl().getTranslatedQuery("oaftype=result sortBy resultdateofacceptance/sort.descending");
445

    
446
     //   queryOpts.add("q", "oaftype=project");
447
        //queryOpts.add("facet", "true");
448
        //queryOpts.add("facet.mincount", "1");
449
        //queryOpts.add("fq", "popularity");
450

    
451

    
452

    
453
//        queryOpts.put("fq", new CqlTranslatorImpl().getTranslatedQuery("").asLucene());
454
       // queryOpts.add("facet.field", "contextid");
455
       //  queryOpts.add("facet.field", "contextname");
456
       //  queryOpts.add("facet.mincount", "1");
457
       //  queryOpts.add("facet.threads", "10");
458
       // System.out.println(translatedQuery.getOptions().getSort().getMode());
459
       // System.out.println(translatedQuery.getOptions().getSort().getField());
460

    
461
        //queryOpts.add("sort", translatedQuery.getOptions().getSort().getField() + " " + translatedQuery.getOptions().getSort().getMode() );
462

    
463

    
464

    
465
/*        QueryResponse resp = null;
466
        synchronized (solrClient) {
467
            resp = solrClient.query(SolrParams.toSolrParams(extraOpts));
468
        }*/
469
//        System.out.println("time: " + resp.getElapsedTime());
470
    //System.out.println("results: " + resp.getResults());
471

    
472
/*      System.out.println(resp.getFacetField("contextname").getValueCount());
473

    
474
        for (FacetField.Count count:resp.getFacetField("contextname").getValues())
475
            System.out.println(count.getName() + " : " +  count.getCount());
476

    
477

    
478
        int max = -12;
479

    
480
        for (FacetField field:resp.getFacetFields()) {
481
            if (field.getValueCount() > max)
482
                max = field.getValueCount();
483

    
484
        }
485

    
486
        System.out.println("max: " + max);
487
*/
488
    }
489

    
490
//    @Override
491
//    public EPR getEpr() {
492
//        return epr;
493
//   }
494
}
495

    
496
/**
 * Simple holder for one facet value: its identifier, display name and hit count (kept as text).
 * <p>
 * Field names and their declaration order ({@code name}, {@code id}, {@code count}) are kept
 * unchanged because instances are serialised with Gson elsewhere in this file.
 */
class BrowseField {
    String name;
    String id;
    String count;

    /** @return the display name, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the display name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the identifier, or {@code null} if none was set */
    public String getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the hit count as text, or {@code null} if none was set */
    public String getCount() {
        return this.count;
    }

    /** @param count the hit count, as text, to store */
    public void setCount(String count) {
        this.count = count;
    }
}
(4-4/5)