Project

General

Profile

1
package eu.dnetlib.data.search.solr;
2

    
3
import com.google.gson.Gson;
4
import eu.dnetlib.api.data.SearchServiceException;
5
import eu.dnetlib.data.search.transform.Transformer;
6
import eu.dnetlib.data.search.transform.TransformerException;
7
import eu.dnetlib.data.search.utils.solr.SolrResultSetOptionsUtil;
8
import eu.dnetlib.data.search.utils.solr.SolrResultsFormatter;
9
import eu.dnetlib.domain.EPR;
10
import gr.uoa.di.driver.enabling.resultset.ResultSet;
11
import io.micrometer.core.instrument.Timer;
12
import io.micrometer.prometheus.PrometheusMeterRegistry;
13
import org.apache.commons.lang.StringEscapeUtils;
14
import org.apache.log4j.Logger;
15
import org.apache.solr.client.solrj.SolrClient;
16
import org.apache.solr.client.solrj.SolrServerException;
17
import org.apache.solr.client.solrj.impl.CloudSolrClient;
18
import org.apache.solr.client.solrj.response.FacetField;
19
import org.apache.solr.client.solrj.response.QueryResponse;
20
import org.apache.solr.common.SolrDocumentList;
21
import org.apache.solr.common.params.SolrParams;
22
import org.apache.solr.common.util.NamedList;
23
import org.z3950.zing.cql.CQLParseException;
24

    
25
import javax.ws.rs.core.MediaType;
26
import java.io.IOException;
27
import java.io.OutputStream;
28
import java.util.*;
29

    
30
/**
31
 * Created by antleb on 2/4/14.
32
 */
33

    
34
public class SolrResultSet implements ResultSet<String> {
35

    
36
    private Logger logger = Logger.getLogger(getClass());
37

    
38
    private EPR epr = null;
39
    public SolrClient solrClient = null;
40

    
41
    private NamedList<String> queryOpts = new NamedList<String>();
42
    long size = -1;
43

    
44
    private PrometheusMeterRegistry registry;
45

    
46
    public SolrResultSet(EPR epr, CloudSolrClient cloudSolrClient, PrometheusMeterRegistry registry) throws IOException, CQLParseException {
47
        logger.debug("Setting solr client " + cloudSolrClient);
48
        this.epr = epr;
49
        this.solrClient = cloudSolrClient;
50
        this.queryOpts = SolrResultSetOptionsUtil.extractQueryOptions(epr.getParameter("query"));
51

    
52
        String layout = epr.getParameter("layout");
53
        String mdformat = epr.getParameter("mdformat");
54
        String interpretation = epr.getParameter("interpretation");
55

    
56
        ((CloudSolrClient)solrClient).setDefaultCollection(mdformat + "-" + layout + "-" + interpretation);
57

    
58
        this.registry = registry;
59
    }
60

    
61
    @Override
62
    public boolean isOpen() {
63
        return true;
64
    }
65

    
66
    @Override
67
    public boolean isAlive() {
68
        return true;
69
    }
70

    
71
    @Override
72
    public void close() {
73
/*
74
        try {
75
            logger.debug("!!!!!!!!! !!!!!!! CLOSING !!!!!!!!! !!!!!!!!!! ");
76
            solrClient.close();
77

    
78
        } catch (IOException e) {
79
            logger.error("Error closing result set.", e);
80
        }
81
*/
82
    }
83

    
84
    @Override
85
    public int size() {
86
        return (int) size;
87
    }
88

    
89
    @Override
90
    @Deprecated
91
    public List<String> getElements(int from, int to) {
92
        return get(from, to);
93
    }
94

    
95
    /** Facet fields of the most recent Solr response; may be {@code null}. */
    List<FacetField> facetFields;
96

    
97
    @Override
98
    @Deprecated
99
    public List<String> get(int from, int to) {
100
        List<String> res = new ArrayList<String>();
101

    
102
        QueryResponse rsp = null;
103

    
104
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
105

    
106
        logger.debug("from: " + from);
107
        logger.debug("to: " + to);
108

    
109

    
110
        queryOpts.add("start", from+1 + "");
111
        queryOpts.add("rows", to + 1+"");
112

    
113
        try {
114

    
115
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
116

    
117
            facetFields = rsp.getFacetFields();
118
            SolrDocumentList docs = rsp.getResults();
119

    
120
            if (facetFields!=null && !facetFields.isEmpty()) {
121
                for (int i = from - 1; i < to; i++) {
122
                    for (FacetField field : facetFields) {
123
                        if (field.getValueCount() > i) {
124
                            BrowseField bf = new BrowseField();
125
                            bf.setId(field.getValues().get(i).getName());
126
                            bf.setName(field.getValues().get(i).getName());
127
                            bf.setCount(field.getValues().get(i).getCount() + "");
128
                            if (map.get(field.getName()) == null) {
129
                                map.put(field.getName(), new ArrayList<String>());
130
                            }
131

    
132
                            map.get(field.getName()).add(new Gson().toJson(bf));
133
                        }
134
                    }
135
                }
136

    
137
                for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
138
                    StringBuilder builder = new StringBuilder();
139
                    builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
140
                    builder.append(facetEntry.getValue());
141
                    res.add(builder.toString());
142
                }
143
            }
144

    
145
            logger.debug("time: " + rsp.getElapsedTime());
146
            logger.debug("found: " + docs.getNumFound());
147
            logger.debug("docs: " + docs.size());
148

    
149
            for (int i = 0; i < docs.size(); i++) {
150
                String result = (String) docs.get(i).get("__result");
151
                res.add(result);
152
            }
153

    
154
            return res;
155

    
156
        } catch (SolrServerException sse) {
157
            logger.error("Fail to get results from Solr. ", sse);
158

    
159
        } catch (IOException ioe) {
160
            logger.error("Fail to get results from Solr. ", ioe);
161
        }
162

    
163
        return null;
164
    }
165

    
166
    @Override
167
    public EPR getEpr() {
168
        return null;
169
    }
170

    
171
    public Map<String,List<String>> newGet(int from, int to, String format, Transformer transformer, Transformer oldRefineTransformer) {
172
        List<String> refineSolrResults = new ArrayList<String>();
173
        List<String> searchSolrResults = new ArrayList<String>();
174

    
175
        QueryResponse rsp = null;
176
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
177

    
178
        queryOpts.add("start", from*to + "");
179
        queryOpts.add("rows", to +"");
180

    
181
        long startTime = System.nanoTime();
182

    
183
        try {
184
            io.micrometer.core.instrument.Timer.Sample timer = Timer.start(registry);
185
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
186
            timer.stop(registry.timer("solr.server.response.duration"));
187

    
188

    
189
            long estimatedTime = System.nanoTime() - startTime;
190
            logger.info("Solrj time " + estimatedTime/1000000 +  " milliseconds for query:" + queryOpts.get("q") +
191
                    " and facets " + queryOpts.getAll("facet.field") + " and fq " + queryOpts.getAll("fq") + " from: "
192
                    + from + " and size " + to);
193

    
194
            facetFields = rsp.getFacetFields();
195

    
196
            SolrDocumentList docs = rsp.getResults();
197

    
198
            this.size = docs.getNumFound();
199

    
200
            if (facetFields!=null && !facetFields.isEmpty()) {
201
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
202
                    for (FacetField field : facetFields) {
203
                        map.put(field.getName(), new ArrayList<String>());
204
                        BrowseField bf = null;
205
                        for (int i = 0; i < field.getValueCount(); i++) {
206
                            bf = new BrowseField();
207
                            //bf.setId(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
208
                            bf.setId(field.getValues().get(i).getName());
209
                            String[] facetedValues = field.getValues().get(i).getName().split("\\|\\|",2);
210

    
211

    
212
                            logger.debug("faceted values " + Arrays.toString(facetedValues));
213

    
214
                            if (facetedValues.length > 1) {
215
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(facetedValues[1]));
216
                                bf.setName(facetedValues[1]);
217
                                logger.debug("faceted values [1] " + facetedValues[1]);
218

    
219
                            } else if (field.getValues().get(i).getName().split("_\\:\\:",2).length > 1) {
220
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName().split("\\:\\:",2)[1]).replaceAll("\\:\\:", "\\|"));
221
                                bf.setName(field.getValues().get(i).getName().split("\\:\\:",2)[1].replaceAll("\\:\\:", "\\|"));
222

    
223
                            } else {
224
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
225
                                bf.setName(field.getValues().get(i).getName());
226
                            }
227

    
228
                            bf.setCount(field.getValues().get(i).getCount() + "");
229
                            map.get(field.getName()).add(new Gson().toJson(bf));
230
                        }
231

    
232
                    }
233

    
234
                    StringBuilder builder = null;
235

    
236
                    for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
237
                        builder = new StringBuilder();
238
                        builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
239
                        builder.append(facetEntry.getValue());
240
                        refineSolrResults.add(builder.toString());
241
                    }
242

    
243
                } else { //the old implementation & xml as default
244
                    logger.debug("Creating old browse results.");
245
                    createXmlRefineFields(refineSolrResults, oldRefineTransformer);
246
                }
247
            }
248

    
249
            for (int i = 0; i < docs.size(); i++) {
250
                String result = (String) docs.get(i).get("__result");
251

    
252
                logger.debug("["+ i +"]: " + docs.get(i).get("__result"));
253

    
254
                try {
255
                    if (transformer != null) {
256
                        logger.debug("1 >>>>>>" + result);
257
                        String xml = result.replaceAll("<em>","").replaceAll("</em>","");
258
                        result = transformer.transform(xml);
259
                        logger.debug("2 >>>>>>" + result);
260
                    }
261

    
262
                } catch (TransformerException te) {
263
                    logger.warn("Error transforming " + result, te);
264
                    continue;
265
                }
266

    
267
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
268
                    searchSolrResults.add(SolrResultsFormatter.xml2Json(result));
269
                } else { // default xml
270
                    searchSolrResults.add(result);
271
                }
272
            }
273

    
274
            Map<String,List<String>> response = new HashMap<String, List<String>>();
275

    
276
            //logger.debug("refine results " + refineSolrResults);
277
            //logger.info("search results SIZE " + searchSolrResults.size());
278
            //logger.info("search results " + searchSolrResults);
279

    
280

    
281
            response.put("refine",refineSolrResults);
282
            response.put("search", searchSolrResults);
283

    
284
            return response;
285

    
286
        } catch (SolrServerException sse) {
287
            logger.error("Error calling Solr.", sse);
288

    
289
        } catch (IOException ioe) {
290
            logger.error("Error calling Solr.", ioe);
291

    
292
        }
293

    
294
        return null;
295
    }
296

    
297
    public void cursorGet(Transformer transformer, OutputStream os) throws SolrServerException, SearchServiceException {
298
        queryOpts.add("start", "0");
299
        queryOpts.add("rows", "0");
300
        queryOpts.remove("rows");
301
        queryOpts.add("rows", "500");
302
        queryOpts.add("fl", "__result");
303
        queryOpts.add("shards.tolerant","true");
304
        queryOpts.add("cursorMark", "*");
305
        queryOpts.add("sort", "__indexrecordidentifier asc");
306

    
307
        String cursorMark = "*";
308
        String nextCursorMark = "";
309

    
310
        int curs = 0;
311
        try {
312
            QueryResponse resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
313

    
314
            while (!cursorMark.equals(nextCursorMark)) {
315
                resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
316
                cursorMark = nextCursorMark;
317
                nextCursorMark = resp.getNextCursorMark();
318

    
319
                for (int i = 0; i < resp.getResults().size(); i++) {
320
                    if (transformer != null) {
321
                        String result = null;
322
                        try {
323
                            result = transformer.transform((String) resp.getResults().get(i).get("__result"));
324
                            logger.debug("RESULT " + result);
325

    
326
                        } catch (TransformerException te) {
327
                            logger.warn("Error transforming " + result, te);
328
                            continue;
329
                        }
330

    
331
                        try {
332
                            os.write(result.getBytes());
333
                            os.flush();
334
                        } catch (IOException e) {
335
                            logger.error("Cursor get... ", e);
336
                            continue;
337
                        }
338
                    }
339
                }
340

    
341
                queryOpts.remove("cursorMark");
342
                queryOpts.add("cursorMark", nextCursorMark);
343
                curs++;
344
            }
345

    
346
        } catch (IOException ioe) {
347
            logger.error("Error executing solr query. ", ioe);
348
        }
349

    
350
        logger.debug("CURS " + curs);
351
    }
352

    
353

    
354
    //TODO get rid of this as soon as Joomla portal is out
355
    //Just copied and refactored the old one...
356
    @Deprecated
357
    private void createXmlRefineFields(List<String> res, Transformer oldRefineTransformer) {
358
        int max = -12;
359

    
360
        for (FacetField field:facetFields) {
361
            logger.debug("field " + field.getName() + " has count " + field.getValueCount());
362

    
363
            if (field.getValueCount() > max) {
364
                max = field.getValueCount();
365
            }
366
        }
367

    
368
        logger.debug("max " + max);
369

    
370
        for (int i = 0; i < max; i++) {
371
            StringBuilder sb = new StringBuilder();
372

    
373
            sb.append("<row>");
374
            for (FacetField field:facetFields) {
375
                if (field.getValueCount() > i) {
376
                    sb.append("<groupresult field=\"").append(field.getName()).append("\">");
377
                    sb.append("<count>").append(field.getValues().get(i).getCount()).append("</count>");
378
                    sb.append("<originalValue>").append(StringEscapeUtils.escapeXml(field.getValues().get(i).getName())).append("</originalValue>");
379

    
380
                    String[] facetValues = field.getValues().get(i).getName().split("\\|\\|");
381
                    if(facetValues.length > 1) {
382
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[1])).append("</value>");
383
                    } else {
384
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[0])).append("</value>");
385
                    }
386
                    sb.append("</groupresult>");
387
                }
388
            }
389
            sb.append("</row>");
390

    
391
            try {
392
                //logger.debug("row: " + sb.toString());
393
                //logger.debug("row2: " + oldRefineTransformer.transform(sb.toString()));
394

    
395
                //TODO remove
396
                res.add(oldRefineTransformer.transform(sb.toString()));
397

    
398
            } catch (TransformerException te) {
399
                logger.error("Cannot transform refine for: " + sb.toString(), te);
400
            }
401
        }
402
    }
403

    
404
 /*   public static void main(String[] args) throws IOException, CQLParseException, SolrServerException {
405
        CloudSolrServer solrClient = new CloudSolrServer("beta.solr.openaire.eu:9983");
406
        solrClient.setDefaultCollection("DMF-index-openaire");
407

    
408
        NamedList<String> queryOpts = new NamedList<String>();
409

    
410
        //q=*:*&start=0&rows=10&cursorMark=*&sort=dateofcollection asc
411
        queryOpts.add("q", new CqlTranslatorImpl().getTranslatedQuery("objIdentifier = acm_________::0002c24f82c295e925a2bdf7bbf49bfc").asLucene());
412
        queryOpts.add("start", "0");
413
        queryOpts.add("rows", "1");
414
        queryOpts.add("fl", "__result");
415
        queryOpts.add("shards.tolerant","true");
416
        queryOpts.add("cursorMark", "*");
417
        queryOpts.add("sort", "__indexrecordidentifier asc");
418

    
419

    
420
        //queryOpts.add("q", new CqlTranslatorImpl().getTranslatedQuery("oaftype exact project").asLucene());
421
        NamedList<String> extraOpts = new NamedList<String>();
422

    
423
        QueryResponse resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
424

    
425
        System.out.println("results " + resp.getResults().size());
426

    
427
        String cursorMark = "*";
428
        String nextCursorMark = "";
429

    
430
        int curs = 0;
431
        while (!cursorMark.equals(nextCursorMark)) {
432
            System.out.println("cursor " + cursorMark);
433
            System.out.println("next cursor " + nextCursorMark);
434
            cursorMark = nextCursorMark;
435
            for (int i = 0; i < resp.getResults().size(); i++) {
436
                String result = ((ArrayList<String>) resp.getResults().get(i).get("__result")).get(0);
437
                //System.out.println(result);
438
                resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
439
            }
440
            nextCursorMark = resp.getNextCursorMark();
441
            queryOpts.add("cursorMark", nextCursorMark);
442

    
443
            System.out.println("CURS " + curs);
444
            curs ++;
445

    
446
        }
447

    
448

    
449
        //System.out.println((new CqlTranslatorImpl().getTranslatedQuery("objIdentifier = acm_________::0002c24f82c295e925a2bdf7bbf49bfc").asLucene()));
450

    
451

    
452

    
453
        //extraOpts.add("start", "1");
454
       // extraOpts.add("rows", "10");
455
       // extraOpts.addAll(queryOpts);
456

    
457
        //queryOpts.add("facet", "true");
458
        //TranslatedQuery translatedQuery = new CqlTranslatorImpl().getTranslatedQuery("oaftype=result sortBy resultdateofacceptance/sort.descending");
459

    
460
     //   queryOpts.add("q", "oaftype=project");
461
        //queryOpts.add("facet", "true");
462
        //queryOpts.add("facet.mincount", "1");
463
        //queryOpts.add("fq", "popularity");
464

    
465

    
466

    
467
//        queryOpts.put("fq", new CqlTranslatorImpl().getTranslatedQuery("").asLucene());
468
       // queryOpts.add("facet.field", "contextid");
469
       //  queryOpts.add("facet.field", "contextname");
470
       //  queryOpts.add("facet.mincount", "1");
471
       //  queryOpts.add("facet.threads", "10");
472
       // System.out.println(translatedQuery.getOptions().getSort().getMode());
473
       // System.out.println(translatedQuery.getOptions().getSort().getField());
474

    
475
        //queryOpts.add("sort", translatedQuery.getOptions().getSort().getField() + " " + translatedQuery.getOptions().getSort().getMode() );
476

    
477

    
478

    
479
/*        QueryResponse resp = null;
480
        synchronized (solrClient) {
481
            resp = solrClient.query(SolrParams.toSolrParams(extraOpts));
482
        }*/
483
//        System.out.println("time: " + resp.getElapsedTime());
484
    //System.out.println("results: " + resp.getResults());
485

    
486
/*      System.out.println(resp.getFacetField("contextname").getValueCount());
487

    
488
        for (FacetField.Count count:resp.getFacetField("contextname").getValues())
489
            System.out.println(count.getName() + " : " +  count.getCount());
490

    
491

    
492
        int max = -12;
493

    
494
        for (FacetField field:resp.getFacetFields()) {
495
            if (field.getValueCount() > max)
496
                max = field.getValueCount();
497

    
498
        }
499

    
500
        System.out.println("max: " + max);
501
*/
502
 //   }
503

    
504
//    @Override
505
//    public EPR getEpr() {
506
//        return epr;
507
//   }
508
}
509

    
510
/**
 * Plain bean describing one facet value: its identifier, display name and hit count.
 *
 * <p>Instances are turned into JSON with Gson by {@code SolrResultSet}. The fields keep
 * their names and their declaration order (name, id, count); presumably both show up in
 * the generated JSON, so change them only together with the consumers.
 */
class BrowseField {

    String name;
    String id;
    String count;

    /** @return the display name, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the display name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the identifier, or {@code null} if none was set */
    public String getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the hit count as text, or {@code null} if none was set */
    public String getCount() {
        return this.count;
    }

    /** @param count the hit count, as text */
    public void setCount(String count) {
        this.count = count;
    }
}
(4-4/5)