Project

General

Profile

1
package eu.dnetlib.data.search.solr;
2

    
3
import com.google.gson.Gson;
4
import eu.dnetlib.api.data.SearchServiceException;
5
import eu.dnetlib.data.search.transform.Transformer;
6
import eu.dnetlib.data.search.transform.TransformerException;
7
import eu.dnetlib.data.search.utils.solr.SolrResultSetOptionsUtil;
8
import eu.dnetlib.data.search.utils.solr.SolrResultsFormatter;
9
import eu.dnetlib.domain.EPR;
10
import gr.uoa.di.driver.enabling.resultset.ResultSet;
11
import io.micrometer.core.instrument.Timer;
12
import io.micrometer.prometheus.PrometheusMeterRegistry;
13
import org.apache.commons.lang.StringEscapeUtils;
14
import org.apache.log4j.Logger;
15
import org.apache.solr.client.solrj.SolrClient;
16
import org.apache.solr.client.solrj.SolrServerException;
17
import org.apache.solr.client.solrj.impl.CloudSolrClient;
18
import org.apache.solr.client.solrj.response.FacetField;
19
import org.apache.solr.client.solrj.response.QueryResponse;
20
import org.apache.solr.common.SolrDocumentList;
21
import org.apache.solr.common.params.SolrParams;
22
import org.apache.solr.common.util.NamedList;
23
import org.z3950.zing.cql.CQLParseException;
24

    
25
import javax.ws.rs.core.MediaType;
26
import java.io.IOException;
27
import java.io.OutputStream;
28
import java.util.*;
29

    
30
/**
31
 * Created by antleb on 2/4/14.
32
 */
33

    
34
public class SolrResultSet implements ResultSet<String> {
35

    
36
    private Logger logger = Logger.getLogger(getClass());
37

    
38
    private EPR epr = null;
39
    public SolrClient solrClient = null;
40

    
41
    private NamedList<String> queryOpts = new NamedList<String>();
42
    long size = -1;
43

    
44
    private PrometheusMeterRegistry registry;
45

    
46
    public SolrResultSet(EPR epr, CloudSolrClient cloudSolrClient, PrometheusMeterRegistry registry) throws IOException, CQLParseException {
47
        logger.debug("Setting solr client " + cloudSolrClient);
48
        this.epr = epr;
49
        this.solrClient = cloudSolrClient;
50
        this.queryOpts = SolrResultSetOptionsUtil.extractQueryOptions(epr.getParameter("query"));
51

    
52
        String layout = epr.getParameter("layout");
53
        String mdformat = epr.getParameter("mdformat");
54
        String interpretation = epr.getParameter("interpretation");
55

    
56
        ((CloudSolrClient)solrClient).setDefaultCollection(mdformat + "-" + layout + "-" + interpretation);
57

    
58
        this.registry = registry;
59
    }
60

    
61
    @Override
62
    public boolean isOpen() {
63
        return true;
64
    }
65

    
66
    @Override
67
    public boolean isAlive() {
68
        return true;
69
    }
70

    
71
    @Override
72
    public void close() {
73
/*
74
        try {
75
            logger.debug("!!!!!!!!! !!!!!!! CLOSING !!!!!!!!! !!!!!!!!!! ");
76
            solrClient.close();
77

    
78
        } catch (IOException e) {
79
            logger.error("Error closing result set.", e);
80
        }
81
*/
82
    }
83

    
84
    @Override
85
    public int size() {
86
        return (int) size;
87
    }
88

    
89
    @Override
90
    @Deprecated
91
    public List<String> getElements(int from, int to) {
92
        return get(from, to);
93
    }
94

    
95
    /** Facet fields of the most recent Solr response; null until a query has been executed. */
    List<FacetField> facetFields;
96

    
97
    @Override
98
    @Deprecated
99
    public List<String> get(int from, int to) {
100
        List<String> res = new ArrayList<String>();
101

    
102
        QueryResponse rsp = null;
103

    
104
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
105

    
106
        logger.debug("from: " + from);
107
        logger.debug("to: " + to);
108

    
109

    
110
        queryOpts.add("start", from+1 + "");
111
        queryOpts.add("rows", to + 1+"");
112

    
113
        try {
114

    
115
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
116

    
117
            facetFields = rsp.getFacetFields();
118
            SolrDocumentList docs = rsp.getResults();
119

    
120
            if (facetFields!=null && !facetFields.isEmpty()) {
121
                for (int i = from - 1; i < to; i++) {
122
                    for (FacetField field : facetFields) {
123
                        if (field.getValueCount() > i) {
124
                            BrowseField bf = new BrowseField();
125
                            bf.setId(field.getValues().get(i).getName());
126
                            bf.setName(field.getValues().get(i).getName());
127
                            bf.setCount(field.getValues().get(i).getCount() + "");
128
                            if (map.get(field.getName()) == null) {
129
                                map.put(field.getName(), new ArrayList<String>());
130
                            }
131

    
132
                            map.get(field.getName()).add(new Gson().toJson(bf));
133
                        }
134
                    }
135
                }
136

    
137
                for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
138
                    StringBuilder builder = new StringBuilder();
139
                    builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
140
                    builder.append(facetEntry.getValue());
141
                    res.add(builder.toString());
142
                }
143
            }
144

    
145
            logger.debug("time: " + rsp.getElapsedTime());
146
            logger.debug("found: " + docs.getNumFound());
147
            logger.debug("docs: " + docs.size());
148

    
149
            for (int i = 0; i < docs.size(); i++) {
150
                String result = (String) docs.get(i).get("__result");
151
                res.add(result);
152
            }
153

    
154
            return res;
155

    
156
        } catch (SolrServerException sse) {
157
            logger.error("Fail to get results from Solr. ", sse);
158

    
159
        } catch (IOException ioe) {
160
            logger.error("Fail to get results from Solr. ", ioe);
161
        }
162

    
163
        return null;
164
    }
165

    
166
    @Override
167
    public EPR getEpr() {
168
        return null;
169
    }
170

    
171
    public Map<String,List<String>> newGet(int from, int to, String format, Transformer transformer, Transformer oldRefineTransformer) {
172
        List<String> refineSolrResults = new ArrayList<String>();
173
        List<String> searchSolrResults = new ArrayList<String>();
174

    
175
        QueryResponse rsp = null;
176
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
177

    
178
        queryOpts.add("start", from*to + "");
179
        queryOpts.add("rows", to +"");
180
        //queryOpts.add("f.resulthostingdatasource.facet.limit", "2");
181

    
182
        long startTime = System.nanoTime();
183

    
184
        try {
185
            io.micrometer.core.instrument.Timer.Sample timer = Timer.start(registry);
186
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
187
            timer.stop(registry.timer("solr.server.response.duration"));
188

    
189

    
190
            long estimatedTime = System.nanoTime() - startTime;
191
            logger.info("Solrj time " + estimatedTime/1000000 +  " milliseconds for query:" + queryOpts.get("q") +
192
                    " and facets " + queryOpts.getAll("facet.field") + " and fq " + queryOpts.getAll("fq") + " from: "
193
                    + from + " and size " + to);
194

    
195
            facetFields = rsp.getFacetFields();
196

    
197
            SolrDocumentList docs = rsp.getResults();
198

    
199
            this.size = docs.getNumFound();
200

    
201
            if (facetFields!=null && !facetFields.isEmpty()) {
202
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
203
                    for (FacetField field : facetFields) {
204
                        map.put(field.getName(), new ArrayList<String>());
205
                        BrowseField bf = null;
206
                        for (int i = 0; i < field.getValueCount(); i++) {
207
                            bf = new BrowseField();
208
                            //bf.setId(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
209
                            bf.setId(field.getValues().get(i).getName());
210
                            String[] facetedValues = field.getValues().get(i).getName().split("\\|\\|",2);
211

    
212

    
213
                            logger.debug("faceted values " + Arrays.toString(facetedValues));
214

    
215
                            if (facetedValues.length > 1) {
216
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(facetedValues[1]));
217
                                bf.setName(facetedValues[1]);
218
                                logger.debug("faceted values [1] " + facetedValues[1]);
219

    
220
                            } else if (field.getValues().get(i).getName().split("_\\:\\:",2).length > 1) {
221
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName().split("\\:\\:",2)[1]).replaceAll("\\:\\:", "\\|"));
222
                                bf.setName(field.getValues().get(i).getName().split("\\:\\:",2)[1].replaceAll("\\:\\:", "\\|"));
223

    
224
                            } else {
225
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
226
                                bf.setName(field.getValues().get(i).getName());
227
                            }
228

    
229
                            bf.setCount(field.getValues().get(i).getCount() + "");
230
                            map.get(field.getName()).add(new Gson().toJson(bf));
231
                        }
232

    
233
                    }
234

    
235
                    StringBuilder builder = null;
236

    
237
                    for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
238
                        builder = new StringBuilder();
239
                        builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
240
                        builder.append(facetEntry.getValue());
241
                        refineSolrResults.add(builder.toString());
242
                    }
243

    
244
                } else { //the old implementation & xml as default
245
                    logger.debug("Creating old browse results.");
246
                    createXmlRefineFields(refineSolrResults, oldRefineTransformer);
247
                }
248
            }
249

    
250
            for (int i = 0; i < docs.size(); i++) {
251
                String result = (String) docs.get(i).get("__result");
252

    
253
                logger.debug("["+ i +"]: " + docs.get(i).get("__result"));
254

    
255
                try {
256
                    if (transformer != null) {
257
                        logger.debug("1 >>>>>>" + result);
258
                        String xml = result.replaceAll("<em>","").replaceAll("</em>","");
259
                        result = transformer.transform(xml);
260
                        logger.debug("2 >>>>>>" + result);
261
                    }
262

    
263
                } catch (TransformerException te) {
264
                    logger.warn("Error transforming " + result, te);
265
                    continue;
266
                }
267

    
268
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
269
                    searchSolrResults.add(SolrResultsFormatter.xml2Json(result));
270
                } else { // default xml
271
                    searchSolrResults.add(result);
272
                }
273
            }
274

    
275
            Map<String,List<String>> response = new HashMap<String, List<String>>();
276

    
277
            //logger.debug("refine results " + refineSolrResults);
278
            //logger.info("search results SIZE " + searchSolrResults.size());
279
            //logger.info("search results " + searchSolrResults);
280

    
281

    
282
            response.put("refine",refineSolrResults);
283
            response.put("search", searchSolrResults);
284

    
285
            return response;
286

    
287
        } catch (SolrServerException sse) {
288
            logger.error("Error calling Solr.", sse);
289

    
290
        } catch (IOException ioe) {
291
            logger.error("Error calling Solr.", ioe);
292

    
293
        }
294

    
295
        return null;
296
    }
297

    
298
    /**
299
     * limit is the maximum number of results the cursor get is allowed to fetch. If limit is set to -1 all
300
     * results are returned.
301
     */
302
    public void cursorGet(Transformer transformer, int limit, OutputStream os) throws SolrServerException, SearchServiceException {
303

    
304
        int rows = 500;
305
        int limitCounter = -1;
306

    
307
        queryOpts.add("start", "0");
308
        queryOpts.add("rows", "0");
309
        queryOpts.remove("rows");
310
        queryOpts.add("rows", rows+"");
311
        queryOpts.add("fl", "__result");
312
        queryOpts.add("shards.tolerant","true");
313
        queryOpts.add("cursorMark", "*");
314
        queryOpts.add("sort", "__indexrecordidentifier asc");
315

    
316
        String cursorMark = "*";
317
        String nextCursorMark = "";
318

    
319
        if ( limit > 0 ) {
320
            limitCounter = limit/rows;
321
            logger.info("limit counter " + limitCounter);
322
        }
323

    
324
        try {
325
            QueryResponse resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
326

    
327
            while (!cursorMark.equals(nextCursorMark) && ( limitCounter > 0 || limitCounter == -1)) {
328
                //logger.info(">> " + limitCounter);
329
                resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
330
                cursorMark = nextCursorMark;
331
                nextCursorMark = resp.getNextCursorMark();
332

    
333
                for (int i = 0; i < resp.getResults().size(); i++) {
334
                    if (transformer != null) {
335
                        String result = null;
336
                        try {
337
                            logger.debug("PRE RESULT " + resp.getResults().get(i).get("__result"));
338
                            result = transformer.transform((String) resp.getResults().get(i).get("__result"));
339
                            logger.debug("RESULT " + result);
340

    
341
                        } catch (TransformerException te) {
342
                            logger.warn("Error transforming " + result, te);
343
                            continue;
344
                        }
345

    
346
                        try {
347
                            os.write(result.getBytes());
348
                            os.flush();
349
                        } catch (IOException e) {
350
                            logger.error("Cursor get... ", e);
351
                            continue;
352
                        }
353
                    }
354
                }
355

    
356
                queryOpts.remove("cursorMark");
357
                queryOpts.add("cursorMark", nextCursorMark);
358
                limitCounter--;
359
            }
360

    
361
        } catch (IOException ioe) {
362
            logger.error("Error executing solr query. ", ioe);
363
        }
364
    }
365

    
366

    
367
    //TODO get rid of this as soon as Joomla portal is out
368
    //Just copied and refactored the old one...
369
    @Deprecated
370
    private void createXmlRefineFields(List<String> res, Transformer oldRefineTransformer) {
371
        int max = -12;
372

    
373
        for (FacetField field:facetFields) {
374
            logger.debug("field " + field.getName() + " has count " + field.getValueCount());
375

    
376
            if (field.getValueCount() > max) {
377
                max = field.getValueCount();
378
            }
379
        }
380

    
381
        logger.debug("max " + max);
382

    
383
        for (int i = 0; i < max; i++) {
384
            StringBuilder sb = new StringBuilder();
385

    
386
            sb.append("<row>");
387
            for (FacetField field:facetFields) {
388
                if (field.getValueCount() > i) {
389
                    sb.append("<groupresult field=\"").append(field.getName()).append("\">");
390
                    sb.append("<count>").append(field.getValues().get(i).getCount()).append("</count>");
391
                    sb.append("<originalValue>").append(StringEscapeUtils.escapeXml(field.getValues().get(i).getName())).append("</originalValue>");
392

    
393
                    String[] facetValues = field.getValues().get(i).getName().split("\\|\\|");
394
                    if(facetValues.length > 1) {
395
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[1])).append("</value>");
396
                    } else {
397
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[0])).append("</value>");
398
                    }
399
                    sb.append("</groupresult>");
400
                }
401
            }
402
            sb.append("</row>");
403

    
404
            try {
405
                //logger.debug("row: " + sb.toString());
406
                //logger.debug("row2: " + oldRefineTransformer.transform(sb.toString()));
407

    
408
                //TODO remove
409
                res.add(oldRefineTransformer.transform(sb.toString()));
410

    
411
            } catch (TransformerException te) {
412
                logger.error("Cannot transform refine for: " + sb.toString(), te);
413
            }
414
        }
415
    }
416

    
417
 /*   public static void main(String[] args) throws IOException, CQLParseException, SolrServerException {
418
        CloudSolrServer solrClient = new CloudSolrServer("beta.solr.openaire.eu:9983");
419
        solrClient.setDefaultCollection("DMF-index-openaire");
420

    
421
        NamedList<String> queryOpts = new NamedList<String>();
422

    
423
        //q=*:*&start=0&rows=10&cursorMark=*&sort=dateofcollection asc
424
        queryOpts.add("q", new CqlTranslatorImpl().getTranslatedQuery("objIdentifier = acm_________::0002c24f82c295e925a2bdf7bbf49bfc").asLucene());
425
        queryOpts.add("start", "0");
426
        queryOpts.add("rows", "1");
427
        queryOpts.add("fl", "__result");
428
        queryOpts.add("shards.tolerant","true");
429
        queryOpts.add("cursorMark", "*");
430
        queryOpts.add("sort", "__indexrecordidentifier asc");
431

    
432

    
433
        //queryOpts.add("q", new CqlTranslatorImpl().getTranslatedQuery("oaftype exact project").asLucene());
434
        NamedList<String> extraOpts = new NamedList<String>();
435

    
436
        QueryResponse resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
437

    
438
        System.out.println("results " + resp.getResults().size());
439

    
440
        String cursorMark = "*";
441
        String nextCursorMark = "";
442

    
443
        int curs = 0;
444
        while (!cursorMark.equals(nextCursorMark)) {
445
            System.out.println("cursor " + cursorMark);
446
            System.out.println("next cursor " + nextCursorMark);
447
            cursorMark = nextCursorMark;
448
            for (int i = 0; i < resp.getResults().size(); i++) {
449
                String result = ((ArrayList<String>) resp.getResults().get(i).get("__result")).get(0);
450
                //System.out.println(result);
451
                resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
452
            }
453
            nextCursorMark = resp.getNextCursorMark();
454
            queryOpts.add("cursorMark", nextCursorMark);
455

    
456
            System.out.println("CURS " + curs);
457
            curs ++;
458

    
459
        }
460

    
461

    
462
        //System.out.println((new CqlTranslatorImpl().getTranslatedQuery("objIdentifier = acm_________::0002c24f82c295e925a2bdf7bbf49bfc").asLucene()));
463

    
464

    
465

    
466
        //extraOpts.add("start", "1");
467
       // extraOpts.add("rows", "10");
468
       // extraOpts.addAll(queryOpts);
469

    
470
        //queryOpts.add("facet", "true");
471
        //TranslatedQuery translatedQuery = new CqlTranslatorImpl().getTranslatedQuery("oaftype=result sortBy resultdateofacceptance/sort.descending");
472

    
473
     //   queryOpts.add("q", "oaftype=project");
474
        //queryOpts.add("facet", "true");
475
        //queryOpts.add("facet.mincount", "1");
476
        //queryOpts.add("fq", "popularity");
477

    
478

    
479

    
480
//        queryOpts.put("fq", new CqlTranslatorImpl().getTranslatedQuery("").asLucene());
481
       // queryOpts.add("facet.field", "contextid");
482
       //  queryOpts.add("facet.field", "contextname");
483
       //  queryOpts.add("facet.mincount", "1");
484
       //  queryOpts.add("facet.threads", "10");
485
       // System.out.println(translatedQuery.getOptions().getSort().getMode());
486
       // System.out.println(translatedQuery.getOptions().getSort().getField());
487

    
488
        //queryOpts.add("sort", translatedQuery.getOptions().getSort().getField() + " " + translatedQuery.getOptions().getSort().getMode() );
489

    
490

    
491

    
492
/*        QueryResponse resp = null;
493
        synchronized (solrClient) {
494
            resp = solrClient.query(SolrParams.toSolrParams(extraOpts));
495
        }*/
496
//        System.out.println("time: " + resp.getElapsedTime());
497
    //System.out.println("results: " + resp.getResults());
498

    
499
/*      System.out.println(resp.getFacetField("contextname").getValueCount());
500

    
501
        for (FacetField.Count count:resp.getFacetField("contextname").getValues())
502
            System.out.println(count.getName() + " : " +  count.getCount());
503

    
504

    
505
        int max = -12;
506

    
507
        for (FacetField field:resp.getFacetFields()) {
508
            if (field.getValueCount() > max)
509
                max = field.getValueCount();
510

    
511
        }
512

    
513
        System.out.println("max: " + max);
514
*/
515
 //   }
516

    
517
//    @Override
518
//    public EPR getEpr() {
519
//        return epr;
520
//   }
521
}
522

    
523
/**
 * Plain holder for one facet value: display name, identifier and hit count (kept as text).
 * <p>
 * Instances are serialized with Gson elsewhere in this file, so the fields are declared in the
 * order name, id, count and keep their package-private visibility.
 */
class BrowseField {
    String name;
    String id;
    String count;

    /** @return the display name, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the display name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the identifier, or {@code null} if none was set */
    public String getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the count as text, or {@code null} if none was set */
    public String getCount() {
        return this.count;
    }

    /** @param count the count, as text, to store */
    public void setCount(String count) {
        this.count = count;
    }
}
(4-4/5)