Project

General

Profile

1
package eu.dnetlib.data.search.solr;
2

    
3
import com.google.gson.Gson;
4
import eu.dnetlib.api.data.SearchServiceException;
5
import eu.dnetlib.data.search.transform.Transformer;
6
import eu.dnetlib.data.search.transform.TransformerException;
7
import eu.dnetlib.data.search.utils.solr.SolrResultSetOptionsUtil;
8
import eu.dnetlib.data.search.utils.solr.SolrResultsFormatter;
9
import eu.dnetlib.domain.EPR;
10
import gr.uoa.di.driver.enabling.resultset.ResultSet;
11
import org.apache.commons.lang.StringEscapeUtils;
12
import org.apache.log4j.Logger;
13
import org.apache.solr.client.solrj.SolrClient;
14
import org.apache.solr.client.solrj.SolrServerException;
15
import org.apache.solr.client.solrj.impl.CloudSolrClient;
16
import org.apache.solr.client.solrj.response.FacetField;
17
import org.apache.solr.client.solrj.response.QueryResponse;
18
import org.apache.solr.common.SolrDocumentList;
19
import org.apache.solr.common.params.SolrParams;
20
import org.apache.solr.common.util.NamedList;
21
import org.z3950.zing.cql.CQLParseException;
22

    
23
import javax.ws.rs.core.MediaType;
24
import java.io.IOException;
25
import java.io.OutputStream;
26
import java.util.*;
27

    
28
/**
29
 * Created by antleb on 2/4/14.
30
 */
31
public class SolrResultSet implements ResultSet<String> {
32

    
33
    private Logger logger = Logger.getLogger(getClass());
34

    
35
    private EPR epr = null;
36
    public SolrClient solrClient = null;
37

    
38
    private NamedList<String> queryOpts = new NamedList<String>();
39
    long size = -1;
40

    
41

    
42
    public SolrResultSet(EPR epr, CloudSolrClient cloudSolrClient) throws IOException, CQLParseException {
43
        logger.debug("Setting solr client " + cloudSolrClient);
44
        this.epr = epr;
45
        this.solrClient = cloudSolrClient;
46
        this.queryOpts = SolrResultSetOptionsUtil.extractQueryOptions(epr.getParameter("query"));
47

    
48
        String layout = epr.getParameter("layout");
49
        String mdformat = epr.getParameter("mdformat");
50
        String interpretation = epr.getParameter("interpretation");
51

    
52
        ((CloudSolrClient)solrClient).setDefaultCollection(mdformat + "-" + layout + "-" + interpretation);
53
    }
54

    
55
    @Override
56
    public boolean isOpen() {
57
        return true;
58
    }
59

    
60
    @Override
61
    public boolean isAlive() {
62
        return true;
63
    }
64

    
65
    @Override
66
    public void close() {
67
/*
68
        try {
69
            logger.debug("!!!!!!!!! !!!!!!! CLOSING !!!!!!!!! !!!!!!!!!! ");
70
            solrClient.close();
71

    
72
        } catch (IOException e) {
73
            logger.error("Error closing result set.", e);
74
        }
75
*/
76
    }
77

    
78
    @Override
79
    public int size() {
80
        return (int) size;
81
    }
82

    
83
    @Override
84
    @Deprecated
85
    public List<String> getElements(int from, int to) {
86
        return get(from, to);
87
    }
88

    
89
    List<FacetField> facetFields = null;
90

    
91
    @Override
92
    @Deprecated
93
    public List<String> get(int from, int to) {
94
        List<String> res = new ArrayList<String>();
95

    
96
        QueryResponse rsp = null;
97

    
98
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
99

    
100
        logger.debug("from: " + from);
101
        logger.debug("to: " + to);
102

    
103

    
104
        queryOpts.add("start", from+1 + "");
105
        queryOpts.add("rows", to + 1+"");
106

    
107
        try {
108
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
109
            facetFields = rsp.getFacetFields();
110
            SolrDocumentList docs = rsp.getResults();
111

    
112
            if (facetFields!=null && !facetFields.isEmpty()) {
113
                for (int i = from - 1; i < to; i++) {
114
                    for (FacetField field : facetFields) {
115
                        if (field.getValueCount() > i) {
116
                            BrowseField bf = new BrowseField();
117
                            bf.setId(field.getValues().get(i).getName());
118
                            bf.setName(field.getValues().get(i).getName());
119
                            bf.setCount(field.getValues().get(i).getCount() + "");
120
                            if (map.get(field.getName()) == null) {
121
                                map.put(field.getName(), new ArrayList<String>());
122
                            }
123

    
124
                            map.get(field.getName()).add(new Gson().toJson(bf));
125
                        }
126
                    }
127
                }
128

    
129
                for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
130
                    StringBuilder builder = new StringBuilder();
131
                    builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
132
                    builder.append(facetEntry.getValue());
133
                    res.add(builder.toString());
134
                }
135
            }
136

    
137
            logger.debug("time: " + rsp.getElapsedTime());
138
            logger.debug("found: " + docs.getNumFound());
139
            logger.debug("docs: " + docs.size());
140

    
141
            for (int i = 0; i < docs.size(); i++) {
142
                String result = (String) docs.get(i).get("__result");
143
                res.add(result);
144
            }
145

    
146
            return res;
147

    
148
        } catch (SolrServerException sse) {
149
            logger.error("Fail to get results from Solr. ", sse);
150

    
151
        } catch (IOException ioe) {
152
            logger.error("Fail to get results from Solr. ", ioe);
153
        }
154

    
155
        return null;
156
    }
157

    
158
    @Override
159
    public EPR getEpr() {
160
        return null;
161
    }
162

    
163
    public Map<String,List<String>> newGet(int from, int to, String format, Transformer transformer, Transformer oldRefineTransformer) {
164
        List<String> refineSolrResults = new ArrayList<String>();
165
        List<String> searchSolrResults = new ArrayList<String>();
166

    
167
        QueryResponse rsp = null;
168
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
169

    
170
        queryOpts.add("start", from*to + "");
171
        queryOpts.add("rows", to +"");
172

    
173
        try {
174
            long startTime = System.nanoTime();
175

    
176
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
177
            long estimatedTime = System.nanoTime() - startTime;
178
            logger.info("Solrj time " + estimatedTime/1000000 +  " milliseconds for query:" + queryOpts.get("q") +
179
                    " and facets " + queryOpts.getAll("facet.field") + " and fq " + queryOpts.getAll("fq") + " from: "
180
                    + from + " and size " + to);
181

    
182
            facetFields = rsp.getFacetFields();
183

    
184
            SolrDocumentList docs = rsp.getResults();
185

    
186
            this.size = docs.getNumFound();
187

    
188
            if (facetFields!=null && !facetFields.isEmpty()) {
189
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
190
                    for (FacetField field : facetFields) {
191
                        map.put(field.getName(), new ArrayList<String>());
192
                        BrowseField bf = null;
193
                        for (int i = 0; i < field.getValueCount(); i++) {
194
                            bf = new BrowseField();
195
                            //bf.setId(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
196
                            bf.setId(field.getValues().get(i).getName());
197
                            String[] facetedValues = field.getValues().get(i).getName().split("\\|\\|",2);
198

    
199

    
200
                            logger.debug("faceted values " + Arrays.toString(facetedValues));
201

    
202
                            if (facetedValues.length > 1) {
203
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(facetedValues[1]));
204
                                bf.setName(facetedValues[1]);
205
                                logger.debug("faceted values [1] " + facetedValues[1]);
206

    
207
                            } else if (field.getValues().get(i).getName().split("_\\:\\:",2).length > 1) {
208
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName().split("\\:\\:",2)[1]).replaceAll("\\:\\:", "\\|"));
209
                                bf.setName(field.getValues().get(i).getName().split("\\:\\:",2)[1].replaceAll("\\:\\:", "\\|"));
210

    
211
                            } else {
212
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
213
                                bf.setName(field.getValues().get(i).getName());
214
                            }
215

    
216
                            bf.setCount(field.getValues().get(i).getCount() + "");
217
                            map.get(field.getName()).add(new Gson().toJson(bf));
218
                        }
219

    
220
                    }
221

    
222
                    StringBuilder builder = null;
223

    
224
                    for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
225
                        builder = new StringBuilder();
226
                        builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
227
                        builder.append(facetEntry.getValue());
228
                        refineSolrResults.add(builder.toString());
229
                    }
230

    
231
                } else { //the old implementation & xml as default
232
                    logger.debug("Creating old browse results.");
233
                    createXmlRefineFields(refineSolrResults, oldRefineTransformer);
234
                }
235
            }
236

    
237
            for (int i = 0; i < docs.size(); i++) {
238
                String result = (String) docs.get(i).get("__result");
239

    
240
                logger.debug("["+ i +"]: " + docs.get(i).get("__result"));
241

    
242
                try {
243
                    if (transformer != null) {
244
                        logger.debug("1 >>>>>>" + result);
245
                        String xml = result.replaceAll("<em>","").replaceAll("</em>","");
246
                        result = transformer.transform(xml);
247
                        logger.debug("2 >>>>>>" + result);
248
                    }
249

    
250
                } catch (TransformerException te) {
251
                    logger.warn("Error transforming " + result, te);
252
                    continue;
253
                }
254

    
255
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
256
                    searchSolrResults.add(SolrResultsFormatter.xml2Json(result));
257
                } else { // default xml
258
                    searchSolrResults.add(result);
259
                }
260
            }
261

    
262
            Map<String,List<String>> response = new HashMap<String, List<String>>();
263

    
264
            //logger.debug("refine results " + refineSolrResults);
265
            //logger.debug("search results " + searchSolrResults);
266

    
267
            response.put("refine",refineSolrResults);
268
            response.put("search", searchSolrResults);
269

    
270
            return response;
271

    
272
        } catch (SolrServerException sse) {
273
            logger.error("Error calling Solr.", sse);
274

    
275
        } catch (IOException ioe) {
276
            logger.error("Error calling Solr.", ioe);
277
        }
278
        return null;
279
    }
280

    
281
    public void cursorGet(Transformer transformer, OutputStream os) throws SolrServerException, SearchServiceException {
282
        queryOpts.add("start", "0");
283
        queryOpts.add("rows", "0");
284
        queryOpts.remove("rows");
285
        queryOpts.add("rows", "500");
286
        queryOpts.add("fl", "__result");
287
        queryOpts.add("shards.tolerant","true");
288
        queryOpts.add("cursorMark", "*");
289
        queryOpts.add("sort", "__indexrecordidentifier asc");
290

    
291
        String cursorMark = "*";
292
        String nextCursorMark = "";
293

    
294
        int curs = 0;
295
        try {
296
            QueryResponse resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
297

    
298
            while (!cursorMark.equals(nextCursorMark)) {
299
                resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
300
                cursorMark = nextCursorMark;
301
                nextCursorMark = resp.getNextCursorMark();
302

    
303
                for (int i = 0; i < resp.getResults().size(); i++) {
304
                    if (transformer != null) {
305
                        String result = null;
306
                        try {
307
                            result = transformer.transform((String) resp.getResults().get(i).get("__result"));
308
                            logger.debug("RESULT " + result);
309

    
310
                        } catch (TransformerException te) {
311
                            logger.warn("Error transforming " + result, te);
312
                            continue;
313
                        }
314

    
315
                        try {
316
                            os.write(result.getBytes());
317
                            os.flush();
318
                        } catch (IOException e) {
319
                            logger.error("Cursor get... ", e);
320
                            continue;
321
                        }
322
                    }
323
                }
324

    
325
                queryOpts.remove("cursorMark");
326
                queryOpts.add("cursorMark", nextCursorMark);
327
                curs++;
328
            }
329

    
330
        } catch (IOException ioe) {
331
            logger.error("Error executing solr query. ", ioe);
332
        }
333

    
334
        logger.debug("CURS " + curs);
335
    }
336

    
337

    
338
    //TODO get rid of this as soon as Joomla portal is out
339
    //Just copied and refactored the old one...
340
    @Deprecated
341
    private void createXmlRefineFields(List<String> res, Transformer oldRefineTransformer) {
342
        int max = -12;
343

    
344
        for (FacetField field:facetFields) {
345
            logger.debug("field " + field.getName() + " has count " + field.getValueCount());
346

    
347
            if (field.getValueCount() > max) {
348
                max = field.getValueCount();
349
            }
350
        }
351

    
352
        logger.debug("max " + max);
353

    
354
        for (int i = 0; i < max; i++) {
355
            StringBuilder sb = new StringBuilder();
356

    
357
            sb.append("<row>");
358
            for (FacetField field:facetFields) {
359
                if (field.getValueCount() > i) {
360
                    sb.append("<groupresult field=\"").append(field.getName()).append("\">");
361
                    sb.append("<count>").append(field.getValues().get(i).getCount()).append("</count>");
362
                    sb.append("<originalValue>").append(StringEscapeUtils.escapeXml(field.getValues().get(i).getName())).append("</originalValue>");
363

    
364
                    String[] facetValues = field.getValues().get(i).getName().split("\\|\\|");
365
                    if(facetValues.length > 1) {
366
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[1])).append("</value>");
367
                    } else {
368
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[0])).append("</value>");
369
                    }
370
                    sb.append("</groupresult>");
371
                }
372
            }
373
            sb.append("</row>");
374

    
375
            try {
376
                //logger.debug("row: " + sb.toString());
377
                //logger.debug("row2: " + oldRefineTransformer.transform(sb.toString()));
378

    
379
                //TODO remove
380
                res.add(oldRefineTransformer.transform(sb.toString()));
381

    
382
            } catch (TransformerException te) {
383
                logger.error("Cannot transform refine for: " + sb.toString(), te);
384
            }
385
        }
386
    }
387

    
388
 /*   public static void main(String[] args) throws IOException, CQLParseException, SolrServerException {
389
        CloudSolrServer solrClient = new CloudSolrServer("beta.solr.openaire.eu:9983");
390
        solrClient.setDefaultCollection("DMF-index-openaire");
391

    
392
        NamedList<String> queryOpts = new NamedList<String>();
393

    
394
        //q=*:*&start=0&rows=10&cursorMark=*&sort=dateofcollection asc
395
        queryOpts.add("q", new CqlTranslatorImpl().getTranslatedQuery("objIdentifier = acm_________::0002c24f82c295e925a2bdf7bbf49bfc").asLucene());
396
        queryOpts.add("start", "0");
397
        queryOpts.add("rows", "1");
398
        queryOpts.add("fl", "__result");
399
        queryOpts.add("shards.tolerant","true");
400
        queryOpts.add("cursorMark", "*");
401
        queryOpts.add("sort", "__indexrecordidentifier asc");
402

    
403

    
404
        //queryOpts.add("q", new CqlTranslatorImpl().getTranslatedQuery("oaftype exact project").asLucene());
405
        NamedList<String> extraOpts = new NamedList<String>();
406

    
407
        QueryResponse resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
408

    
409
        System.out.println("results " + resp.getResults().size());
410

    
411
        String cursorMark = "*";
412
        String nextCursorMark = "";
413

    
414
        int curs = 0;
415
        while (!cursorMark.equals(nextCursorMark)) {
416
            System.out.println("cursor " + cursorMark);
417
            System.out.println("next cursor " + nextCursorMark);
418
            cursorMark = nextCursorMark;
419
            for (int i = 0; i < resp.getResults().size(); i++) {
420
                String result = ((ArrayList<String>) resp.getResults().get(i).get("__result")).get(0);
421
                //System.out.println(result);
422
                resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
423
            }
424
            nextCursorMark = resp.getNextCursorMark();
425
            queryOpts.add("cursorMark", nextCursorMark);
426

    
427
            System.out.println("CURS " + curs);
428
            curs ++;
429

    
430
        }
431

    
432

    
433
        //System.out.println((new CqlTranslatorImpl().getTranslatedQuery("objIdentifier = acm_________::0002c24f82c295e925a2bdf7bbf49bfc").asLucene()));
434

    
435

    
436

    
437
        //extraOpts.add("start", "1");
438
       // extraOpts.add("rows", "10");
439
       // extraOpts.addAll(queryOpts);
440

    
441
        //queryOpts.add("facet", "true");
442
        //TranslatedQuery translatedQuery = new CqlTranslatorImpl().getTranslatedQuery("oaftype=result sortBy resultdateofacceptance/sort.descending");
443

    
444
     //   queryOpts.add("q", "oaftype=project");
445
        //queryOpts.add("facet", "true");
446
        //queryOpts.add("facet.mincount", "1");
447
        //queryOpts.add("fq", "popularity");
448

    
449

    
450

    
451
//        queryOpts.put("fq", new CqlTranslatorImpl().getTranslatedQuery("").asLucene());
452
       // queryOpts.add("facet.field", "contextid");
453
       //  queryOpts.add("facet.field", "contextname");
454
       //  queryOpts.add("facet.mincount", "1");
455
       //  queryOpts.add("facet.threads", "10");
456
       // System.out.println(translatedQuery.getOptions().getSort().getMode());
457
       // System.out.println(translatedQuery.getOptions().getSort().getField());
458

    
459
        //queryOpts.add("sort", translatedQuery.getOptions().getSort().getField() + " " + translatedQuery.getOptions().getSort().getMode() );
460

    
461

    
462

    
463
/*        QueryResponse resp = null;
464
        synchronized (solrClient) {
465
            resp = solrClient.query(SolrParams.toSolrParams(extraOpts));
466
        }*/
467
//        System.out.println("time: " + resp.getElapsedTime());
468
    //System.out.println("results: " + resp.getResults());
469

    
470
/*      System.out.println(resp.getFacetField("contextname").getValueCount());
471

    
472
        for (FacetField.Count count:resp.getFacetField("contextname").getValues())
473
            System.out.println(count.getName() + " : " +  count.getCount());
474

    
475

    
476
        int max = -12;
477

    
478
        for (FacetField field:resp.getFacetFields()) {
479
            if (field.getValueCount() > max)
480
                max = field.getValueCount();
481

    
482
        }
483

    
484
        System.out.println("max: " + max);
485
*/
486
 //   }
487

    
488
//    @Override
489
//    public EPR getEpr() {
490
//        return epr;
491
//   }
492
}
493

    
494
/**
 * Plain holder for one facet ("browse") value: its identifier, display name and count.
 * Instances are filled through the setters and serialized to JSON with Gson by
 * {@code SolrResultSet}.
 */
class BrowseField {

    // The declaration order (name, id, count) is the original one and is kept on purpose:
    // the JSON is produced by reflection over these fields, so reordering them could change
    // the order of the members in the output.
    String name;
    String id;
    String count;

    /** Returns the display name. */
    public String getName() {
        return name;
    }

    /** Sets the display name. */
    public void setName(String name) {
        this.name = name;
    }

    /** Returns the identifier (the raw facet value). */
    public String getId() {
        return id;
    }

    /** Sets the identifier (the raw facet value). */
    public void setId(String id) {
        this.id = id;
    }

    /** Returns the count, kept as a string. */
    public String getCount() {
        return count;
    }

    /** Sets the count, kept as a string. */
    public void setCount(String count) {
        this.count = count;
    }
}
(4-4/5)