Project

General

Profile

1
package eu.dnetlib.data.search.solr;
2

    
3
import com.google.gson.Gson;
import eu.dnetlib.api.data.SearchServiceException;
import eu.dnetlib.data.search.transform.Transformer;
import eu.dnetlib.data.search.transform.TransformerException;
import eu.dnetlib.data.search.utils.solr.SolrResultSetOptionsUtil;
import eu.dnetlib.data.search.utils.solr.SolrResultsFormatter;
import eu.dnetlib.domain.EPR;
import eu.dnetlib.functionality.cql.CqlTranslatorImpl;
import gr.uoa.di.driver.enabling.resultset.ResultSet;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.z3950.zing.cql.CQLParseException;

import javax.ws.rs.core.MediaType;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
30

    
31
/**
32
 * Created by antleb on 2/4/14.
33
 */
34
public class SolrResultSet implements ResultSet<String> {
35

    
36
    private Logger logger = Logger.getLogger(getClass());
37

    
38
    private EPR epr = null;
39
    private CloudSolrServer solrClient = null;
40

    
41
    private NamedList<String> queryOpts = new NamedList<String>();
42
    long size = -1;
43

    
44

    
45
    public SolrResultSet(EPR epr, CloudSolrServer solrClient) throws IOException, CQLParseException {
46
        this.epr = epr;
47
        this.solrClient = solrClient;
48
        this.queryOpts = SolrResultSetOptionsUtil.extractQueryOptions(epr.getParameter("query"));
49

    
50
        String layout = epr.getParameter("layout");
51
        String mdformat = epr.getParameter("mdformat");
52
        String interpretation = epr.getParameter("interpretation");
53

    
54
        solrClient.setDefaultCollection(mdformat + "-" + layout + "-" + interpretation);
55
    }
56

    
57
    @Override
58
    public boolean isOpen() {
59
        return true;
60
    }
61

    
62
    @Override
63
    public boolean isAlive() {
64
        return true;
65
    }
66

    
67
    @Override
68
    public void close() {
69
        //solrClient.shutdown();
70
    }
71

    
72
    @Override
73
    public int size() {
74
        return (int) size;
75
    }
76

    
77
    @Override
78
    @Deprecated
79
    public List<String> getElements(int from, int to) {
80
        return get(from, to);
81
    }
82

    
83
    List<FacetField> facetFields = null;
84

    
85
    @Override
86
    @Deprecated
87
    public List<String> get(int from, int to) {
88
        List<String> res = new ArrayList<String>();
89

    
90
        QueryResponse rsp = null;
91

    
92
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
93

    
94
        logger.debug("from: " + from);
95
        logger.debug("to: " + to);
96

    
97

    
98
        queryOpts.add("start", from+1 + "");
99
        queryOpts.add("rows", to + 1+"");
100

    
101
        try {
102
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
103
            facetFields = rsp.getFacetFields();
104
            SolrDocumentList docs = rsp.getResults();
105

    
106
            if (facetFields!=null && !facetFields.isEmpty()) {
107
                for (int i = from - 1; i < to; i++) {
108
                    for (FacetField field : facetFields) {
109
                        if (field.getValueCount() > i) {
110
                            BrowseField bf = new BrowseField();
111
                            bf.setId(field.getValues().get(i).getName());
112
                            bf.setName(field.getValues().get(i).getName());
113
                            bf.setCount(field.getValues().get(i).getCount() + "");
114
                            if (map.get(field.getName()) == null) {
115
                                map.put(field.getName(), new ArrayList<String>());
116
                            }
117

    
118
                            map.get(field.getName()).add(new Gson().toJson(bf));
119
                        }
120
                    }
121
                }
122

    
123
                for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
124
                    StringBuilder builder = new StringBuilder();
125
                    builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
126
                    builder.append(facetEntry.getValue());
127
                    res.add(builder.toString());
128
                }
129
            }
130

    
131
            logger.debug("time: " + rsp.getElapsedTime());
132
            logger.debug("found: " + docs.getNumFound());
133
            logger.debug("docs: " + docs.size());
134

    
135
            for (int i = 0; i < docs.size(); i++) {
136
                String result = ((ArrayList<String>) docs.get(i).get("__result")).get(0);
137
                res.add(result);
138
            }
139

    
140
            return res;
141

    
142
        } catch (SolrServerException sse) {
143
            logger.error("Fail to get results from Solr. ", sse);
144
        }
145

    
146
        return null;
147
    }
148

    
149
    @Override
150
    public EPR getEpr() {
151
        return null;
152
    }
153

    
154
    public Map<String,List<String>> newGet(int from, int to, String format, Transformer transformer, Transformer oldRefineTransformer) {
155
        List<String> refineSolrResults = new ArrayList<String>();
156
        List<String> searchSolrResults = new ArrayList<String>();
157

    
158
        logger.debug("format: " + format);
159

    
160
        QueryResponse rsp = null;
161
        HashMap<String, List<String>> map = new HashMap<String, List<String>>();
162

    
163
        //logger.info("from: " + from*to);
164
        //logger.info("to: " + to);
165

    
166
        queryOpts.add("start", from*to + "");
167
        queryOpts.add("rows", to +"");
168

    
169
        try {
170
            long startTime = System.nanoTime();
171
            rsp = solrClient.query(SolrParams.toSolrParams(queryOpts));
172
            long estimatedTime = System.nanoTime() - startTime;
173
            logger.info("Solrj time " + estimatedTime/1000000 +  " milliseconds for query:" + queryOpts.get("q") +
174
                    " and facets " + queryOpts.getAll("facet.field") + " and fq " + queryOpts.getAll("fq") + " from: "
175
                    + from + " and size " + to);
176

    
177
            facetFields = rsp.getFacetFields();
178

    
179
            SolrDocumentList docs = rsp.getResults();
180

    
181
            this.size = docs.getNumFound();
182

    
183
            if (facetFields!=null && !facetFields.isEmpty()) {
184

    
185
                logger.debug("Checking " + (format != null && format.equals(MediaType.APPLICATION_JSON)));
186

    
187
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
188
                    for (FacetField field : facetFields) {
189
                        map.put(field.getName(), new ArrayList<String>());
190
                        BrowseField bf = null;
191
                        for (int i = 0; i < field.getValueCount(); i++) {
192
                            bf = new BrowseField();
193
                            //bf.setId(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
194
                            bf.setId(field.getValues().get(i).getName());
195
                            String[] facetedValues = field.getValues().get(i).getName().split("\\|\\|",2);
196

    
197
                            if (facetedValues.length > 1) {
198
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(facetedValues[1]));
199
                                bf.setName(facetedValues[1]);
200

    
201
                            } else if (field.getValues().get(i).getName().split("_\\:\\:",2).length > 1) {
202
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName().split("\\:\\:",2)[1]).replaceAll("\\:\\:", "\\|"));
203
                                bf.setName(field.getValues().get(i).getName().split("\\:\\:",2)[1].replaceAll("\\:\\:", "\\|"));
204

    
205
                            } else {
206
                                //bf.setName(org.apache.commons.lang3.StringEscapeUtils.escapeJson(field.getValues().get(i).getName()));
207
                                bf.setName(field.getValues().get(i).getName());
208
                            }
209

    
210
                            bf.setCount(field.getValues().get(i).getCount() + "");
211
                            map.get(field.getName()).add(new Gson().toJson(bf));
212
                        }
213

    
214
                    }
215

    
216
                    StringBuilder builder = null;
217

    
218
                    for (Map.Entry<String, List<String>> facetEntry : map.entrySet()) {
219
                        builder = new StringBuilder();
220
                        builder.append("\"" + facetEntry.getKey() + "\"" + " : ");
221
                        builder.append(facetEntry.getValue());
222
                        refineSolrResults.add(builder.toString());
223
                    }
224

    
225
                } else { //the old implementation & xml as default //TODO check compatibility
226
                    logger.debug("Creating old browse results.");
227
                    createXmlRefineFields(refineSolrResults, oldRefineTransformer);
228
                }
229
            }
230

    
231
            for (int i = 0; i < docs.size(); i++) {
232
                String result = ((ArrayList<String>) docs.get(i).get("__result")).get(0);
233
                try {
234
                    if (transformer != null) {
235
                        //logger.debug("1 >>>>>>" + result);
236
                        String xml = result.replaceAll("<em>","").replaceAll("</em>","");
237
                        result = transformer.transform(xml);
238
                        //logger.debug("2 >>>>>>" + result);
239
                    }
240
                } catch (TransformerException te) {
241
                    logger.warn("Error transforming " + result, te);
242
                    continue;
243
                }
244

    
245
                if (format != null && format.equals(MediaType.APPLICATION_JSON)) {
246
                    searchSolrResults.add(SolrResultsFormatter.xml2Json(result));
247
                } else { // default xml
248
                    searchSolrResults.add(result);
249
                }
250
            }
251

    
252
            Map<String,List<String>> response = new HashMap<String, List<String>>();
253

    
254
            logger.debug("refine results " + refineSolrResults);
255
            //logger.debug("search results " + searchSolrResults);
256

    
257
            response.put("refine",refineSolrResults);
258
            response.put("search", searchSolrResults);
259

    
260
            return response;
261

    
262
        } catch (SolrServerException sse) {
263
            logger.error("Error calling Solr.", sse);
264
        }
265
        return null;
266
    }
267

    
268
    public void cursorGet(Transformer transformer, OutputStream os) throws SolrServerException, SearchServiceException {
269
        queryOpts.add("start", "0");
270
        queryOpts.add("rows", "0");
271
        queryOpts.remove("rows");
272
        queryOpts.add("rows", "500");
273
        queryOpts.add("fl", "__result");
274
        queryOpts.add("shards.tolerant","true");
275
        queryOpts.add("cursorMark", "*");
276
        queryOpts.add("sort", "__indexrecordidentifier asc");
277

    
278
        String cursorMark = "*";
279
        String nextCursorMark = "";
280

    
281
        int curs = 0;
282
        QueryResponse resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
283

    
284
        while (!cursorMark.equals(nextCursorMark)) {
285
            resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
286
            cursorMark = nextCursorMark;
287
            nextCursorMark = resp.getNextCursorMark();
288

    
289
            for (int i = 0; i < resp.getResults().size(); i++) {
290
                if (transformer != null) {
291
                    String result = null;
292
                    try {
293
                        result = transformer.transform(((ArrayList<String>) resp.getResults().get(i).get("__result")).get(0));
294

    
295
                    } catch (TransformerException te) {
296
                        logger.warn("Error transforming " + result, te);
297
                        continue;
298
                    }
299

    
300
                    try {
301
                        os.write(result.getBytes());
302
                        os.flush();
303
                    } catch (IOException e) {
304
                        logger.error("Cursor get... ", e);
305
                        continue;
306
                    }
307
                }
308
            }
309

    
310
            queryOpts.remove("cursorMark");
311
            queryOpts.add("cursorMark", nextCursorMark);
312
            curs ++;
313
        }
314

    
315
        logger.debug("CURS " + curs);
316
    }
317

    
318

    
319
    //TODO get rid of this as soon as Joomla portal is out
320
    //Just copied and refactored the old one...
321
    @Deprecated
322
    private void createXmlRefineFields(List<String> res, Transformer oldRefineTransformer) {
323
        int max = -12;
324

    
325
        for (FacetField field:facetFields) {
326
            logger.debug("field " + field.getName() + " has count " + field.getValueCount());
327

    
328
            if (field.getValueCount() > max) {
329
                max = field.getValueCount();
330
            }
331
        }
332

    
333
        logger.debug("max " + max);
334

    
335
        for (int i = 0; i < max; i++) {
336
            StringBuilder sb = new StringBuilder();
337

    
338
            sb.append("<row>");
339
            for (FacetField field:facetFields) {
340
                if (field.getValueCount() > i) {
341
                    sb.append("<groupresult field=\"").append(field.getName()).append("\">");
342
                    sb.append("<count>").append(field.getValues().get(i).getCount()).append("</count>");
343
                    sb.append("<originalValue>").append(StringEscapeUtils.escapeXml(field.getValues().get(i).getName())).append("</originalValue>");
344

    
345
                    String[] facetValues = field.getValues().get(i).getName().split("\\|\\|");
346
                    if(facetValues.length > 1) {
347
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[1])).append("</value>");
348
                    } else {
349
                        sb.append("<value>").append(StringEscapeUtils.escapeXml(facetValues[0])).append("</value>");
350
                    }
351
                    sb.append("</groupresult>");
352
                }
353
            }
354
            sb.append("</row>");
355

    
356
            try {
357
                //logger.debug("row: " + sb.toString());
358
                //logger.debug("row2: " + oldRefineTransformer.transform(sb.toString()));
359

    
360
                //TODO remove
361
                res.add(oldRefineTransformer.transform(sb.toString()));
362

    
363
            } catch (TransformerException te) {
364
                logger.error("Cannot transform refine for: " + sb.toString(), te);
365
            }
366
        }
367
    }
368

    
369
   public static void main(String[] args) throws IOException, CQLParseException, SolrServerException {
370
        CloudSolrServer solrClient = new CloudSolrServer("beta.solr.openaire.eu:9983");
371
        solrClient.setDefaultCollection("TMF-index-openaire");
372

    
373
        NamedList<String> queryOpts = new NamedList<String>();
374

    
375
        //q=*:*&start=0&rows=10&cursorMark=*&sort=dateofcollection asc
376
        queryOpts.add("q", new CqlTranslatorImpl().getTranslatedQuery("oaftype=result").asLucene());
377
        queryOpts.add("start", "0");
378
        queryOpts.add("rows", "2");
379
        //queryOpts.add("fl", "__result");
380
        queryOpts.add("shards.tolerant","true");
381
       // queryOpts.add("cursorMark", "*");
382
        queryOpts.add("sort", "resultdateofacceptance desc");
383

    
384

    
385
        //queryOpts.add("q", new CqlTranslatorImpl().getTranslatedQuery("oaftype exact project").asLucene());
386
        NamedList<String> extraOpts = new NamedList<String>();
387

    
388
        QueryResponse resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
389

    
390
        System.out.println("results " + resp.getResults().size());
391

    
392

    
393
        /*String cursorMark = "*";
394
        String nextCursorMark = "";
395

    
396
        int curs = 0;
397
        while (!cursorMark.equals(nextCursorMark)) {
398
            System.out.println("cursor " + cursorMark);
399
            System.out.println("next cursor " + nextCursorMark);
400
            cursorMark = nextCursorMark;
401
            for (int i = 0; i < resp.getResults().size(); i++) {
402
                String result = ((ArrayList<String>) resp.getResults().get(i).get("__result")).get(0);
403
                //System.out.println(result);
404
                resp = solrClient.query(SolrParams.toSolrParams(queryOpts));
405
            }
406
            nextCursorMark = resp.getNextCursorMark();
407
            queryOpts.add("cursorMark", nextCursorMark);
408

    
409
            System.out.println("CURS " + curs);
410
            curs ++;
411

    
412
        }
413

    
414

    
415
        //System.out.println((new CqlTranslatorImpl().getTranslatedQuery("objIdentifier = acm_________::0002c24f82c295e925a2bdf7bbf49bfc").asLucene()));
416

    
417

    
418

    
419
        //extraOpts.add("start", "1");
420
       // extraOpts.add("rows", "10");
421
       // extraOpts.addAll(queryOpts);
422

    
423
        //queryOpts.add("facet", "true");
424
        //TranslatedQuery translatedQuery = new CqlTranslatorImpl().getTranslatedQuery("oaftype=result sortBy resultdateofacceptance/sort.descending");
425

    
426
     //   queryOpts.add("q", "oaftype=project");
427
        //queryOpts.add("facet", "true");
428
        //queryOpts.add("facet.mincount", "1");
429
        //queryOpts.add("fq", "popularity");
430

    
431

    
432

    
433
//        queryOpts.put("fq", new CqlTranslatorImpl().getTranslatedQuery("").asLucene());
434
       // queryOpts.add("facet.field", "contextid");
435
       //  queryOpts.add("facet.field", "contextname");
436
       //  queryOpts.add("facet.mincount", "1");
437
       //  queryOpts.add("facet.threads", "10");
438
       // System.out.println(translatedQuery.getOptions().getSort().getMode());
439
       // System.out.println(translatedQuery.getOptions().getSort().getField());
440

    
441
        //queryOpts.add("sort", translatedQuery.getOptions().getSort().getField() + " " + translatedQuery.getOptions().getSort().getMode() );
442

    
443

    
444

    
445
/*        QueryResponse resp = null;
446
        synchronized (solrClient) {
447
            resp = solrClient.query(SolrParams.toSolrParams(extraOpts));
448
        }*/
449
//        System.out.println("time: " + resp.getElapsedTime());
450
    //System.out.println("results: " + resp.getResults());
451

    
452
/*      System.out.println(resp.getFacetField("contextname").getValueCount());
453

    
454
        for (FacetField.Count count:resp.getFacetField("contextname").getValues())
455
            System.out.println(count.getName() + " : " +  count.getCount());
456

    
457

    
458
        int max = -12;
459

    
460
        for (FacetField field:resp.getFacetFields()) {
461
            if (field.getValueCount() > max)
462
                max = field.getValueCount();
463

    
464
        }
465

    
466
        System.out.println("max: " + max);
467
*/
468
    }
469

    
470
//    @Override
471
//    public EPR getEpr() {
472
//        return epr;
473
//   }
474
}
475

    
476
/**
 * Plain bean describing one facet ("browse") value: its identifier, its
 * display name and its hit count (kept as a string).
 * <p>
 * Instances are serialized to JSON with Gson by the result set code in this
 * file. The fields are declared in the same order as before (name, id, count);
 * NOTE(review): reflection-based serializers usually emit fields in
 * declaration order, so keep this order unless the JSON consumers are checked.
 */
class BrowseField {
    String name;
    String id;
    String count;

    /** @return the display name of the facet value */
    public String getName() {
        return name;
    }

    /** @param name the display name of the facet value */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the identifier of the facet value */
    public String getId() {
        return id;
    }

    /** @param id the identifier of the facet value */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the hit count of the facet value, as a string */
    public String getCount() {
        return count;
    }

    /** @param count the hit count of the facet value, as a string */
    public void setCount(String count) {
        this.count = count;
    }
}
(4-4/5)