Revision 56023
Added by Sandro La Bruzzo almost 5 years ago
es_connector.py | ||
---|---|---|
1 | 1 |
from json import JSONEncoder |
2 |
|
|
3 | 2 |
import sys |
4 |
|
|
5 | 3 |
import re |
6 | 4 |
from elasticsearch import Elasticsearch |
7 | 5 |
from elasticsearch_dsl import * |
6 |
import logging |
|
7 |
from eu.dnetlib.util import get_index_properties |
|
8 | 8 |
|
9 | 9 |
import os |
10 | 10 |
from os import path |
11 | 11 |
|
12 |
|
|
13 |
log = logging.getLogger("scholexplorer-portal") |
|
14 |
|
|
12 | 15 |
pid_resolver = { |
13 | 16 |
"pdb": "http://www.rcsb.org/pdb/explore/explore.do?structureId=%s", |
14 | 17 |
"ncbi-n": "http://www.ncbi.nlm.nih.gov/gquery/?term=%s", |
... | ... | |
39 | 42 |
if pid_type != None: |
40 | 43 |
regex = r"\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&\'<>])\S)+)\b" |
41 | 44 |
if re.match(regex,pid): |
42 |
print "It should be doi"
|
|
45 |
log.debug("It should be doi")
|
|
43 | 46 |
pid_type = 'doi' |
44 | 47 |
if pid_type.lower() in pid_resolver: |
45 | 48 |
return pid_resolver[pid_type.lower()] % pid |
... | ... | |
53 | 56 |
return "" |
54 | 57 |
|
55 | 58 |
|
56 |
def get_property(): |
|
57 |
f = open(path.join(os.path.dirname(os.path.realpath(__file__)), '../../api.properties')) |
|
58 |
p = {} |
|
59 |
for line in f: |
|
60 |
data = line.strip().split("=") |
|
61 |
p[data[0].strip()] = data[1].strip() |
|
62 |
return p |
|
63 | 59 |
|
64 | 60 |
|
61 |
|
|
65 | 62 |
def create_typology_filter(value): |
66 | 63 |
return Q('match', typology=value) |
67 | 64 |
|
... | ... | |
101 | 98 |
|
102 | 99 |
class DLIESConnector(object): |
103 | 100 |
def __init__(self): |
104 |
props = get_property()
|
|
101 |
props = get_index_properties()
|
|
105 | 102 |
self.index_host = [x.strip() for x in props['es_index'].split(',')] |
106 | 103 |
self.client = Elasticsearch(hosts=self.index_host) |
107 | 104 |
self.index_name = props['api.index'] |
108 | 105 |
|
109 | 106 |
def get_main_page_stats(self): |
110 |
stats = dict(total =Search(using=self.client, index=self.index_name).doc_type('scholix').execute().hits.total/2)
|
|
107 |
stats = dict(total =int(Search(using=self.client, index=self.index_name).doc_type('scholix').count()/2))
|
|
111 | 108 |
for item in ['dataset', 'publication']: |
112 | 109 |
s= Search(using=self.client, index=self.index_name).doc_type('object').query(Q('match', typology=item)) |
113 |
stats[item] = s.execute().hits.total
|
|
110 |
stats[item] = s.count()
|
|
114 | 111 |
return stats |
115 | 112 |
|
116 | 113 |
def query_by_id(self, id): |
... | ... | |
164 | 161 |
|
165 | 162 |
def simple_query(self, textual_query, start=None, end=None, user_filter=None): |
166 | 163 |
s = Search(using=self.client, index=self.index_name).doc_type('object') |
167 |
q = Q('match', _all=textual_query) |
|
164 |
|
|
165 |
if not textual_query == '*': |
|
166 |
q = Q('match', _all=textual_query) |
|
167 |
else: |
|
168 |
q = Q() |
|
168 | 169 |
s.aggs.bucket('typologies', 'terms', field='typology') |
169 | 170 |
s.aggs.bucket('all_datasources', 'nested', path='datasources').bucket('all_names', 'terms', |
170 | 171 |
field='datasources.datasourceName') |
... | ... | |
323 | 324 |
|
324 | 325 |
return DLIESResponse(total=1, hits=hits) |
325 | 326 |
except Exception as e: |
326 |
print "Error on getting item "
|
|
327 |
print e
|
|
328 |
print "on line %i" % sys.exc_traceback.tb_lineno
|
|
327 |
log.error("Error on getting item ")
|
|
328 |
log.error(e)
|
|
329 |
log.error("on line %i" % sys.exc_traceback.tb_lineno)
|
|
329 | 330 |
return DLIESResponse() |
Also available in: Unified diff
Implemented new portal with FastAPI