Project

General

Profile

« Previous | Next » 

Revision 50064

made a new look

View differences:

es_connector.py
1 1
from json import JSONEncoder
2 2

  
3 3
import sys
4

  
5
import re
4 6
from elasticsearch import Elasticsearch
5 7
from elasticsearch_dsl import *
6 8

  
7 9
import os
8 10
from os import path
9 11

  
10

  
11 12
pid_resolver = {
12 13
    "pdb": "http://www.rcsb.org/pdb/explore/explore.do?structureId=%s",
13 14
    "ncbi-n": "http://www.ncbi.nlm.nih.gov/gquery/?term=%s",
......
35 36

  
36 37

  
37 38
def resolveIdentifier(pid, pid_type):
38
    if pid_type!= None:
39
        if pid_type.lower() in  pid_resolver:
39

  
40

  
41
    if pid_type != None:
42
        regex = r"\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&\'<>])\S)+)\b"
43
        if re.match(regex,pid):
44
            print "It should be doi"
45
            pid_type='doi'
46

  
47

  
48

  
49
        if pid_type.lower() in pid_resolver:
40 50
            return pid_resolver[pid_type.lower()] % pid
41 51
        else:
42 52
            if pid_type.lower() == 'openaire':
43
                return "https://www.openaire.eu/search/publication?articleId=%s"%pid.replace('oai:dnet:','')
53
                return "https://www.openaire.eu/search/publication?articleId=%s" % pid.replace('oai:dnet:', '')
44 54
            else:
45 55
                return "http://identifiers.org/%s:%s" % (pid_type, pid)
46 56
    return ""
47 57

  
58

  
48 59
def get_property():
49 60
    f = open(path.join(os.path.dirname(os.path.realpath(__file__)), '../../api.properties'))
50 61
    p = {}
......
63 74
    return Q('nested', path='localIdentifier', query=Q('bool', must=[Q('match', **args)]))
64 75

  
65 76

  
77
def create_pid_query(value):
78
    args = {'localIdentifier.id': value}
79
    return Q('nested', path='localIdentifier', query=Q('bool', must=[Q('match', **args)]))
80

  
81

  
66 82
def create_publisher_filter(value):
67 83
    return Q('match', publisher=value)
68 84

  
......
93 109
        self.client = Elasticsearch(hosts=self.index_host)
94 110
        self.index_name = props['api.index']
95 111

  
112
    def query_by_id(self, id):
113
        s = Search(using=self.client, index=self.index_name).doc_type('object')
114
        s = s.query(create_pid_query(id))
115
        s.aggs.bucket('typologies', 'terms', field='typology')
116
        s.aggs.bucket('all_datasources', 'nested', path='datasources').bucket('all_names', 'terms',
117
                                                                              field='datasources.datasourceName')
118
        s.aggs.bucket('all_publisher', 'terms', field='publisher')
119
        s.aggs.bucket('all_pids', 'nested', path='localIdentifier').bucket('all_types', 'terms',
120
                                                                           field='localIdentifier.type')
121
        response = s.execute()
122

  
123
        hits = []
124

  
125
        for index_result in response.hits:
126
            input_source = index_result.__dict__['_d_']
127
            fixed_titles = []
128

  
129
            for ids in input_source.get('localIdentifier', []):
130
                ds = resolveIdentifier(ids['id'], ids['type'])
131
                ids['url'] = ds
132
            for t in input_source.get('title', []):
133
                if len(t) > 0 and t[0] == '"' and t[-1] == '"':
134
                    fixed_titles.append(t[1:-1])
135
                else:
136
                    fixed_titles.append(t)
137
            input_source['title'] = fixed_titles
138
            hits.append(input_source)
139

  
140
        pid_types = []
141
        for tag in response.aggs.all_pids.all_types.buckets:
142
            pid_types.append(dict(key=tag.key, count=tag.doc_count))
143

  
144
        datasources = []
145
        for tag in response.aggs.all_datasources.all_names.buckets:
146
            datasources.append(dict(key=tag.key, count=tag.doc_count))
147

  
148
        typologies = []
149
        for tag in response.aggs.typologies.buckets:
150
            typologies.append(dict(key=tag.key, count=tag.doc_count))
151

  
152
        publishers = []
153
        for tag in response.aggs.all_publisher.buckets:
154
            if len(tag.key) > 0:
155
                publishers.append(dict(key=tag.key, count=tag.doc_count))
156

  
157
        return DLIESResponse(total=response.hits.total,
158
                             facet=dict(pid=pid_types, typology=typologies, datasource=datasources,
159
                                        publishers=publishers), hits=hits)
160

  
96 161
    def simple_query(self, textual_query, start=None, end=None, user_filter=None):
97 162
        s = Search(using=self.client, index=self.index_name).doc_type('object')
98 163
        q = Q('match', _all=textual_query)
......
135 200
            input_source = index_result.__dict__['_d_']
136 201
            fixed_titles = []
137 202

  
138
            for ids in  input_source.get('localIdentifier',[]):
203
            for ids in input_source.get('localIdentifier', []):
139 204
                ds = resolveIdentifier(ids['id'], ids['type'])
140 205
                ids['url'] = ds
141
            for t in input_source.get('title',[]):
206
            for t in input_source.get('title', []):
142 207
                if len(t) > 0 and t[0] == '"' and t[-1] == '"':
143 208
                    fixed_titles.append(t[1:-1])
144 209
                else:
......
180 245
        hits = []
181 246

  
182 247
        for index_hit in response.hits:
183
            hits.append(index_hit.__dict__['_d_'])
248
            current_item = index_hit.__dict__['_d_']
249
            if 'target' in current_item:
250
                ids = []
251
                for item in current_item['target']['identifier']:
252
                    c_it = item
253
                    c_it['url'] = resolveIdentifier(item['identifier'], item['schema'])
254
                    ids .append(c_it)
255
                current_item['target']['identifier'] = ids
256
            hits.append(current_item)
184 257

  
185 258
        return hits
186 259

  
......
202 275
            input_source = res['_source']
203 276
            fixed_titles = []
204 277
            for t in input_source.get('title'):
205
                if len(t) >0 and t[0]=='"' and t[-1]=='"':
278
                if len(t) > 0 and t[0] == '"' and t[-1] == '"':
206 279
                    fixed_titles.append(t[1:-1])
207 280
                else:
208 281
                    fixed_titles.append(t)
209 282
            input_source['title'] = fixed_titles
210 283

  
211
            for ids in  input_source.get('localIdentifier',[]):
284
            for ids in input_source.get('localIdentifier', []):
212 285
                ds = resolveIdentifier(ids['id'], ids['type'])
213 286
                ids['url'] = ds
214 287
            related_publications = []
......
221 294
                    related_publications = self.related_type(id, 'publication', start)
222 295
                else:
223 296
                    related_publications = self.related_type(id, 'publication')
224
                if len(related_publications) >0 :
297
                if len(related_publications) > 0:
225 298
                    rel_source = related_publications[0]
226 299
                else:
227 300
                    rel_source = {}
......
248 321
        except Exception as e:
249 322
            print "Error on getting item "
250 323
            print e
251
            print "on line %i"% sys.exc_traceback.tb_lineno
324
            print "on line %i" % sys.exc_traceback.tb_lineno
252 325
            return DLIESResponse()

Also available in: Unified diff