Project

General

Profile

« Previous | Next » 

Revision 61369

Fixed scholexplorer to use the new data model.

View differences:

es_connector.py
10 10
import os
11 11
from os import path
12 12

  
13

  
14 13
log = logging.getLogger("scholexplorer-portal")
15 14

  
16 15
pid_resolver = {
......
43 42
def resolveIdentifier(pid, pid_type):
44 43
    if pid_type != None:
45 44
        regex = r"\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&\'<>])\S)+)\b"
46
        if re.match(regex,pid):
45
        if re.match(regex, pid):
47 46
            log.debug("It should be doi")
48 47
            pid_type = 'doi'
49 48
        if pid_type.lower() in pid_resolver:
......
52 51
            if pid_type.lower() == 'openaire':
53 52
                return "https://www.openaire.eu/search/publication?articleId=%s" % pid.replace('oai:dnet:', '')
54 53
            elif pid_type.lower() == 'url':
55
                return  pid
54
                return pid
56 55
            else:
57 56
                return "http://identifiers.org/%s:%s" % (pid_type, pid)
58 57
    return ""
59 58

  
60 59

  
61

  
62

  
63

  
64 60
def create_typology_filter(value):
65 61
    return Q('match', typology=value)
66 62

  
......
106 102
        self.index_name = props['api.index']
107 103

  
108 104
    def get_main_page_stats(self):
109
        stats = dict(total =int(Search(using=self.client, index=self.index_name+"_scholix").count()/2))
105
        stats = dict(total=int(Search(using=self.client, index=self.index_name + "_scholix").count() / 2))
110 106
        for item in ['dataset', 'publication']:
111
            s= Search(using=self.client, index=self.index_name+"_object").query(Q('match', typology=item))
107
            s = Search(using=self.client, index=self.index_name + "_object").query(Q('match', typology=item))
112 108
            stats[item] = s.count()
113 109
        return stats
114 110

  
115 111
    def query_by_id(self, id):
116
        s = Search(using=self.client, index=self.index_name+"_object")
112
        s = Search(using=self.client, index=self.index_name + "_object")
117 113
        s = s.query(create_pid_query(id))
118 114
        s.aggs.bucket('typologies', 'terms', field='typology')
119 115
        s.aggs.bucket('all_datasources', 'nested', path='datasources').bucket('all_names', 'terms',
......
162 158
                                        publishers=publishers), hits=hits)
163 159

  
164 160
    def simple_query(self, textual_query, start=None, end=None, user_filter=None):
165
        s = Search(using=self.client, index=self.index_name+"_object")
166
        if not textual_query  == '*':
161
        s = Search(using=self.client, index=self.index_name + "_object")
162
        if not textual_query == '*':
167 163
            q = Q("multi_match", query=textual_query, fields=['title', 'abstract'])
168 164
        else:
169 165
            q = Q()
......
200 196
            s = s[start:end]
201 197
        response = s.execute()
202 198

  
203
        
204

  
205 199
        hits = []
206 200

  
201
        print(f"index : {self.index_name}_object")
202
        print(response.hits.total)
203

  
207 204
        for index_result in response.hits:
208
            input_source = index_result.__dict__['_d_']           
209
            fixed_titles = []            
210
            for ids in input_source.get('localIdentifier', []):
211
                ds = resolveIdentifier(ids['id'], ids['type'])
212
                ids['url'] = ds
205
            input_source = index_result.__dict__['_d_']
206
            fixed_titles = []
207
            # for ids in input_source.get('localIdentifier', []):
208
            #     ds = resolveIdentifier(ids['id'], ids['type'])
209
            #     ids['url'] = ds
213 210

  
214
            if input_source.get('title', []) is not None:           
211
            if input_source.get('title', []) is not None:
215 212
                for t in input_source.get('title', []):
216 213
                    if len(t) > 0 and t[0] == '"' and t[-1] == '"':
217 214
                        fixed_titles.append(t[1:-1])
218 215
                    else:
219 216
                        fixed_titles.append(t)
220 217
            else:
221
                fixed_titles.append("title not available")            
218
                fixed_titles.append("title not available")
222 219
            input_source['title'] = fixed_titles
223 220
            hits.append(input_source)
224
        
221

  
225 222
        pid_types = []
226 223
        for tag in response.aggs.all_pids.all_types.buckets:
227 224
            pid_types.append(dict(key=tag.key, count=tag.doc_count))
......
248 245
        query_type = Q('nested', path='target', query=Q('bool', must=[Q('match', **args)]))
249 246
        args_id = {'source.dnetIdentifier': object_id}
250 247
        query_for_id = Q('nested', path='source', query=Q('bool', must=[Q('match', **args_id)]))
251
        s = Search(using=self.client).index(self.index_name+"_scholix").query(query_for_id & query_type)
248
        s = Search(using=self.client).index(self.index_name + "_scholix").query(query_for_id & query_type)
252 249
        if start:
253 250
            s = s[start:start + 10]
254
       
251

  
255 252
        response = s.execute()
256 253
        hits = []
257 254
        for index_hit in response.hits:
......
261 258
                for item in current_item['target']['identifier']:
262 259
                    c_it = item
263 260
                    c_it['url'] = resolveIdentifier(item['identifier'], item['schema'])
264
                    ids .append(c_it)
261
                    ids.append(c_it)
265 262
                current_item['target']['identifier'] = ids
266 263
            hits.append(current_item)
267 264

  
......
271 268
        if relation is None:
272 269
            return
273 270
        relSource = relation.get('source')
274
        collectedFrom = relSource.get('collectedFrom',[])
271
        collectedFrom = relSource.get('collectedFrom', [])
275 272
        if collectedFrom is not None:
276 273
            for coll in collectedFrom:
277 274
                for d in source['datasources']:
......
281 278

  
282 279
    def item_by_id(self, id, type=None, start=None):
283 280
        try:
284
            res = self.client.get(index=self.index_name+"_object",doc_type="_all", id=id, _source=True)
281
            res = self.client.get(index=self.index_name + "_object", doc_type="_all", id=id, _source=True)
285 282
            hits = []
286 283
            input_source = res['_source']
287 284
            fixed_titles = []
288
            for t in input_source.get('title',[]):
285
            for t in input_source.get('title', []):
289 286
                if len(t) > 0 and t[0] == '"' and t[-1] == '"':
290 287
                    fixed_titles.append(t[1:-1])
291 288
                else:
292 289
                    fixed_titles.append(t)
293 290
            input_source['title'] = fixed_titles
294 291

  
295
            for ids in input_source.get('localIdentifier', []):
296
                ds = resolveIdentifier(ids['id'], ids['type'])
297
                ids['url'] = ds
298 292
            related_publications = []
299 293
            related_dataset = []
300
            related_unknown = []            
294
            related_unknown = []
301 295

  
302 296
            rel_source = None
303 297
            if input_source.get('relatedPublications') > 0:
......
310 304
                else:
311 305
                    rel_source = {}
312 306

  
313

  
314

  
315 307
            if input_source.get('relatedDatasets') > 0:
316 308
                if 'dataset' == type:
317 309
                    related_dataset = self.related_type(id, 'dataset', start)
......
332 324
                             related_unknown=related_unknown))
333 325

  
334 326
            return DLIESResponse(total=1, hits=hits)
335
        except Exception as e:            
327
        except Exception as e:
336 328
            log.error("Error on getting item ")
337
            log.error(e)            
329
            log.error(e)
338 330
            log.error("on line %i" % sys.exc_info)
339 331
            return DLIESResponse()

Also available in: Unified diff