Revision 61369
Added by Sandro La Bruzzo almost 3 years ago
es_connector.py | ||
---|---|---|
10 | 10 |
import os |
11 | 11 |
from os import path |
12 | 12 |
|
13 |
|
|
14 | 13 |
log = logging.getLogger("scholexplorer-portal") |
15 | 14 |
|
16 | 15 |
pid_resolver = { |
... | ... | |
43 | 42 |
def resolveIdentifier(pid, pid_type): |
44 | 43 |
if pid_type != None: |
45 | 44 |
regex = r"\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&\'<>])\S)+)\b" |
46 |
if re.match(regex,pid): |
|
45 |
if re.match(regex, pid):
|
|
47 | 46 |
log.debug("It should be doi") |
48 | 47 |
pid_type = 'doi' |
49 | 48 |
if pid_type.lower() in pid_resolver: |
... | ... | |
52 | 51 |
if pid_type.lower() == 'openaire': |
53 | 52 |
return "https://www.openaire.eu/search/publication?articleId=%s" % pid.replace('oai:dnet:', '') |
54 | 53 |
elif pid_type.lower() == 'url': |
55 |
return pid
|
|
54 |
return pid |
|
56 | 55 |
else: |
57 | 56 |
return "http://identifiers.org/%s:%s" % (pid_type, pid) |
58 | 57 |
return "" |
59 | 58 |
|
60 | 59 |
|
61 |
|
|
62 |
|
|
63 |
|
|
64 | 60 |
def create_typology_filter(value):
    """Build an Elasticsearch ``match`` query on the ``typology`` field.

    :param value: typology value to match (e.g. ``'dataset'`` or ``'publication'``)
    :return: an elasticsearch_dsl ``Q`` match query object
    """
    typology_query = Q('match', typology=value)
    return typology_query
66 | 62 |
|
... | ... | |
106 | 102 |
self.index_name = props['api.index'] |
107 | 103 |
|
108 | 104 |
def get_main_page_stats(self):
    """Collect the summary counts shown on the portal's main page.

    Returns a dict with:
      * ``total`` — count of documents in the ``*_scholix`` index divided
        by two (presumably each link is indexed in both directions —
        TODO confirm against the indexing pipeline);
      * one entry per typology (``dataset``, ``publication``) with the
        count of matching documents in the ``*_object`` index.
    """
    scholix_index = self.index_name + "_scholix"
    object_index = self.index_name + "_object"

    half_link_count = int(Search(using=self.client, index=scholix_index).count() / 2)
    stats = dict(total=half_link_count)

    for typology in ['dataset', 'publication']:
        typology_search = Search(using=self.client, index=object_index)
        typology_search = typology_search.query(Q('match', typology=typology))
        stats[typology] = typology_search.count()

    return stats
114 | 110 |
|
115 | 111 |
def query_by_id(self, id): |
116 |
s = Search(using=self.client, index=self.index_name+"_object")
|
|
112 |
s = Search(using=self.client, index=self.index_name + "_object")
|
|
117 | 113 |
s = s.query(create_pid_query(id)) |
118 | 114 |
s.aggs.bucket('typologies', 'terms', field='typology') |
119 | 115 |
s.aggs.bucket('all_datasources', 'nested', path='datasources').bucket('all_names', 'terms', |
... | ... | |
162 | 158 |
publishers=publishers), hits=hits) |
163 | 159 |
|
164 | 160 |
def simple_query(self, textual_query, start=None, end=None, user_filter=None): |
165 |
s = Search(using=self.client, index=self.index_name+"_object")
|
|
166 |
if not textual_query == '*':
|
|
161 |
s = Search(using=self.client, index=self.index_name + "_object")
|
|
162 |
if not textual_query == '*': |
|
167 | 163 |
q = Q("multi_match", query=textual_query, fields=['title', 'abstract']) |
168 | 164 |
else: |
169 | 165 |
q = Q() |
... | ... | |
200 | 196 |
s = s[start:end] |
201 | 197 |
response = s.execute() |
202 | 198 |
|
203 |
|
|
204 |
|
|
205 | 199 |
hits = [] |
206 | 200 |
|
201 |
print(f"index : {self.index_name}_object") |
|
202 |
print(response.hits.total) |
|
203 |
|
|
207 | 204 |
for index_result in response.hits: |
208 |
input_source = index_result.__dict__['_d_']
|
|
209 |
fixed_titles = []
|
|
210 |
for ids in input_source.get('localIdentifier', []): |
|
211 |
ds = resolveIdentifier(ids['id'], ids['type']) |
|
212 |
ids['url'] = ds |
|
205 |
input_source = index_result.__dict__['_d_'] |
|
206 |
fixed_titles = [] |
|
207 |
# for ids in input_source.get('localIdentifier', []):
|
|
208 |
# ds = resolveIdentifier(ids['id'], ids['type'])
|
|
209 |
# ids['url'] = ds
|
|
213 | 210 |
|
214 |
if input_source.get('title', []) is not None:
|
|
211 |
if input_source.get('title', []) is not None: |
|
215 | 212 |
for t in input_source.get('title', []): |
216 | 213 |
if len(t) > 0 and t[0] == '"' and t[-1] == '"': |
217 | 214 |
fixed_titles.append(t[1:-1]) |
218 | 215 |
else: |
219 | 216 |
fixed_titles.append(t) |
220 | 217 |
else: |
221 |
fixed_titles.append("title not available")
|
|
218 |
fixed_titles.append("title not available") |
|
222 | 219 |
input_source['title'] = fixed_titles |
223 | 220 |
hits.append(input_source) |
224 |
|
|
221 |
|
|
225 | 222 |
pid_types = [] |
226 | 223 |
for tag in response.aggs.all_pids.all_types.buckets: |
227 | 224 |
pid_types.append(dict(key=tag.key, count=tag.doc_count)) |
... | ... | |
248 | 245 |
query_type = Q('nested', path='target', query=Q('bool', must=[Q('match', **args)])) |
249 | 246 |
args_id = {'source.dnetIdentifier': object_id} |
250 | 247 |
query_for_id = Q('nested', path='source', query=Q('bool', must=[Q('match', **args_id)])) |
251 |
s = Search(using=self.client).index(self.index_name+"_scholix").query(query_for_id & query_type)
|
|
248 |
s = Search(using=self.client).index(self.index_name + "_scholix").query(query_for_id & query_type)
|
|
252 | 249 |
if start: |
253 | 250 |
s = s[start:start + 10] |
254 |
|
|
251 |
|
|
255 | 252 |
response = s.execute() |
256 | 253 |
hits = [] |
257 | 254 |
for index_hit in response.hits: |
... | ... | |
261 | 258 |
for item in current_item['target']['identifier']: |
262 | 259 |
c_it = item |
263 | 260 |
c_it['url'] = resolveIdentifier(item['identifier'], item['schema']) |
264 |
ids .append(c_it)
|
|
261 |
ids.append(c_it) |
|
265 | 262 |
current_item['target']['identifier'] = ids |
266 | 263 |
hits.append(current_item) |
267 | 264 |
|
... | ... | |
271 | 268 |
if relation is None: |
272 | 269 |
return |
273 | 270 |
relSource = relation.get('source') |
274 |
collectedFrom = relSource.get('collectedFrom',[]) |
|
271 |
collectedFrom = relSource.get('collectedFrom', [])
|
|
275 | 272 |
if collectedFrom is not None: |
276 | 273 |
for coll in collectedFrom: |
277 | 274 |
for d in source['datasources']: |
... | ... | |
281 | 278 |
|
282 | 279 |
def item_by_id(self, id, type=None, start=None): |
283 | 280 |
try: |
284 |
res = self.client.get(index=self.index_name+"_object",doc_type="_all", id=id, _source=True)
|
|
281 |
res = self.client.get(index=self.index_name + "_object", doc_type="_all", id=id, _source=True)
|
|
285 | 282 |
hits = [] |
286 | 283 |
input_source = res['_source'] |
287 | 284 |
fixed_titles = [] |
288 |
for t in input_source.get('title',[]): |
|
285 |
for t in input_source.get('title', []):
|
|
289 | 286 |
if len(t) > 0 and t[0] == '"' and t[-1] == '"': |
290 | 287 |
fixed_titles.append(t[1:-1]) |
291 | 288 |
else: |
292 | 289 |
fixed_titles.append(t) |
293 | 290 |
input_source['title'] = fixed_titles |
294 | 291 |
|
295 |
for ids in input_source.get('localIdentifier', []): |
|
296 |
ds = resolveIdentifier(ids['id'], ids['type']) |
|
297 |
ids['url'] = ds |
|
298 | 292 |
related_publications = [] |
299 | 293 |
related_dataset = [] |
300 |
related_unknown = []
|
|
294 |
related_unknown = [] |
|
301 | 295 |
|
302 | 296 |
rel_source = None |
303 | 297 |
if input_source.get('relatedPublications') > 0: |
... | ... | |
310 | 304 |
else: |
311 | 305 |
rel_source = {} |
312 | 306 |
|
313 |
|
|
314 |
|
|
315 | 307 |
if input_source.get('relatedDatasets') > 0: |
316 | 308 |
if 'dataset' == type: |
317 | 309 |
related_dataset = self.related_type(id, 'dataset', start) |
... | ... | |
332 | 324 |
related_unknown=related_unknown)) |
333 | 325 |
|
334 | 326 |
return DLIESResponse(total=1, hits=hits) |
335 |
except Exception as e:
|
|
327 |
except Exception as e: |
|
336 | 328 |
log.error("Error on getting item ") |
337 |
log.error(e)
|
|
329 |
log.error(e) |
|
338 | 330 |
log.error("on line %i" % sys.exc_info) |
339 | 331 |
return DLIESResponse() |
Also available in: Unified diff
Fixed ScholExplorer to use the new data model.