Project

General

Profile

1
from elasticsearch import *
2
from elasticsearch_dsl import *
3
from os import path
4
import os
5
from eu.dnetlib.util import get_index_properties
6
from elasticsearch_dsl.response import Response
7
import logging
8
from elasticsearch_dsl.connections import Connections
9

    
10

    
11
log = logging.getLogger("scholexplorer")
12

    
13
class ScholixConnector(object):    
14

    
15
    __instance = None
16

    
17
    def __new__(cls):
18
        if ScholixConnector.__instance is None:
19
            ScholixConnector.__instance = object.__new__(cls)        
20
            props = get_index_properties()
21
            index_name = props['api.index']
22
            index_host = [x for x in props['es_index'].split(',')]    
23
            #connections.create_connection(hosts=index_host, timeout=1000)    
24
            ScholixConnector.__instance.connection = Connections()   
25
            ScholixConnector.__instance.connection.create_connection(hosts=index_host, timeout=1000)    
26
            ScholixConnector.__instance.index_host = index_host
27
            ScholixConnector.__instance.index_name = index_name
28
        return ScholixConnector.__instance
29

    
30
    def create_pidType_query(self, value, start):
31
        args = {start + '.identifier.schema': value}
32
        return Q('nested', path=start + '.identifier', query=Q('bool', must=[Q('match', **args)]))
33

    
34
    def create_pid_query(self, value, start):
35
        args = {start + '.identifier.identifier': value.lower()}
36
        return Q('nested', path=start + '.identifier', query=Q('bool', must=[Q('match', **args)]))
37

    
38
    def create_typology_query(self, value):
39
        args = {'target.objectType': value}
40
        return Q('nested', path='target', query=Q('bool', must=[Q('match', **args)]))
41

    
42
    def create_dataSource_query(self, value):
43
        args = {'linkprovider.name': value}
44
        return Q('nested', path='linkprovider', query=Q('bool', must=[Q('match', **args)]))
45

    
46
    def create_publisher_query(self, value, start):
47
        args = {start + '.publisher.name': value}
48
        q = Q('nested', path=start + '.publisher', query=Q('bool', must=[Q('match', **args)]))
49
        return Q('nested', path=start, query=q)
50

    
51
    def list_datasources(self, ds_name=None):
52
        search_object = Search(using=self.connection.get_connection(), index=self.index_name).doc_type('scholix')
53
        if ds_name:
54
            search_object = search_object.query(self.create_dataSource_query(ds_name))
55
        else:
56
            search_object = search_object.query()
57
        search_object.aggs.bucket('all_datasources', 'nested', path='linkprovider').bucket('all_names', 'terms',
58
                                                                                           field='linkprovider.name',
59
                                                                                           size=100)
60

    
61
        response = search_object.execute()
62
        if ds_name:
63
            for item in response.aggs.all_datasources.all_names.buckets:
64
                if item.key == ds_name:
65
                    yield dict(name=item.key, totalRelationships=item.doc_count)
66

    
67
        else:
68
            for item in response.aggs.all_datasources.all_names.buckets:
69
                yield dict(name=item.key, totalRelationships=item.doc_count)    
70
        
71

    
72
    def list_publisher(self, start, pub_name=None):
73
        log.info("Started Index from host")
74
        search_object = Search(using=self.connection.get_connection(), index=self.index_name).doc_type('scholix')
75
        if pub_name:
76
            search_object = search_object.query(self.create_publisher_query(pub_name, start))
77
        else:
78
            search_object = search_object.query()
79
        search_object.aggs.bucket('all_targets', 'nested', path=start).bucket('all_t_pubs', 'nested',
80
                                                                              path=start + '.publisher').bucket(
81
            'all_pubs', 'terms',
82
            field=start + '.publisher.name',
83
            size=1000)
84

    
85
        response = search_object.execute()        
86
        for item in response.aggs.all_targets.all_t_pubs.all_pubs.buckets:
87
            if pub_name and item.key == pub_name:            
88
                yield dict(name=item.key, totalRelationships=item.doc_count)
89
            else:
90
                yield dict(name=item.key, totalRelationships=item.doc_count)
91
            
92
        
93

    
94
    def links(self, provider=None, s_pid=None, t_pid=None, s_publisher=None, t_publisher=None, s_pid_type=None,
95
              t_pid_type=None, target_Type=None, page=0):       
96
        queries = []
97
        if provider:
98
            log.info("PROVIDER NOT NONE: {}".format(provider))
99
            queries.append(self.create_dataSource_query(provider))
100
        if s_pid:
101
            log.info("S_PID NOT NONE: {}".format(s_pid))
102
            queries.append(self.create_pid_query(s_pid, 'source'))
103
        if t_pid:
104
            queries.append(self.create_pid_query(t_pid, 'target'))
105
        if s_publisher:
106
            queries.append(self.create_publisher_query(s_publisher, 'source'))
107
        if t_publisher:
108
            queries.append(self.create_publisher_query(t_publisher, 'target'))
109
        if s_pid_type:
110
            queries.append(self.create_pidType_query(s_pid_type, 'source'))
111
        if t_pid_type:
112
            queries.append(self.create_pidType_query(t_pid_type, 'target'))
113
        if target_Type:
114
            queries.append(self.create_typology_query(target_Type))
115
        q = None
116
        for item in queries:
117
            if not q:
118
                q = item
119
            else:
120
                q = q & item
121
        log.debug("REQUEST CREATED {}".format(q))
122
        search_object = Search(using=self.connection.get_connection(), index=self.index_name).doc_type('scholix').query(q)
123
        log.debug("Page request size is {}".format(page))
124
        if page > 9999:
125
            return []
126

    
127
        return search_object[page:page + 100].execute()
128

    
129

    
130
    def realtionToPid(self, pid, pidType=None, datasource=None, typology=None, page=0):
131
        if pidType:
132
            query = self.create_pid_pidType_query(pidType.lower(), pid.lower())
133
        else:
134
            query = self.create_source_pid_query(pid.lower())
135
        filters = []
136
        if datasource and len(datasource):
137
            filters.append(self.create_dataSource_query(datasource))
138
        if typology and len(typology):
139
            filters.append(self.create_typology_query(typology))
140
        search_object = Search(using=self.connection.get_connection(), index=self.index_name).doc_type('scholix').query(query)
141

    
142
        if len(filters):
143
            search_object = search_object.filter(Q('bool', must=filters))
144
            if page > 9999:
145
                return []
146
        return search_object[page:page + 100].execute()
147

    
148
    def realtionToTypology(self, typology, page=0):
149
        search_object = Search(using=self.connection.get_connection(), index=self.index_name).doc_type('scholix').query(
150
            self.create_typology_query(typology))
151
        if page > 9999:
152
            return []
153
        return search_object[page:page + 100].execute()
(1-1/5)