Project

General

Profile

1
from elasticsearch import *
2
from elasticsearch_dsl import *
3
from os import path
4
import os
5

    
6
from elasticsearch_dsl.response import Response
7

    
8

    
9
def get_property(properties_path=None):
    """Load ``key=value`` settings from a properties file into a dict.

    Args:
        properties_path: Optional path of the file to read. When omitted, the
            ``api.properties`` file four directory levels above this module
            is used.

    Returns:
        dict mapping each key to its value; both are whitespace-stripped
        strings.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Only the first ``=`` separates key from value, so values may
    themselves contain ``=``.
    """
    if properties_path is None:
        properties_path = path.join(os.path.dirname(os.path.realpath(__file__)),
                                    '../../../../api.properties')
    p = {}
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(properties_path) as f:
        for line in f:
            entry = line.strip()
            if not entry or entry.startswith("#"):
                continue
            key, separator, value = entry.partition("=")
            if not separator:
                # Not a key=value pair; ignore instead of raising IndexError.
                continue
            p[key.strip()] = value.strip()
    return p
17

    
18

    
19
class DLIESConnector(object):
    """Query helper for the 'scholix' documents of an Elasticsearch index.

    The index name and the host list are read through ``get_property()``
    (keys ``api.index`` and ``es_index``) when the connector is created.
    """

    # Number of hits requested by every paged search.
    PAGE_SIZE = 100

    def __init__(self):
        """Read the connection settings and build the Elasticsearch client."""
        props = get_property()
        # 'es_index' is a comma-separated host list; tolerate spaces after commas.
        index_host = [host.strip() for host in props['es_index'].split(',')]

        self.client = Elasticsearch(hosts=index_host)
        self.index_name = props['api.index']

    def _search(self):
        """Return a fresh Search bound to the client, the index and doc type 'scholix'."""
        return Search(using=self.client, index=self.index_name).doc_type('scholix')

    @staticmethod
    def _nested_match(nested_path, fields):
        """Build a nested query on ``nested_path`` requiring every match in ``fields``.

        Args:
            nested_path: Path of the nested object to search in.
            fields: dict mapping a full field name to the value to match.
        """
        clauses = [Q('match', **{name: value}) for name, value in fields.items()]
        return Q('nested', path=nested_path, query=Q('bool', must=clauses))

    def create_pidType_query(self, value, start):
        """Nested query matching ``<start>.identifier.schema`` against ``value``."""
        return self._nested_match(start + '.identifier', {start + '.identifier.schema': value})

    def create_pid_query(self, value, start):
        """Nested query matching ``<start>.identifier.identifier`` against lower-cased ``value``."""
        return self._nested_match(start + '.identifier', {start + '.identifier.identifier': value.lower()})

    def create_source_pid_query(self, value):
        """Nested query matching the source identifier against ``value``.

        NOTE(review): this method was called by ``realtionToPid`` but was not
        defined in the class; it is implemented here as the 'source' variant of
        ``create_pid_query`` based on its name -- confirm the intended semantics.
        """
        return self.create_pid_query(value, 'source')

    def create_pid_pidType_query(self, pidType, pid):
        """Nested query requiring one source identifier with the given schema and value.

        NOTE(review): this method was called by ``realtionToPid`` but was not
        defined in the class; matching ``source.identifier.schema`` and
        ``source.identifier.identifier`` together is inferred from its name and
        call site -- confirm the intended semantics.
        """
        return self._nested_match('source.identifier', {
            'source.identifier.schema': pidType,
            'source.identifier.identifier': pid,
        })

    def create_typology_query(self, value):
        """Nested query matching ``target.objectType`` against ``value``."""
        return self._nested_match('target', {'target.objectType': value})

    def create_dataSource_query(self, value):
        """Nested query matching ``linkprovider.name`` against ``value``."""
        return self._nested_match('linkprovider', {'linkprovider.name': value})

    def create_publisher_query(self, value, start):
        """Doubly nested query matching ``<start>.publisher.name`` against ``value``."""
        inner = self._nested_match(start + '.publisher', {start + '.publisher.name': value})
        # The publisher object is nested inside <start>, hence the outer wrapper.
        return Q('nested', path=start, query=inner)

    def list_datasources(self, ds_name=None):
        """Count relationships per link provider name.

        Args:
            ds_name: Optional provider name; when given, the search is restricted
                to it and only the bucket whose key equals it is returned.

        Returns:
            list of dicts with keys ``name`` and ``totalRelationships``.
        """
        search_object = self._search()
        if ds_name:
            search_object = search_object.query(self.create_dataSource_query(ds_name))
        else:
            search_object = search_object.query()
        search_object.aggs \
            .bucket('all_datasources', 'nested', path='linkprovider') \
            .bucket('all_names', 'terms', field='linkprovider.name', size=100)

        response = search_object.execute()
        buckets = response.aggs.all_datasources.all_names.buckets
        return [dict(name=item.key, totalRelationships=item.doc_count) for item in buckets
                if not ds_name or item.key == ds_name]

    def list_publisher(self, start, pub_name=None):
        """Count relationships per publisher name under ``start``.

        Args:
            start: Top-level nested object holding the publisher
                (``links`` uses 'source' and 'target').
            pub_name: Optional publisher name; when given, the search is
                restricted to it and only the bucket whose key equals it is
                returned.

        Returns:
            list of dicts with keys ``name`` and ``totalRelationships``.
        """
        search_object = self._search()
        if pub_name:
            search_object = search_object.query(self.create_publisher_query(pub_name, start))
        else:
            search_object = search_object.query()
        search_object.aggs \
            .bucket('all_targets', 'nested', path=start) \
            .bucket('all_t_pubs', 'nested', path=start + '.publisher') \
            .bucket('all_pubs', 'terms', field=start + '.publisher.name', size=1000000)

        response = search_object.execute()
        buckets = response.aggs.all_targets.all_t_pubs.all_pubs.buckets
        return [dict(name=item.key, totalRelationships=item.doc_count) for item in buckets
                if not pub_name or item.key == pub_name]

    def links(self, provider=None, s_pid=None, t_pid=None, s_publisher=None, t_publisher=None, s_pid_type=None,
              t_pid_type=None, target_Type=None, page=0):
        """Search links matching all of the supplied criteria.

        Every argument that is set adds one clause; the clauses are combined
        with ``&``. With no criteria the search is executed without a query.

        Args:
            provider: Link provider name.
            s_pid, t_pid: Source / target identifier value.
            s_publisher, t_publisher: Source / target publisher name.
            s_pid_type, t_pid_type: Source / target identifier schema.
            target_Type: Target object type.
            page: Start position of the returned slice.
                NOTE(review): it is used directly as the slice start, i.e. as
                an offset rather than a page number -- confirm with callers.

        Returns:
            The response of executing the search for hits
            ``page`` .. ``page + PAGE_SIZE``.
        """
        queries = []
        if provider:
            queries.append(self.create_dataSource_query(provider))
        if s_pid:
            queries.append(self.create_pid_query(s_pid, 'source'))
        if t_pid:
            queries.append(self.create_pid_query(t_pid, 'target'))
        if s_publisher:
            queries.append(self.create_publisher_query(s_publisher, 'source'))
        if t_publisher:
            queries.append(self.create_publisher_query(t_publisher, 'target'))
        if s_pid_type:
            queries.append(self.create_pidType_query(s_pid_type, 'source'))
        if t_pid_type:
            # Fixed: the target clause previously used s_pid_type by mistake.
            queries.append(self.create_pidType_query(t_pid_type, 'target'))
        if target_Type:
            queries.append(self.create_typology_query(target_Type))

        combined = None
        for clause in queries:
            combined = clause if combined is None else combined & clause

        search_object = self._search()
        # Only attach a query when there is one; passing None to query() is invalid.
        if combined is not None:
            search_object = search_object.query(combined)

        return search_object[page:page + self.PAGE_SIZE].execute()

    def realtionToPid(self, pid, pidType=None, datasource=None, typology=None, page=0):
        """Search relationships whose source has the given identifier.

        Args:
            pid: Identifier value; lower-cased before querying.
            pidType: Optional identifier schema; lower-cased and required
                together with ``pid`` when given.
            datasource: Optional link provider name, applied as a filter.
            typology: Optional target object type, applied as a filter.
            page: Start position of the returned slice (used as an offset).

        Returns:
            The response of executing the search for hits
            ``page`` .. ``page + PAGE_SIZE``.
        """
        if pidType:
            query = self.create_pid_pidType_query(pidType.lower(), pid.lower())
        else:
            query = self.create_source_pid_query(pid.lower())

        filters = []
        if datasource:
            filters.append(self.create_dataSource_query(datasource))
        if typology:
            filters.append(self.create_typology_query(typology))

        search_object = self._search().query(query)
        if filters:
            search_object = search_object.filter(Q('bool', must=filters))
        return search_object[page:page + self.PAGE_SIZE].execute()

    def realtionToTypology(self, typology, page=0):
        """Search relationships whose target object type matches ``typology``.

        Returns:
            The response of executing the search for hits
            ``page`` .. ``page + PAGE_SIZE`` (``page`` is used as an offset).
        """
        search_object = self._search().query(self.create_typology_query(typology))
        return search_object[page:page + self.PAGE_SIZE].execute()
135

    
136

    
137
# Module-level connector instance. It is created at import time, which reads
# the properties file and builds the Elasticsearch client (see
# DLIESConnector.__init__).
dli_index = DLIESConnector()
(1-1/2)