1
|
from elasticsearch import *
|
2
|
from elasticsearch_dsl import *
|
3
|
from os import path
|
4
|
import os
|
5
|
from eu.dnetlib.util import get_index_properties
|
6
|
from elasticsearch_dsl.response import Response
|
7
|
import logging
|
8
|
from elasticsearch_dsl.connections import Connections
|
9
|
|
10
|
|
11
|
log = logging.getLogger("scholexplorer")
|
12
|
|
13
|
class ScholixConnector(object):
|
14
|
|
15
|
__instance = None
|
16
|
|
17
|
def __new__(cls):
|
18
|
if ScholixConnector.__instance is None:
|
19
|
ScholixConnector.__instance = object.__new__(cls)
|
20
|
props = get_index_properties()
|
21
|
index_name = props['api.index']
|
22
|
index_host = [x for x in props['es_index'].split(',')]
|
23
|
#connections.create_connection(hosts=index_host, timeout=1000)
|
24
|
ScholixConnector.__instance.connection = Connections()
|
25
|
ScholixConnector.__instance.connection.create_connection(hosts=index_host, timeout=1000)
|
26
|
ScholixConnector.__instance.index_host = index_host
|
27
|
ScholixConnector.__instance.index_name = index_name
|
28
|
return ScholixConnector.__instance
|
29
|
|
30
|
def create_pidType_query(self, value, start):
|
31
|
args = {start + '.identifier.schema': value}
|
32
|
return Q('nested', path=start + '.identifier', query=Q('bool', must=[Q('match', **args)]))
|
33
|
|
34
|
def create_pid_query(self, value, start):
|
35
|
args = {start + '.identifier.identifier': value.lower()}
|
36
|
return Q('nested', path=start + '.identifier', query=Q('bool', must=[Q('match', **args)]))
|
37
|
|
38
|
def create_typology_query(self, value):
|
39
|
args = {'target.objectType': value}
|
40
|
return Q('nested', path='target', query=Q('bool', must=[Q('match', **args)]))
|
41
|
|
42
|
def create_dataSource_query(self, value):
|
43
|
args = {'linkprovider.name': value}
|
44
|
return Q('nested', path='linkprovider', query=Q('bool', must=[Q('match', **args)]))
|
45
|
|
46
|
def create_publisher_query(self, value, start):
|
47
|
args = {start + '.publisher.name': value}
|
48
|
q = Q('nested', path=start + '.publisher', query=Q('bool', must=[Q('match', **args)]))
|
49
|
return Q('nested', path=start, query=q)
|
50
|
|
51
|
def list_datasources(self, ds_name=None):
|
52
|
search_object = Search(using=self.connection.get_connection(), index=self.index_name).doc_type('scholix')
|
53
|
if ds_name:
|
54
|
search_object = search_object.query(self.create_dataSource_query(ds_name))
|
55
|
else:
|
56
|
search_object = search_object.query()
|
57
|
search_object.aggs.bucket('all_datasources', 'nested', path='linkprovider').bucket('all_names', 'terms',
|
58
|
field='linkprovider.name',
|
59
|
size=100)
|
60
|
|
61
|
response = search_object.execute()
|
62
|
if ds_name:
|
63
|
for item in response.aggs.all_datasources.all_names.buckets:
|
64
|
if item.key == ds_name:
|
65
|
yield dict(name=item.key, totalRelationships=item.doc_count)
|
66
|
|
67
|
else:
|
68
|
for item in response.aggs.all_datasources.all_names.buckets:
|
69
|
yield dict(name=item.key, totalRelationships=item.doc_count)
|
70
|
|
71
|
|
72
|
def list_publisher(self, start, pub_name=None):
|
73
|
log.info("Started Index from host")
|
74
|
search_object = Search(using=self.connection.get_connection(), index=self.index_name).doc_type('scholix')
|
75
|
if pub_name:
|
76
|
search_object = search_object.query(self.create_publisher_query(pub_name, start))
|
77
|
else:
|
78
|
search_object = search_object.query()
|
79
|
search_object.aggs.bucket('all_targets', 'nested', path=start).bucket('all_t_pubs', 'nested',
|
80
|
path=start + '.publisher').bucket(
|
81
|
'all_pubs', 'terms',
|
82
|
field=start + '.publisher.name',
|
83
|
size=1000)
|
84
|
|
85
|
response = search_object.execute()
|
86
|
for item in response.aggs.all_targets.all_t_pubs.all_pubs.buckets:
|
87
|
if pub_name and item.key == pub_name:
|
88
|
yield dict(name=item.key, totalRelationships=item.doc_count)
|
89
|
else:
|
90
|
yield dict(name=item.key, totalRelationships=item.doc_count)
|
91
|
|
92
|
|
93
|
|
94
|
def links(self, provider=None, s_pid=None, t_pid=None, s_publisher=None, t_publisher=None, s_pid_type=None,
|
95
|
t_pid_type=None, target_Type=None, page=0):
|
96
|
queries = []
|
97
|
if provider:
|
98
|
log.info("PROVIDER NOT NONE: {}".format(provider))
|
99
|
queries.append(self.create_dataSource_query(provider))
|
100
|
if s_pid:
|
101
|
log.info("S_PID NOT NONE: {}".format(s_pid))
|
102
|
queries.append(self.create_pid_query(s_pid, 'source'))
|
103
|
if t_pid:
|
104
|
queries.append(self.create_pid_query(t_pid, 'target'))
|
105
|
if s_publisher:
|
106
|
queries.append(self.create_publisher_query(s_publisher, 'source'))
|
107
|
if t_publisher:
|
108
|
queries.append(self.create_publisher_query(t_publisher, 'target'))
|
109
|
if s_pid_type:
|
110
|
queries.append(self.create_pidType_query(s_pid_type, 'source'))
|
111
|
if t_pid_type:
|
112
|
queries.append(self.create_pidType_query(t_pid_type, 'target'))
|
113
|
if target_Type:
|
114
|
queries.append(self.create_typology_query(target_Type))
|
115
|
q = None
|
116
|
for item in queries:
|
117
|
if not q:
|
118
|
q = item
|
119
|
else:
|
120
|
q = q & item
|
121
|
log.debug("REQUEST CREATED {}".format(q))
|
122
|
search_object = Search(using=self.connection.get_connection(), index=self.index_name).doc_type('scholix').query(q)
|
123
|
log.debug("Page request size is {}".format(page))
|
124
|
if page > 9999:
|
125
|
return []
|
126
|
|
127
|
return search_object[page:page + 100].execute()
|
128
|
|
129
|
|
130
|
def realtionToPid(self, pid, pidType=None, datasource=None, typology=None, page=0):
|
131
|
if pidType:
|
132
|
query = self.create_pid_pidType_query(pidType.lower(), pid.lower())
|
133
|
else:
|
134
|
query = self.create_source_pid_query(pid.lower())
|
135
|
filters = []
|
136
|
if datasource and len(datasource):
|
137
|
filters.append(self.create_dataSource_query(datasource))
|
138
|
if typology and len(typology):
|
139
|
filters.append(self.create_typology_query(typology))
|
140
|
search_object = Search(using=self.connection.get_connection(), index=self.index_name).doc_type('scholix').query(query)
|
141
|
|
142
|
if len(filters):
|
143
|
search_object = search_object.filter(Q('bool', must=filters))
|
144
|
if page > 9999:
|
145
|
return []
|
146
|
return search_object[page:page + 100].execute()
|
147
|
|
148
|
def realtionToTypology(self, typology, page=0):
|
149
|
search_object = Search(using=self.connection.get_connection(), index=self.index_name).doc_type('scholix').query(
|
150
|
self.create_typology_query(typology))
|
151
|
if page > 9999:
|
152
|
return []
|
153
|
return search_object[page:page + 100].execute()
|