1
|
from elasticsearch import *
|
2
|
from elasticsearch_dsl import *
|
3
|
from os import path
|
4
|
import os
|
5
|
|
6
|
from elasticsearch_dsl.response import Response
|
7
|
|
8
|
|
9
|
def get_property(properties_path=None):
    """Load ``key = value`` settings from the API properties file.

    Parsing rules:

    * blank lines and lines whose first non-blank character is ``#`` are
      ignored;
    * each remaining line is split on its FIRST ``=`` only, so values may
      themselves contain ``=`` (for example URLs with query strings);
    * keys and values are stripped of surrounding whitespace;
    * lines without any ``=`` are skipped instead of raising ``IndexError``.

    :param properties_path: optional path of the properties file. When
        omitted, ``../../../../api.properties`` relative to the directory of
        this module is used (the historical hard-coded location).
    :return: dict mapping every property name to its string value.
    :raises IOError: (``OSError`` on Python 3) if the file cannot be opened.
    """
    if properties_path is None:
        properties_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            '../../../../api.properties')

    props = {}
    # The context manager guarantees the handle is closed even when a line
    # fails to parse; the previous implementation leaked the open file.
    with open(properties_path) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            # partition() splits on the first '=' only, keeping any later
            # '=' characters inside the value.
            key, separator, value = line.partition('=')
            if not separator:
                continue
            props[key.strip()] = value.strip()
    return props
|
17
|
|
18
|
|
19
|
class DLIESConnector(object):
    """Query helper around the ``scholix`` documents of an Elasticsearch index.

    The connection settings come from ``get_property()``: ``es_index`` is a
    comma-separated list of hosts and ``api.index`` is the index name.

    The queries built here address these nested document paths:
    ``source`` / ``target`` (with nested ``identifier`` and ``publisher``
    children) and ``linkprovider``.
    """

    # Number of hits requested by every paged search method.
    _PAGE_SIZE = 100

    def __init__(self):
        """Read the properties file and create the Elasticsearch client."""
        props = get_property()
        # Strip each entry so "host1, host2" works as well as "host1,host2".
        hosts = [host.strip() for host in props['es_index'].split(',')]
        self.client = Elasticsearch(hosts=hosts)
        self.index_name = props['api.index']

    # ------------------------------------------------------------------
    # internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _nested_match(nested_path, field_values):
        """Build a nested query whose bool/must holds one match per field.

        :param nested_path: path of the nested object to search in.
        :param field_values: list of ``(field_name, value)`` pairs; all of
            them must match inside the same nested object.
        """
        # Field names contain dots, so they are passed through a dict.
        must = [Q('match', **{field: value}) for field, value in field_values]
        return Q('nested', path=nested_path, query=Q('bool', must=must))

    def _search(self):
        """Return a fresh Search on the configured index for 'scholix' docs."""
        return Search(using=self.client, index=self.index_name).doc_type('scholix')

    @staticmethod
    def _bucket_counts(buckets, only_name=None):
        """Convert terms buckets to ``{name, totalRelationships}`` dicts.

        When ``only_name`` is truthy, only the bucket whose key equals it
        exactly is kept.
        """
        return [dict(name=item.key, totalRelationships=item.doc_count)
                for item in buckets
                if not only_name or item.key == only_name]

    # ------------------------------------------------------------------
    # query builders
    # ------------------------------------------------------------------
    def create_pidType_query(self, value, start):
        """Match ``<start>.identifier.schema`` against ``value``.

        :param start: ``'source'`` or ``'target'`` in the calls made here.
        """
        return self._nested_match(start + '.identifier',
                                  [(start + '.identifier.schema', value)])

    def create_pid_query(self, value, start):
        """Match ``<start>.identifier.identifier`` against lower-cased ``value``."""
        return self._nested_match(start + '.identifier',
                                  [(start + '.identifier.identifier', value.lower())])

    def create_source_pid_query(self, value):
        """Match the source identifier against ``value``.

        Added because ``realtionToPid`` calls this name, which the class did
        not define (the call raised ``AttributeError``).
        """
        return self.create_pid_query(value, 'source')

    def create_pid_pidType_query(self, pidType, pid):
        """Match a source identifier by both schema and identifier value.

        Added because ``realtionToPid`` calls this name, which the class did
        not define (the call raised ``AttributeError``). Both conditions sit
        in ONE nested query so they must hold for the same identifier entry.

        NOTE(review): the source side is assumed, by analogy with
        ``create_source_pid_query`` used in the sibling branch -- confirm.
        """
        return self._nested_match('source.identifier',
                                  [('source.identifier.schema', pidType),
                                   ('source.identifier.identifier', pid.lower())])

    def create_typology_query(self, value):
        """Match ``target.objectType`` against ``value``."""
        return self._nested_match('target', [('target.objectType', value)])

    def create_dataSource_query(self, value):
        """Match ``linkprovider.name`` against ``value``."""
        return self._nested_match('linkprovider', [('linkprovider.name', value)])

    def create_publisher_query(self, value, start):
        """Match ``<start>.publisher.name`` against ``value``.

        ``publisher`` is queried as a nested object inside the nested
        ``start`` object, hence the two nested levels.
        """
        inner = self._nested_match(start + '.publisher',
                                   [(start + '.publisher.name', value)])
        return Q('nested', path=start, query=inner)

    # ------------------------------------------------------------------
    # aggregations
    # ------------------------------------------------------------------
    def list_datasources(self, ds_name=None):
        """List link providers with the number of matching documents.

        :param ds_name: optional provider name; when given the search is
            restricted to it and only the bucket with exactly that key is
            returned.
        :return: list of ``{'name': ..., 'totalRelationships': ...}`` dicts
            (at most 100 terms buckets are requested).
        """
        search_object = self._search()
        if ds_name:
            search_object = search_object.query(self.create_dataSource_query(ds_name))
        else:
            search_object = search_object.query()
        search_object.aggs \
            .bucket('all_datasources', 'nested', path='linkprovider') \
            .bucket('all_names', 'terms', field='linkprovider.name', size=100)

        response = search_object.execute()
        return self._bucket_counts(
            response.aggs.all_datasources.all_names.buckets, ds_name)

    def list_publisher(self, start, pub_name=None):
        """List publishers of the ``start`` side with their document counts.

        :param start: nested path whose publishers are aggregated
            (the calls in this class use ``'source'`` / ``'target'``).
        :param pub_name: optional publisher name; when given the search is
            restricted to it and only the bucket with exactly that key is
            returned.
        :return: list of ``{'name': ..., 'totalRelationships': ...}`` dicts.
        """
        search_object = self._search()
        if pub_name:
            search_object = search_object.query(self.create_publisher_query(pub_name, start))
        else:
            search_object = search_object.query()
        search_object.aggs \
            .bucket('all_targets', 'nested', path=start) \
            .bucket('all_t_pubs', 'nested', path=start + '.publisher') \
            .bucket('all_pubs', 'terms', field=start + '.publisher.name', size=1000000)

        response = search_object.execute()
        return self._bucket_counts(
            response.aggs.all_targets.all_t_pubs.all_pubs.buckets, pub_name)

    # ------------------------------------------------------------------
    # searches
    # ------------------------------------------------------------------
    def links(self, provider=None, s_pid=None, t_pid=None, s_publisher=None, t_publisher=None, s_pid_type=None,
              t_pid_type=None, target_Type=None, page=0):
        """Search links, AND-ing together every filter that is supplied.

        Falsy filter arguments are ignored. With no filter at all the search
        is left unfiltered instead of passing ``None`` as the query.

        :param page: start offset of the returned window.
        :return: the executed search response (up to 100 hits).
        """
        queries = []
        if provider:
            queries.append(self.create_dataSource_query(provider))
        if s_pid:
            queries.append(self.create_pid_query(s_pid, 'source'))
        if t_pid:
            queries.append(self.create_pid_query(t_pid, 'target'))
        if s_publisher:
            queries.append(self.create_publisher_query(s_publisher, 'source'))
        if t_publisher:
            queries.append(self.create_publisher_query(t_publisher, 'target'))
        if s_pid_type:
            queries.append(self.create_pidType_query(s_pid_type, 'source'))
        if t_pid_type:
            # Fixed: the target clause used s_pid_type, so the target filter
            # was built from the source value (or from None).
            queries.append(self.create_pidType_query(t_pid_type, 'target'))
        if target_Type:
            queries.append(self.create_typology_query(target_Type))

        combined = None
        for clause in queries:
            # Explicit None test: do not rely on the truthiness of a query.
            combined = clause if combined is None else combined & clause

        search_object = self._search()
        if combined is not None:
            search_object = search_object.query(combined)

        # NOTE(review): ``page`` is used as a raw offset (page=1 yields hits
        # 1..100), not multiplied by the page size -- confirm with callers.
        return search_object[page:page + self._PAGE_SIZE].execute()

    def realtionToPid(self, pid, pidType=None, datasource=None, typology=None, page=0):
        """Search links by source PID, optionally narrowed by further filters.

        :param pid: identifier value (lower-cased before querying).
        :param pidType: optional identifier schema (lower-cased); when given
            it must match on the same identifier entry as ``pid``.
        :param datasource: optional link provider name, applied as a filter.
        :param typology: optional target object type, applied as a filter.
        :param page: start offset of the returned window.
        :return: the executed search response (up to 100 hits).
        """
        if pidType:
            query = self.create_pid_pidType_query(pidType.lower(), pid.lower())
        else:
            query = self.create_source_pid_query(pid.lower())

        filters = []
        if datasource:
            filters.append(self.create_dataSource_query(datasource))
        if typology:
            filters.append(self.create_typology_query(typology))

        search_object = self._search().query(query)
        if filters:
            search_object = search_object.filter(Q('bool', must=filters))
        # NOTE(review): ``page`` is a raw offset here as well -- see links().
        return search_object[page:page + self._PAGE_SIZE].execute()

    def realtionToTypology(self, typology, page=0):
        """Search links whose target object type matches ``typology``.

        :param page: start offset of the returned window.
        :return: the executed search response (up to 100 hits).
        """
        search_object = self._search().query(self.create_typology_query(typology))
        return search_object[page:page + self._PAGE_SIZE].execute()
|
135
|
|
136
|
|
137
|
# Shared module-level connector instance. It is created when this module is
# imported, so importing the module runs DLIESConnector.__init__, which reads
# the properties file via get_property() and builds the Elasticsearch client.
dli_index = DLIESConnector()
|