1 |
46734
|
sandro.lab
|
from elasticsearch import *
|
2 |
|
|
from elasticsearch_dsl import *
|
3 |
49842
|
sandro.lab
|
from os import path
|
4 |
|
|
import os
|
5 |
46734
|
sandro.lab
|
from typing import List
|
6 |
46775
|
sandro.lab
|
from swagger_server.models import Scholix, ObjectType, DSType, IdentifierType, RelationshipType, ObjectType, \
|
7 |
|
|
ObjectTypeObjectType, CreatorType
|
8 |
46734
|
sandro.lab
|
|
9 |
|
|
|
10 |
49377
|
sandro.lab
|
def get_property(properties_path=None):
    """Load ``key=value`` settings from a properties file into a dict.

    Each non-empty line is split on its FIRST ``=`` only, so a value may
    itself contain ``=`` (for example a URL with a query string). Keys and
    values are stripped of surrounding whitespace. Blank lines, lines whose
    first non-blank character is ``#``, and lines with no ``=`` at all are
    ignored instead of raising ``IndexError``.

    :param properties_path: file to read. When omitted, ``api.properties``
        located three directories above this module is used.
    :return: dict mapping every property name to its (string) value; when a
        name occurs more than once the last occurrence wins.
    :raises IOError: if the file cannot be opened.
    """
    if properties_path is None:
        properties_path = path.join(os.path.dirname(os.path.realpath(__file__)), '../../../api.properties')

    p = {}
    # The context manager closes the handle even if a line fails to parse.
    with open(properties_path) as f:
        for line in f:
            entry = line.strip()
            if not entry or entry.startswith('#'):
                continue
            key, separator, value = entry.partition('=')
            if not separator:
                # Not a key=value pair; nothing to record.
                continue
            p[key.strip()] = value.strip()
    return p
|
17 |
|
|
|
18 |
|
|
|
19 |
49827
|
sandro.lab
|
class DLIESConnector(object):
    """Run link queries against an Elasticsearch index and map hits to API models.

    The connector searches documents of type ``scholix`` in the index named
    by the ``api.index`` property and converts every hit into the
    ``swagger_server`` model classes (``Scholix``, ``ObjectType``,
    ``DSType``, ...).
    """

    # Number of hits returned by each ``realtion*`` lookup.
    PAGE_SIZE = 10

    def __init__(self, index_host):
        """Create the Elasticsearch client from the properties file.

        :param index_host: only echoed to stdout.
            NOTE(review): the hosts actually used come from the ``es_index``
            property (comma separated), not from this argument -- confirm
            that this is intended.
        """
        props = get_property()
        index_name = props['api.index']

        self.index_host = [x.strip() for x in props['es_index'].split(',')]
        # Parenthesized single-argument form prints the same on Python 2 and 3.
        print("start with index " + index_name)
        print(index_host)
        self.client = Elasticsearch(hosts=self.index_host)
        self.index_name = index_name

    def _search(self):
        """Return a fresh ``Search`` bound to the client, index and ``scholix`` doc type."""
        return Search(using=self.client, index=self.index_name).doc_type('scholix')

    def _fetch_page(self, search_object, page):
        """Execute ``search_object`` for one window of hits and convert them.

        ``page`` is used directly as the index of the first hit; the window
        is ``PAGE_SIZE`` hits long.
        NOTE(review): ``page`` is not multiplied by the page size, so
        ``page=1`` overlaps ``page=0`` by nine hits -- confirm whether
        callers pass an offset or a page number.
        """
        return self.convertScholix(search_object[page:page + self.PAGE_SIZE].execute())

    def create_pid_pidType_query(self, pidType, pid):
        """Build a nested query matching a source identifier by schema AND value."""
        args = {'source.identifier.schema': pidType}
        args2 = {'source.identifier.identifier': pid}
        return Q('nested', path='source.identifier', query=Q('bool', must=[Q('match', **args), Q('match', **args2)]))

    def create_source_pid_query(self, value):
        """Build a nested query matching a source identifier by value only."""
        args = {'source.identifier.identifier': value}
        return Q('nested', path='source.identifier', query=Q('bool', must=[Q('match', **args)]))

    def create_typology_query(self, value):
        """Build a nested query matching ``target.objectType``."""
        args = {'target.objectType': value}
        return Q('nested', path='target', query=Q('bool', must=[Q('match', **args)]))

    def create_dataSource_query(self, value):
        """Build a nested query matching ``linkprovider.name``."""
        args = {'linkprovider.name': value}
        return Q('nested', path='linkprovider', query=Q('bool', must=[Q('match', **args)]))

    def create_publisher_query(self, value):
        """Build a doubly nested query matching ``target.publisher.name``."""
        args = {'target.publisher.name': value}
        q = Q('nested', path='target.publisher', query=Q('bool', must=[Q('match', **args)]))
        # ``target.publisher`` lives inside the nested ``target`` object, so
        # the inner nested query is wrapped in an outer one.
        return Q('nested', path='target', query=q)

    def list_datasources(self):
        """Return the ``linkprovider.name`` terms found in the index.

        Uses a nested ``terms`` aggregation limited to 100 buckets.
        """
        search_object = self._search().query()
        search_object.aggs.bucket('all_datasources', 'nested', path='linkprovider').bucket(
            'all_names', 'terms', field='linkprovider.name', size=100)

        response = search_object.execute()
        return [item.key for item in response.aggs.all_datasources.all_names.buckets]

    def convertLinkProvider(self, lp):
        """Convert provider records into a list of ``DSType`` models.

        :param lp: iterable of mappings, each with a ``name`` entry and an
            ``identifiers`` entry whose items expose ``identifier`` and
            ``schema``.
        :return: list with one ``DSType`` per input record, in input order.
        """
        return [
            DSType(name=item['name'],
                   identifiers=[IdentifierType(x['identifier'], x['schema']) for x in item['identifiers']])
            for item in lp
        ]

    def convertObject(self, inputObj):
        """Convert the ``source`` or ``target`` part of a hit into an ``ObjectType``.

        Besides the identifiers listed on the object, its ``dnetIdentifier``
        is appended as one more identifier with schema ``'dnetIdentifier'``.
        ``title``, ``creator`` and ``publisher`` are copied only when present.
        """
        result = ObjectType()
        result.identifiers = [IdentifierType(item.identifier, item.schema) for item in inputObj.identifier]
        result.identifiers.append(IdentifierType(inputObj.dnetIdentifier, 'dnetIdentifier'))
        result.object_provider = self.convertLinkProvider([x.provider for x in inputObj.collectedFrom])
        result.object_type = ObjectTypeObjectType(inputObj.objectType, '')
        if 'title' in inputObj:
            result.title = inputObj.title
        if 'creator' in inputObj:
            result.creators = [CreatorType(x.name) for x in inputObj.creator]
        if 'publisher' in inputObj:
            result.publisher = [dict(name=p.name) for p in inputObj.publisher]
        return result

    def convertScholix(self, response):
        """Convert every hit of an executed search into a ``Scholix`` model.

        :param response: object exposing the hits as ``response.hits``.
        :return: list of ``Scholix`` instances, one per hit, in hit order.
        """
        result = []
        for item in response.hits:
            s = Scholix()
            s.relationship = RelationshipType(item.relationship.name, item.relationship.schema, '')
            s.link_provider = self.convertLinkProvider(item['linkprovider'])
            s.source = self.convertObject(item.source)
            s.target = self.convertObject(item.target)
            result.append(s)
        return result

    def realtionToPid(self, pid, pidType=None, datasource=None, typology=None, page=0):
        """Return the links whose source carries the identifier ``pid``.

        :param pid: identifier value; lower-cased before matching.
        :param pidType: optional identifier schema; lower-cased and matched
            together with ``pid`` when given.
        :param datasource: optional ``linkprovider.name`` filter.
        :param typology: optional ``target.objectType`` filter.
        :param page: index of the first hit to return.
        :return: list of ``Scholix`` models, at most ``PAGE_SIZE`` long.
        """
        if pidType:
            query = self.create_pid_pidType_query(pidType.lower(), pid.lower())
        else:
            query = self.create_source_pid_query(pid.lower())

        filters = []
        if datasource:
            filters.append(self.create_dataSource_query(datasource))
        if typology:
            filters.append(self.create_typology_query(typology))

        search_object = self._search().query(query)
        if filters:
            search_object = search_object.filter(Q('bool', must=filters))
        return self._fetch_page(search_object, page)

    def realtionToTypology(self, typology, page=0):
        """Return the links whose target has the given ``objectType``."""
        search_object = self._search().query(self.create_typology_query(typology))
        return self._fetch_page(search_object, page)

    def realtionFromDatasource(self, datasource, page=0):
        """Return the links provided by the link provider named ``datasource``."""
        search_object = self._search().query(self.create_dataSource_query(datasource))
        return self._fetch_page(search_object, page)

    def realtionFromPublisher(self, publisher, page=0):
        """Return the links whose target was published by ``publisher``."""
        search_object = self._search().query(self.create_publisher_query(publisher))
        return self._fetch_page(search_object, page)
|