Project

General

Profile

1 46734 sandro.lab
from elasticsearch import *
2
from elasticsearch_dsl import *
3 49842 sandro.lab
from os import path
4
import os
5 46734 sandro.lab
from typing import List
6 46775 sandro.lab
from swagger_server.models import Scholix, ObjectType, DSType, IdentifierType, RelationshipType, ObjectType, \
7
    ObjectTypeObjectType, CreatorType
8 46734 sandro.lab
9
10 49377 sandro.lab
def get_property(properties_path=None):
    """Load ``key=value`` settings from the API properties file.

    Args:
        properties_path: Optional path of the file to read.  When omitted,
            ``../../../api.properties`` relative to this module's directory
            is read.

    Returns:
        A dict mapping each stripped key to its stripped value.

    Raises:
        IOError / OSError: if the file cannot be opened.
    """
    if properties_path is None:
        module_dir = os.path.dirname(os.path.realpath(__file__))
        properties_path = os.path.join(module_dir, '../../../api.properties')

    properties = {}
    # ``with`` closes the handle even when parsing raises.
    with open(properties_path) as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            # Blank lines, '#' comments and lines without '=' hold no setting.
            if not entry or entry.startswith('#') or '=' not in entry:
                continue
            # Split on the first '=' only, so a value may itself contain '='.
            key, _, value = entry.partition('=')
            properties[key.strip()] = value.strip()
    return properties
17
18
19 49827 sandro.lab
class DLIESConnector(object):
    """Query helper for the 'scholix' documents of an Elasticsearch index.

    The index name and the hosts are taken from the properties returned by
    ``get_property()`` (keys ``api.index`` and ``es_index``).
    """

    # Number of hits requested by every paged search.
    PAGE_SIZE = 10

    def __init__(self, index_host):
        """Read the configuration and create the Elasticsearch client.

        Args:
            index_host: Only printed at start-up.
                NOTE(review): the hosts actually used come from the
                ``es_index`` property, not from this argument -- confirm
                whether that is intended.

        Raises:
            KeyError: if ``api.index`` or ``es_index`` is missing from the
                loaded properties.
        """
        props = get_property()
        index_name = props['api.index']

        # ``es_index`` is split on commas into a list of hosts.
        self.index_host = [x.strip() for x in props['es_index'].split(',')]
        # A parenthesised single-argument print prints the same text on
        # Python 2 and Python 3.
        print("start with index " + index_name)
        print(index_host)
        self.client = Elasticsearch(hosts=self.index_host)
        self.index_name = index_name

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _nested_match(nested_path, *field_values):
        """Build a nested bool query whose ``must`` matches every pair.

        Args:
            nested_path: Value of the nested query's ``path``.
            *field_values: ``(field, value)`` tuples, one ``match`` each.
        """
        must = [Q('match', **{field: value}) for field, value in field_values]
        return Q('nested', path=nested_path, query=Q('bool', must=must))

    def _scholix_search(self):
        """Return a Search on the configured index for doc type 'scholix'."""
        return Search(using=self.client, index=self.index_name).doc_type('scholix')

    def _execute_page(self, search_object, page):
        """Run ``search_object`` on one window and convert the hits.

        NOTE(review): ``page`` is used directly as the start offset of the
        ``PAGE_SIZE``-hit slice (page 1 starts at hit 1, not hit 10) --
        confirm this is what callers expect.
        """
        window = search_object[page:page + self.PAGE_SIZE]
        return self.convertScholix(window.execute())

    # ------------------------------------------------------------------
    # Query builders
    # ------------------------------------------------------------------
    def create_pid_pidType_query(self, pidType, pid):
        """Nested query on ``source.identifier`` matching schema and identifier."""
        return self._nested_match(
            'source.identifier',
            ('source.identifier.schema', pidType),
            ('source.identifier.identifier', pid))

    def create_source_pid_query(self, value):
        """Nested query on ``source.identifier`` matching the identifier only."""
        return self._nested_match(
            'source.identifier', ('source.identifier.identifier', value))

    def create_typology_query(self, value):
        """Nested query on ``target`` matching ``target.objectType``."""
        return self._nested_match('target', ('target.objectType', value))

    def create_dataSource_query(self, value):
        """Nested query on ``linkprovider`` matching ``linkprovider.name``."""
        return self._nested_match('linkprovider', ('linkprovider.name', value))

    def create_publisher_query(self, value):
        """Nested query on ``target.publisher`` wrapped in a nested ``target`` query."""
        publisher_query = self._nested_match(
            'target.publisher', ('target.publisher.name', value))
        return Q('nested', path='target', query=publisher_query)

    # ------------------------------------------------------------------
    # Aggregations
    # ------------------------------------------------------------------
    def list_datasources(self):
        """Return the keys of a terms aggregation over ``linkprovider.name``.

        The aggregation is requested with ``size=100``.
        """
        search_object = self._scholix_search().query()
        providers = search_object.aggs.bucket(
            'all_datasources', 'nested', path='linkprovider')
        providers.bucket(
            'all_names', 'terms', field='linkprovider.name', size=100)

        response = search_object.execute()
        return [item.key for item in response.aggs.all_datasources.all_names.buckets]

    # ------------------------------------------------------------------
    # Conversion of index documents to API models
    # ------------------------------------------------------------------
    def convertLinkProvider(self, lp):
        """Convert link-provider entries into ``DSType`` objects.

        Args:
            lp: Iterable of entries exposing ``'name'`` and
                ``'identifiers'``; every identifier exposes
                ``'identifier'`` and ``'schema'``.  ``None`` is treated
                as an empty collection.

        Returns:
            A list with one ``DSType`` per entry.
        """
        if lp is None:
            return []
        return [
            DSType(name=item['name'],
                   identifiers=[IdentifierType(x['identifier'], x['schema'])
                                for x in item['identifiers']])
            for item in lp
        ]

    def convertObject(self, inputObj):
        """Convert a source/target document into an ``ObjectType``.

        Reads ``identifier``, ``dnetIdentifier``, ``collectedFrom`` and
        ``objectType`` from ``inputObj``; ``title``, ``creator`` and
        ``publisher`` are copied only when present.
        """
        result = ObjectType()
        result.identifiers = [
            IdentifierType(item.identifier, item.schema)
            for item in inputObj.identifier
        ]
        # The dnet identifier is appended as one more identifier whose
        # schema is the literal 'dnetIdentifier'.
        result.identifiers.append(
            IdentifierType(inputObj.dnetIdentifier, 'dnetIdentifier'))
        result.object_provider = self.convertLinkProvider(
            [x.provider for x in inputObj.collectedFrom])
        result.object_type = ObjectTypeObjectType(inputObj.objectType, '')
        if 'title' in inputObj:
            result.title = inputObj.title
        if 'creator' in inputObj:
            result.creators = [CreatorType(x.name) for x in inputObj.creator]
        if 'publisher' in inputObj:
            # Publishers are returned as plain dicts holding only the name.
            result.publisher = [dict(name=p.name) for p in inputObj.publisher]
        return result

    def convertScholix(self, response):
        """Convert every hit of ``response.hits`` into a ``Scholix`` object."""
        result = []
        for item in response.hits:
            s = Scholix()
            s.relationship = RelationshipType(
                item.relationship.name, item.relationship.schema, '')
            s.link_provider = self.convertLinkProvider(item['linkprovider'])
            s.source = self.convertObject(item.source)
            s.target = self.convertObject(item.target)
            result.append(s)
        return result

    # ------------------------------------------------------------------
    # Public searches (method names keep their historical spelling)
    # ------------------------------------------------------------------
    def realtionToPid(self, pid, pidType=None, datasource=None, typology=None, page=0):
        """Search links whose source identifier matches ``pid``.

        Args:
            pid: Identifier to look up; lower-cased before querying.
            pidType: Optional identifier schema; lower-cased and matched
                together with ``pid`` when given.
            datasource: Optional link-provider name used as a filter.
            typology: Optional target object type used as a filter.
            page: Start offset of the window handed to the search slice.

        Returns:
            A list of ``Scholix`` objects.
        """
        if pidType:
            query = self.create_pid_pidType_query(pidType.lower(), pid.lower())
        else:
            query = self.create_source_pid_query(pid.lower())

        filters = []
        if datasource:
            filters.append(self.create_dataSource_query(datasource))
        if typology:
            filters.append(self.create_typology_query(typology))

        search_object = self._scholix_search().query(query)
        if filters:
            search_object = search_object.filter(Q('bool', must=filters))
        return self._execute_page(search_object, page)

    def realtionToTypology(self, typology, page=0):
        """Search links whose target object type matches ``typology``."""
        search_object = self._scholix_search().query(
            self.create_typology_query(typology))
        return self._execute_page(search_object, page)

    def realtionFromDatasource(self, datasource, page=0):
        """Search links whose link-provider name matches ``datasource``."""
        search_object = self._scholix_search().query(
            self.create_dataSource_query(datasource))
        return self._execute_page(search_object, page)

    def realtionFromPublisher(self, publisher, page=0):
        """Search links whose target publisher name matches ``publisher``."""
        search_object = self._scholix_search().query(
            self.create_publisher_query(publisher))
        return self._execute_page(search_object, page)