Project

General

Profile

1
from elasticsearch import *
2
from elasticsearch_dsl import *
3
from typing import List
4
from swagger_server.models import Scholix, ObjectType, DSType, IdentifierType, RelationshipType, ObjectType, \
5
    ObjectTypeObjectType, CreatorType
6

    
7

    
8
def get_property():
    """Read ``api.properties`` from the working directory into a dict.

    Each useful line has the form ``key=value``; whitespace around the key
    and the value is stripped.  Only the first ``=`` separates key from
    value, so values may themselves contain ``=``.  Blank lines, lines
    starting with ``#`` and lines without any ``=`` are skipped.

    Returns:
        dict mapping property names to their string values.

    Raises:
        IOError/OSError: if ``api.properties`` cannot be opened.
    """
    properties = {}
    # The context manager closes the file even if parsing fails.
    with open('api.properties') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            key, separator, value = line.partition('=')
            if not separator:
                # Not a key=value pair; ignore instead of crashing.
                continue
            properties[key.strip()] = value.strip()
    return properties
15

    
16

    
17
class DLIESConnector(object):
    """Run Scholix link queries against an Elasticsearch index.

    The index name and a host list are read from ``api.properties`` through
    ``get_property()``.  Hits of doc type ``scholix`` are converted into the
    swagger model classes (``Scholix``, ``ObjectType``, ``DSType``, ...).
    """

    # Number of hits requested by each relation lookup.
    PAGE_SIZE = 10

    def __init__(self, index_host):
        """Create the Elasticsearch client.

        Args:
            index_host: host specification handed to ``Elasticsearch(hosts=...)``.

        Raises:
            KeyError: if ``api.index`` or ``es_index`` is missing from the
                properties file.
        """
        props = get_property()
        index_name = props['api.index']

        # NOTE(review): the hosts configured under ``es_index`` are only
        # stored on the instance; the client below connects to the
        # ``index_host`` argument.  Confirm which of the two is intended.
        self.index_host = [x.strip() for x in props['es_index'].split(',')]
        # Single-argument call form prints identically on Python 2 and 3.
        print("start with index " + index_name)
        print(index_host)
        self.client = Elasticsearch(hosts=index_host)
        self.index_name = index_name

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _search(self):
        """Return a fresh ``Search`` on the configured index for doc type 'scholix'."""
        return Search(using=self.client, index=self.index_name).doc_type('scholix')

    def _fetch_page(self, search_object, page):
        """Execute ``search_object`` for the slice starting at ``page`` and convert the hits."""
        # NOTE(review): ``page`` is used directly as the start offset of the
        # slice, not multiplied by PAGE_SIZE -- confirm callers pass an offset.
        return self.convertScholix(search_object[page:page + self.PAGE_SIZE].execute())

    @staticmethod
    def _nested_match(path, field, value):
        """Build a nested query on ``path`` whose bool/must holds one match on ``field``."""
        return Q('nested', path=path, query=Q('bool', must=[Q('match', **{field: value})]))

    # ------------------------------------------------------------------
    # query builders
    # ------------------------------------------------------------------
    def create_pid_pidType_query(self, pidType, pid):
        """Nested query matching both the schema and the value of a source identifier."""
        schema_match = Q('match', **{'source.identifier.schema': pidType})
        value_match = Q('match', **{'source.identifier.identifier': pid})
        return Q('nested', path='source.identifier', query=Q('bool', must=[schema_match, value_match]))

    def create_source_pid_query(self, value):
        """Nested query matching a source identifier value regardless of schema."""
        return self._nested_match('source.identifier', 'source.identifier.identifier', value)

    def create_typology_query(self, value):
        """Nested query matching ``target.objectType``."""
        return self._nested_match('target', 'target.objectType', value)

    def create_dataSource_query(self, value):
        """Nested query matching ``linkprovider.name``."""
        return self._nested_match('linkprovider', 'linkprovider.name', value)

    def create_publisher_query(self, value):
        """Nested query on ``target`` wrapping a nested match on ``target.publisher.name``."""
        publisher_query = self._nested_match('target.publisher', 'target.publisher.name', value)
        return Q('nested', path='target', query=publisher_query)

    # ------------------------------------------------------------------
    # lookups and conversions
    # ------------------------------------------------------------------
    def list_datasources(self):
        """Return the keys of a terms aggregation (size 100) over ``linkprovider.name``."""
        search_object = self._search().query()
        # ``aggs.bucket`` registers the aggregation on ``search_object`` in place.
        search_object.aggs.bucket('all_datasources', 'nested', path='linkprovider').bucket(
            'all_names', 'terms', field='linkprovider.name', size=100)

        response = search_object.execute()
        return [item.key for item in response.aggs.all_datasources.all_names.buckets]

    def convertLinkProvider(self, lp):
        """Convert an iterable of provider records into a list of ``DSType``.

        Each record is indexed with ``'name'`` and ``'identifiers'``; every
        identifier is indexed with ``'identifier'`` and ``'schema'``.
        """
        result = []
        for item in lp:
            identifiers = [IdentifierType(x['identifier'], x['schema']) for x in item['identifiers']]
            result.append(DSType(name=item['name'], identifiers=identifiers))
        return result

    def convertObject(self, inputObj):
        """Convert a source/target record of a hit into an ``ObjectType``.

        ``title``, ``creator`` and ``publisher`` are copied only when present
        in ``inputObj``.  The record's ``dnetIdentifier`` is appended to the
        identifiers under the schema name 'dnetIdentifier'.
        """
        result = ObjectType()
        result.identifiers = [IdentifierType(item.identifier, item.schema) for item in inputObj.identifier]
        result.identifiers.append(IdentifierType(inputObj.dnetIdentifier, 'dnetIdentifier'))
        result.object_provider = self.convertLinkProvider([x.provider for x in inputObj.collectedFrom])
        result.object_type = ObjectTypeObjectType(inputObj.objectType, '')
        if 'title' in inputObj:
            result.title = inputObj.title
        if 'creator' in inputObj:
            result.creators = [CreatorType(x.name) for x in inputObj.creator]
        if 'publisher' in inputObj:
            result.publisher = [dict(name=p.name) for p in inputObj.publisher]
        return result

    def convertScholix(self, response):
        """Convert every hit in ``response.hits`` into a ``Scholix`` object."""
        result = []
        for item in response.hits:
            s = Scholix()
            s.relationship = RelationshipType(item.relationship.name, item.relationship.schema, '')
            s.link_provider = self.convertLinkProvider(item['linkprovider'])
            s.source = self.convertObject(item.source)
            s.target = self.convertObject(item.target)
            result.append(s)
        return result

    def realtionToPid(self, pid, pidType=None, datasource=None, typology=None, page=0):
        """Return links whose source identifier matches ``pid``.

        Args:
            pid: identifier value; lower-cased before querying.
            pidType: optional identifier schema; lower-cased; when given, the
                schema must match as well.
            datasource: optional link provider name applied as a filter.
            typology: optional target object type applied as a filter.
            page: start offset of the PAGE_SIZE-long result slice.

        Returns:
            list of ``Scholix`` objects.
        """
        if pidType:
            query = self.create_pid_pidType_query(pidType.lower(), pid.lower())
        else:
            query = self.create_source_pid_query(pid.lower())

        filters = []
        if datasource:
            filters.append(self.create_dataSource_query(datasource))
        if typology:
            filters.append(self.create_typology_query(typology))

        search_object = self._search().query(query)
        if filters:
            search_object = search_object.filter(Q('bool', must=filters))
        return self._fetch_page(search_object, page)

    def realtionToTypology(self, typology, page=0):
        """Return links whose target object type matches ``typology``."""
        return self._fetch_page(self._search().query(self.create_typology_query(typology)), page)

    def realtionFromDatasource(self, datasource, page=0):
        """Return links whose link provider name matches ``datasource``."""
        return self._fetch_page(self._search().query(self.create_dataSource_query(datasource)), page)

    def realtionFromPublisher(self, publisher, page=0):
        """Return links whose target publisher name matches ``publisher``."""
        return self._fetch_page(self._search().query(self.create_publisher_query(publisher)), page)
(1-1/2)