Project

General

Profile

1
from elasticsearch import *
2
from elasticsearch_dsl import *
3
from os import path
4
import os
5
from typing import List
6
from swagger_server.models import Scholix, ObjectType, DSType, IdentifierType, RelationshipType, ObjectType, \
7
    ObjectTypeObjectType, CreatorType
8

    
9

    
10
def get_property():
    """Load the key/value settings stored in the ``api.properties`` file.

    The file is resolved relative to the directory containing this module
    (``../../../api.properties``). Every line is stripped; blank lines and
    lines starting with ``#`` are ignored. The remaining lines are split on
    the FIRST ``=`` only, so a value may itself contain ``=`` characters.
    Lines without any ``=`` are skipped instead of raising ``IndexError``.

    Returns:
        dict: property names mapped to their values, both stripped of
        surrounding whitespace. A key defined twice keeps its last value.

    Raises:
        IOError/OSError: if the properties file cannot be opened.
    """
    properties_file = path.join(os.path.dirname(os.path.realpath(__file__)), '../../../api.properties')
    p = {}
    # The context manager guarantees the handle is closed even on error.
    with open(properties_file) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            key, separator, value = line.partition("=")
            if not separator:
                # Not a "key=value" line: nothing to store.
                continue
            p[key.strip()] = value.strip()
    return p
18

    
19

    
20
class DLIESConnector(object):
    """Query helper for an Elasticsearch index holding ``scholix`` documents.

    The connector builds elasticsearch_dsl queries, runs them against the
    index named by the ``api.index`` property and converts the hits into the
    ``swagger_server.models`` objects (``Scholix``, ``ObjectType``, ...).
    """

    # Number of hits requested by every relation query.
    PAGE_SIZE = 100

    def __init__(self, index_host):
        """Create the Elasticsearch client from the ``api.properties`` settings.

        Args:
            index_host: value printed at start-up only. The hosts actually
                used for the connection are read from the comma-separated
                ``es_index`` property, not from this argument.

        Raises:
            KeyError: if ``api.index`` or ``es_index`` is missing from the
                properties.
        """
        props = get_property()
        index_name = props['api.index']

        self.index_host = [x.strip() for x in props['es_index'].split(',')]
        # Single-argument print calls produce the same output on Python 2 and 3.
        print("start with index " + index_name)
        print(index_host)
        self.client = Elasticsearch(hosts=self.index_host)
        self.index_name = index_name

    def create_pid_pidType_query(self, pidType, pid):
        """Return a nested query on ``source.identifier`` matching both schema and identifier."""
        args = {'source.identifier.schema': pidType}
        args2 = {'source.identifier.identifier': pid}
        return Q('nested', path='source.identifier', query=Q('bool', must=[Q('match', **args), Q('match', **args2)]))

    def create_source_pid_query(self, value):
        """Return a nested query on ``source.identifier`` matching the identifier only."""
        args = {'source.identifier.identifier': value}
        return Q('nested', path='source.identifier', query=Q('bool', must=[Q('match', **args)]))

    def create_typology_query(self, value):
        """Return a nested query on ``target`` matching ``target.objectType``."""
        args = {'target.objectType': value}
        return Q('nested', path='target', query=Q('bool', must=[Q('match', **args)]))

    def create_dataSource_query(self, value):
        """Return a nested query on ``linkprovider`` matching ``linkprovider.name``."""
        args = {'linkprovider.name': value}
        return Q('nested', path='linkprovider', query=Q('bool', must=[Q('match', **args)]))

    def create_publisher_query(self, value):
        """Return a doubly nested query (``target`` > ``target.publisher``) matching the publisher name."""
        args = {'target.publisher.name': value}
        q = Q('nested', path='target.publisher', query=Q('bool', must=[Q('match', **args)]))
        return Q('nested', path='target', query=q)

    def _scholix_search(self):
        """Return a new Search bound to this client, index and the ``scholix`` doc type."""
        return Search(using=self.client, index=self.index_name).doc_type('scholix')

    def _run(self, search_object, page):
        """Execute ``search_object`` for the window starting at ``page`` and convert the hits.

        ``page`` is used directly as the index of the first hit; the window
        spans ``PAGE_SIZE`` hits from there.
        """
        return self.convertScholix(search_object[page:page + self.PAGE_SIZE].execute())

    def list_datasources(self):
        """Return the keys of a terms aggregation (size 100) on ``linkprovider.name``."""
        search_object = self._scholix_search().query()
        search_object.aggs.bucket('all_datasources', 'nested', path='linkprovider').bucket(
            'all_names', 'terms', field='linkprovider.name', size=100)

        response = search_object.execute()
        return [item.key for item in response.aggs.all_datasources.all_names.buckets]

    def convertLinkProvider(self, lp):
        """Convert an iterable of link-provider records into a list of ``DSType``.

        Each record must expose ``name`` and ``identifiers`` by key; every
        identifier must expose ``identifier`` and ``schema`` by key.
        """
        return [
            DSType(name=item['name'],
                   identifiers=[IdentifierType(x['identifier'], x['schema']) for x in item['identifiers']])
            for item in lp
        ]

    def convertObject(self, inputObj):
        """Convert a source/target record of a hit into an ``ObjectType``.

        The identifiers of the record are copied and the record's
        ``dnetIdentifier`` is appended with the schema ``'dnetIdentifier'``.
        ``title``, ``creator`` and ``publisher`` are copied only when present.
        """
        result = ObjectType()
        result.identifiers = [IdentifierType(item.identifier, item.schema) for item in inputObj.identifier]
        result.identifiers.append(IdentifierType(inputObj.dnetIdentifier, 'dnetIdentifier'))
        result.object_provider = self.convertLinkProvider([x.provider for x in inputObj.collectedFrom])
        result.object_type = ObjectTypeObjectType(inputObj.objectType, '')
        if 'title' in inputObj:
            # NOTE(review): the search string of this replace() reached us as an
            # unterminated string literal (a syntax error). It is restored here
            # as the HTML entity for a double quote; confirm against upstream.
            result.title = inputObj.title.replace("&quot;", "")
        if 'creator' in inputObj:
            result.creators = [CreatorType(x.name) for x in inputObj.creator]
        if 'publisher' in inputObj:
            result.publisher = [dict(name=p.name) for p in inputObj.publisher]
        return result

    def convertScholix(self, response):
        """Convert every hit of an executed search into a ``Scholix`` object."""
        result = []
        for item in response.hits:
            s = Scholix()
            s.relationship = RelationshipType(item.relationship.name, item.relationship.schema, '')
            s.link_provider = self.convertLinkProvider(item['linkprovider'])
            s.source = self.convertObject(item.source)
            s.target = self.convertObject(item.target)
            result.append(s)
        return result

    def realtionToPid(self, pid, pidType=None, datasource=None, typology=None, page=0):
        """Return the links whose source identifier matches ``pid``.

        Args:
            pid: identifier to look for; lower-cased before querying.
            pidType: optional identifier schema; lower-cased and required to
                match together with ``pid`` when given.
            datasource: optional link-provider name applied as a filter.
            typology: optional target object type applied as a filter.
            page: index of the first hit to return (used as an offset).

        Returns:
            list: up to ``PAGE_SIZE`` ``Scholix`` objects.
        """
        if pidType:
            query = self.create_pid_pidType_query(pidType.lower(), pid.lower())
        else:
            query = self.create_source_pid_query(pid.lower())
        filters = []
        if datasource:
            filters.append(self.create_dataSource_query(datasource))
        if typology:
            filters.append(self.create_typology_query(typology))
        search_object = self._scholix_search().query(query)

        if filters:
            search_object = search_object.filter(Q('bool', must=filters))
        return self._run(search_object, page)

    def realtionToTypology(self, typology, page=0):
        """Return up to ``PAGE_SIZE`` links whose target object type matches ``typology``, starting at offset ``page``."""
        return self._run(self._scholix_search().query(self.create_typology_query(typology)), page)

    def realtionFromDatasource(self, datasource, page=0):
        """Return up to ``PAGE_SIZE`` links whose link-provider name matches ``datasource``, starting at offset ``page``."""
        return self._run(self._scholix_search().query(self.create_dataSource_query(datasource)), page)

    def realtionFromPublisher(self, publisher, page=0):
        """Return up to ``PAGE_SIZE`` links whose target publisher name matches ``publisher``, starting at offset ``page``."""
        return self._run(self._scholix_search().query(self.create_publisher_query(publisher)), page)
(1-1/2)