Project

General

Profile

« Previous | Next » 

Revision 58197

created new branch for elasticsearch 7.5

View differences:

modules/dli-api/branches/ES_7/requirements.txt
1
certifi==2019.3.9
2
chardet==3.0.4
3
Click==7.0
4
elasticsearch==5.5.3
5
elasticsearch-dsl==5.4.0
6
fastapi==0.20.0
7
h11==0.8.1
8
httptools==0.0.13
9
idna==2.8
10
memory-profiler==0.55.0
11
psutil==5.6.2
12
pydantic==0.23
13
Pympler==0.7
14
python-dateutil==2.8.0
15
requests==2.22.0
16
six==1.12.0
17
starlette==0.11.1
18
urllib3==1.25.2
19
uvicorn==0.7.1
20
uvloop==0.12.2
21
websockets==7.0
22
prometheus-client==0.5.0
modules/dli-api/branches/ES_7/v1/api.py
1
from fastapi import FastAPI, Query,HTTPException
2
from v1.responseModel import ScholixType,convert_response
3
import logging
4
from typing import List
5
from eu.dnetlib.ScholixConnector import ScholixConnector
6
import logging
7

  
8
log = logging.getLogger("scholexplorer")
9

  
10
subapi_v1 = FastAPI(title="Scholexplorer API 1.0",
11
    version="2.0.0",
12
    description="scholexplorer API version 1.0",openapi_prefix="/v1", docs_url="/ui/")
13

  
14
@subapi_v1.get("/linksFromDatasource", 
15
        tags=["Scholix"], 
16
        summary="Get all scholix relation collected from a datasource",     
17
        description="return a list of scholix object collected from a specific datasource" ,
18
        response_model=List[ScholixType]        
19
        )
20
def links_from_datasource(datasource: str = Query((...),title="datasource", description="Filter Scholix relationships collected from a LinkProvider"),page:int= Query(None, title="page", description="select page of result")):
21
    s = ScholixConnector()
22
    m_page = 0
23
    if page:
24
            m_page= page*100      
25
    if m_page > 9999:
26
        raise HTTPException(status_code=400,detail="MAX NUMBER OF PAGE REACHED")  
27
    return convert_response(s.links(provider=datasource, page= m_page))
28

  
29

  
30
@subapi_v1.get("/linksFromPid", 
31
        tags=["Scholix"], 
32
        summary="Retrieve all scholix links from a persistent identifier",     
33
        description="The linksFromPid endpoint returns a list of scholix object related from a specific persistent identifier" ,
34
        response_model=List[ScholixType]        
35
        )
36
def links_from_pid(
37
    pid: str = Query((...), title="pid", description="persistent Identifier"),
38
    pidType: str = Query(None, title="pidType", description="Persistent Identifier Type"),
39
    typologyTarget: str = Query(None, title="targetPidType", description="typology target filter should be publication, dataset or unknown"),
40
    datasourceTarget: str = Query(None, title="datasourceTarget", description="a datasource provenace filter of the target relation"),
41
    page:int= Query(None, title="page", description="select page of result")):
42
    s = ScholixConnector()    
43
    m_page = 0
44
    if page:
45
            m_page= page*100      
46
    if m_page > 9999:
47
        raise HTTPException(status_code=400,detail="MAX NUMBER OF PAGE REACHED")  
48
    return convert_response(s.links(s_pid =pid,s_pid_type= pidType,source_Type= typologyTarget, provider = datasourceTarget) )
49

  
50

  
51
@subapi_v1.get("/linksFromPublisher", 
52
        tags=["Scholix"], 
53
        summary="Get all scholix relation collected from a publisher",     
54
        description="return a list of scholix object published from a specific publisher" ,
55
        response_model=List[ScholixType]        
56
        )
57
def links_from_publisher(publisher: str = Query((...),title="publisher", description="Filter Scholix relationships collected from a publisher"),page:int= Query(None, title="page", description="select page of result")):
58
    s = ScholixConnector()
59
    m_page = 0
60
    if page:
61
            m_page= page*100      
62
    if m_page > 9999:
63
        raise HTTPException(status_code=400,detail="MAX NUMBER OF PAGE REACHED")  
64
    return convert_response(s.links(t_publisher=publisher, page= m_page))
65

  
66

  
67
@subapi_v1.get("/listDatasources", 
68
        tags=["Datasources"], 
69
        summary="Get all datasources",     
70
        description="returns a list of all datasources" ,
71
        response_model=List[str]        
72
        )
73
def list_datasources():
74
    s = ScholixConnector()
75

  
76
    for item in s.list_datasources():
77
        yield item['name']
78

  
79
    
80

  
modules/dli-api/branches/ES_7/v1/responseModel.py
1
from pydantic import BaseModel, Schema
2
from typing import Dict, List
3
from datetime import datetime
4
from fastapi import HTTPException
5
import logging
6
from time import time
7

  
8

  
9

  
10
class LinkProvider(BaseModel):
11
    name:str = Schema(None, title= "The name of the Provider that provides the links", max_length= 300)
12
    totalRelationships:int=Schema(None, title= "The number of links that It provides")
13

  
14
class LinkPublisher(BaseModel):
15
    name:str = Schema(None, title= "The name of the Publisher that provides the links", max_length= 300)
16
    totalRelationships:int=Schema(None, title= "The number of links that It provides")
17

  
18

  
19
class IdentifierType(BaseModel):
20
    identifier: str = None
21
    schema_str:str = Schema(None, alias="schema")
22
    
23

  
24
class ScholixProviderType(BaseModel):
25
    name:str
26
    identifiers:List[IdentifierType] = []
27

  
28
class RelationshipType(BaseModel):
29
    name:str
30
    schema_str:str = Schema(None, alias="schema")
31
    inverseRelationship:str = None
32

  
33
class CreatorType(BaseModel):
34
    name: str
35
    identifiers:List[IdentifierType] = []
36
    
37
class ScholixObjectType(BaseModel):
38
    subtype:str = None
39
    type:str = None
40

  
41
class ScholixItemType(BaseModel):
42
    identifiers:List[IdentifierType] = []
43
    title:str = None
44
    objectType:str 
45
    creators:List[CreatorType] = []    
46
    publisher:List[ScholixProviderType] =[] 
47
    objectProvider: List[ScholixProviderType] =[]  
48

  
49
class ScholixType(BaseModel):
50
    linkProvider:List[ScholixProviderType] =[]    
51
    publicationDate:str =None
52
    relationship:RelationshipType=None
53
    source:ScholixItemType=None
54
    target:ScholixItemType=None
55
    
56
def convert_response(response):
57
    log = logging.getLogger("scholexplorer")
58

  
59
    
60
    for item in response.hits:
61
        result = item.__dict__['_d_']
62
        result['linkProvider'] = result.pop('linkprovider')
63
        if 'creator' in result['source']:
64
            result['source']['creators']=result['source'].pop('creator')
65
        result['source']['identifiers']=result['source'].pop('identifier')
66
        result['source']['objectProvider'] = [s['provider'] for s in result['source'].get('collectedFrom',[])]
67
        if 'creator' in result['target']:
68
            result['target']['creators']=result['target'].pop('creator')
69
        result['target']['identifiers']=result['target'].pop('identifier')
70
        if 'collectedFrom' in result['target']:
71
            result['target']['objectProvider'] = [s['provider'] for s in result['target'].get('collectedFrom',[])]
72
        else:
73
            result['target']['objectProvider'] = []
74
        yield  result
modules/dli-api/branches/ES_7/v2/responseModel.py
1
from pydantic import BaseModel, Schema
2
from typing import Dict, List
3
from datetime import datetime
4
from fastapi import HTTPException
5
import logging
6
from time import time
7

  
8
rels = dict(issupplementto="IsSupplementTo", issupplementedby="IsSupplementedBy", references="References",
9
            isreferencedby="IsReferencedBy")
10

  
11

  
12
pid_resolver = {
13
    "pdb": "http://www.rcsb.org/pdb/explore/explore.do?structureId=%s",
14
    "ncbi-n": "http://www.ncbi.nlm.nih.gov/gquery/?term=%s",
15
    "ncbi": "http://www.ncbi.nlm.nih.gov/gquery/?term=%s",
16
    "pmid": "http://www.ncbi.nlm.nih.gov/pubmed/%s",
17
    "pmcid": "http://www.ncbi.nlm.nih.gov/pmc/articles/%s",
18
    "pubmedid": "http://www.ncbi.nlm.nih.gov/pubmed/%s",
19
    "doi": "http://dx.doi.org/%s",
20
    "genbank": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
21
    "nuccore": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
22
    "swiss-prot": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
23
    "arrayexpress": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
24
    "biomodels": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
25
    "bmrb": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
26
    "ena": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
27
    "geo": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
28
    "ensembl": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
29
    "mgi": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
30
    "bind": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
31
    "pride": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
32
    "ddbj": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
33
    "bioproject": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
34
    "embl": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
35
    "sra": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
36
}
37

  
38

  
39
def resolveIdentifier(pid, pid_type):
40
    if pid_type != None:
41
        if pid_type.lower() in pid_resolver:
42
            return pid_resolver[pid_type.lower()] % pid
43
        else:
44
            return "http://identifiers.org/%s:%s" % (pid_type, pid)
45

  
46

  
47
class LinkProvider(BaseModel):
48
    name:str = Schema(None, title= "The name of the Provider that provides the links", max_length= 300)
49
    totalRelationships:int=Schema(None, title= "The number of links that It provides")
50

  
51
class LinkPublisher(BaseModel):
52
    name:str = Schema(None, title= "The name of the Publisher that provides the links", max_length= 300)
53
    totalRelationships:int=Schema(None, title= "The number of links that It provides")
54

  
55

  
56
class IdentifierType(BaseModel):
57
    ID: str = None
58
    IDScheme:str = None
59
    IDURL:str= None
60

  
61
class ScholixProviderType(BaseModel):
62
    name:str
63
    identifier:List[IdentifierType] = []
64

  
65
class RelationshipType(BaseModel):
66
    Name:str
67
    SubType:str
68
    SubTypeSchema:str = None
69

  
70
class CreatorType(BaseModel):
71
    Name: str
72
    Identifier:List[IdentifierType] = []
73
    
74

  
75

  
76
class ScholixItemType(BaseModel):
77
    Identifier:List[IdentifierType] = []
78
    Title:str = None
79
    Type:str 
80
    Creator:List[CreatorType] = []
81
    PublicationDate:str = None
82
    Publisher:List[ScholixProviderType] =[]    
83

  
84
class ScholixType(BaseModel):
85
    HarvestDate:str=None
86
    LicenseURL:str=None
87
    LinkProvider:List[ScholixProviderType] =[]
88
    LinkPublicationDate: str = None
89
    RelationshipType:RelationshipType
90
    source:ScholixItemType
91
    target:ScholixItemType
92
    
93

  
94
class PageResultType(BaseModel):
95
    currentPage:int
96
    totalLinks:int
97
    totalPages:int
98
    result:List[ScholixType] = []
99

  
100

  
101
def get_scholix_resource(item):
102
    title = ''
103
    if 'title' in item:
104
        title = item.title
105
    if len(title):
106
        if title[0] == '"' and title[-1] == '"':
107
            title = title[1:-1]
108
    identifier = [dict(ID=x.identifier, IDScheme=x.schema, IDURL=resolveIdentifier(x.identifier, x.schema)) for x in
109
                  item.identifier]
110
    identifier.append(dict(ID=item.dnetIdentifier, IDScheme='D-Net Identifier', IDURL='http://scholexplorer.openaire.eu/index.html#/detail/%s'%item.dnetIdentifier))
111
    creator = []
112
    if 'creator' in item:
113
        creator = [dict(Name=x.name) for x in item.creator]
114
    publicationDate = None
115
    if 'publicationDate' in item:
116
        publicationDate = item.publicationDate
117
    publisher = []
118
    if 'publisher' in item:
119
        publisher = [dict(name= x.name) for x in item.publisher]
120
    c_type = item.objectType
121
    if item.objectType == 'publication':
122
        c_type = 'literature'
123

  
124
    resource = dict(Title=title, Identifier=identifier, Creator=creator, PublicationDate= publicationDate, Publisher = publisher, Type= c_type)
125

  
126
    return resource
127

  
128

  
129
def convert_response(response):
130
    now = datetime.now()
131
    log = logging.getLogger("scholexplorer")
132
    start = time()
133
    for item in response.hits:
134
        current_item = {'LinkPublicationDate': now.strftime("%Y-%m-%d"), 'HarvestDate': now.strftime("%Y-%m-%d"),
135
                         "LinkProvider": []}
136
        for linkProvider in item.linkprovider:
137
            current_item['LinkProvider'].append(ScholixProviderType(name=linkProvider.name,
138
                                                     identifier=[IdentifierType(ID=x.identifier, IDScheme=x.schema) for x in
139
                                                                 linkProvider.identifiers]))
140

  
141
        rel_sub_type = rels.get(item.relationship.name.lower(), "IsRelatedTo")
142
        current_item['RelationshipType'] = dict(Name=rel_sub_type, SubType=item.relationship.name,
143
                                                SubTypeSchema=item.relationship.schema)
144

  
145
        current_item['source'] = get_scholix_resource(item.source)
146
        current_item['target'] = get_scholix_resource(item.target)
147

  
148
        yield current_item
149
    end = time()
150
    log.debug("response converted in {} ms".format(end-start))
151

  
152
def create_response(response, current_page):
153
    log = logging.getLogger("scholexplorer")    
154
    if current_page > 9999:
155
        raise HTTPException(status_code=400,detail="MAX NUMBER OF PAGE REACHED")
156

  
157
    start = time()
158
    result = {'totalLinks': response.hits.total, 'currentPage': current_page /100,
159
              'totalPages': 1 + response.hits.total / 100, 'result': []}
160
    result['result'] = convert_response(response)
161
    end = time()
162

  
163
    log.debug("response created in {} ms".format(end-start))
164
    return result
modules/dli-api/branches/ES_7/v2/api_v2.py
1
from fastapi import FastAPI, Query,HTTPException
2
from pydantic import BaseModel, Schema
3
from typing import Dict, List
4
from v2.responseModel import *
5
from eu.dnetlib.ScholixConnector import ScholixConnector
6
from datetime import datetime
7
import time
8
import logging
9

  
10

  
11
subapi_v2 = FastAPI(title="Scholexplorer API 2.0",
12
    version="2.0.0",
13
    description="scholexplorer API version 2.0",openapi_prefix="/v2", docs_url="/ui/")
14

  
15
req_fields = ['sourcePid', 'targetPid', 'sourcePublisher', 'targetPublisher', 'linkProvider']
16

  
17
@subapi_v2.get("/LinkProvider", response_model=List[LinkProvider], tags=['LinkProvider : Operation related to the Link Provider'], description="Return a list of link provider and relative number of relations",summary="Get All Link Providers")
18
def linkProvider(name: str = Query(None, title="name", description="Filter the link provider by a name")):
19
        s = ScholixConnector()
20
        
21
        return s.list_datasources(name)
22

  
23
@subapi_v2.get("/LinkPublisher/inSource", response_model=List[LinkPublisher], 
24
            tags=['LinkPublisher : Operation related to the Link Publisher'], 
25
            description="Return a List of all Publishers that provide source objects in Scholix links and the total number of links where the source object comes from this publisher",
26
            summary="Get All Publishers that provide source object")
27
def linkPublisherInSource(name: str = Query(None, title="name", description="Filter the link publisher by a name")):
28
        s = ScholixConnector()                
29
        return s.list_publisher('source',name)
30

  
31
@subapi_v2.get("/LinkPublisher/inTarget", response_model=List[LinkPublisher], 
32
            tags=['LinkPublisher : Operation related to the Link Publisher'], 
33
            description="Return a List of all Publishers that provide target objects in Scholix links and the total number of links where the target object comes from this publisher",
34
            summary="Get All Publishers that provide target object")
35
def linkPublisherInTarget(name: str = Query(None, title="name", description="Filter the link publisher by a name")):
36
        s = ScholixConnector()
37
        return s.list_publisher('target',name)
38

  
39

  
40
@subapi_v2.get("/Links", response_model=PageResultType, tags=['Links : Operation related to the Scholix Links'], 
41
response_description="A list of List of scholix Links following the schema <a href='https://github.com/scholix/schema/tree/master/json/v3/schema.json'>https://github.com/scholix/schema/tree/master/json/v3/schema.json</a>",
42
description="""Return a List of scholix Links, this method <b>require one of the following parameters </b> (<b>sourcePid, targetPid, sourcePublisher,\
43
        \ targetPublisher, linkProvider</b>) all parameters can be combined""",summary="Get Scholix Links")
44
def links(
45
    linkProvider: str = Query(None, title="linkProvider", description="Filter Scholix relationships collected from a LinkProvider"),
46
    targetPid: str = Query(None, title="targetPid", description="Filter Scholix relationships having a target pid"),
47
    targetPublisher: str = Query(None, title="targetPublisher", description="Filter Scholix relationships having a target published in a Publisher named targetPublisher"),
48
    targetPidType: str = Query(None, title="targetPidType", description="Filter Scholix relationships having a target pid type"),
49
    targetType: str = Query(None, title="targetType", description="Filter Scholix relationships having a target type (literature, dataset, unknown)"),  
50
    sourceType: str = Query(None, title="sourceType", description="Filter Scholix relationships having a source type (literature, dataset, unknown)"),  
51
    sourcePid: str = Query(None, title="sourcePid", description="Filter Scholix relationships having a source pid"),
52
    sourcePublisher: str = Query(None, title="sourcePublisher", description="Filter Scholix relationships having a source published in a Publisher named sourcePublisher"),
53
    sourcePidType: str = Query(None, title="sourcePidType", description="Filter Scholix relationships having a source pid type"),
54
    harvestedAfter: str = Query(None, title="harvestedAfter", description="Filter scholix Links having collected after this date"),
55
        page:int= Query(None, title="page", description="select page of result"),
56
    ):
57

  
58
        log = logging.getLogger("scholexplorer")
59
        
60
        log.debug("request Link page")
61
        if not (sourcePid or targetPid or sourcePublisher or targetPublisher or linkProvider):
62
                raise HTTPException(status_code=400,detail='The method requires one of the following parameters: sourcePid, targetPid, sourcePublisher, targetPublisher, linkProvider')
63

  
64
        s = ScholixConnector()    
65
        m_page = 0
66
        if page:
67
                m_page= page*100
68
      
69
        return create_response(s.links(provider = linkProvider, s_pid=sourcePid, t_pid=targetPid, s_publisher=sourcePublisher,
70
                                           t_publisher=targetPublisher, s_pid_type=sourcePidType,
71
                                           t_pid_type=targetPidType, page=m_page, target_Type=targetType, source_Type=sourceType),m_page)    
72
        
modules/dli-api/branches/ES_7/eu/dnetlib/util.py
1
import logging
2
import os
3

  
4
def get_index_properties():
5
    if 'DLI_CONF_PATH' not in os.environ:
6
        logging.error("ENVIRONEMENT VARIABLE DLI_CONF_PATH DOES NOT EXISTS ")
7
        raise Exception("ENVIRONEMENT VARIABLE DLI_CONF_PATH DOES NOT EXISTS ")
8

  
9
    if not os.path.exists(os.environ['DLI_CONF_PATH']):
10
        logging.error("FILE {} DOES NOT EXISTS ".format(os.environ['DLI_CONF_PATH']))
11
        raise Exception("FILE {} DOES NOT EXISTS ".format(os.environ['DLI_CONF_PATH']))
12

  
13

  
14
    with open(os.environ['DLI_CONF_PATH']) as f:
15
        p = {}
16
        for line in f:
17
            if not line.startswith("#"):
18
                data = line.strip().split("=")
19
                p[data[0].strip()] = data[1].strip()
20
        return p
21
        
modules/dli-api/branches/ES_7/eu/dnetlib/ScholixConnector.py
1
from elasticsearch import *
2
from elasticsearch_dsl import *
3
from os import path
4
import os
5
from eu.dnetlib.util import get_index_properties
6
from elasticsearch_dsl.response import Response
7
import logging
8

  
9
log = logging.getLogger("scholexplorer")
10

  
11
class ScholixConnector(object):    
12

  
13
    __instance = None
14

  
15
    def __new__(cls):
16
        if ScholixConnector.__instance is None:
17
            ScholixConnector.__instance = object.__new__(cls)        
18
            props = get_index_properties()
19
            index_name = props['api.index']
20
            index_host = [x for x in props['es_index'].split(',')]    
21
            #connections.create_connection(hosts=index_host, timeout=1000)    
22
            ScholixConnector.__instance.connection = Elasticsearch(hosts=index_host, timeout=1000)
23
            ScholixConnector.__instance.index_host = index_host
24
            ScholixConnector.__instance.index_name = index_name
25
        return ScholixConnector.__instance
26

  
27
    def create_pidType_query(self, value, start):
28
        args = {start + '.identifier.schema': value}
29
        return Q('nested', path=start + '.identifier', query=Q('bool', must=[Q('match', **args)]))
30

  
31
    def create_pid_query(self, value, start):
32
        args = {start + '.identifier.identifier': value.lower()}
33
        return Q('nested', path=start + '.identifier', query=Q('bool', must=[Q('match', **args)]))
34

  
35
    def create_typology_query(self, value, start):
36
        args = {start + '.objectType': value}
37
        return Q('nested', path=start, query=Q('bool', must=[Q('match', **args)]))
38

  
39
    def create_dataSource_query(self, value):
40
        args = {'linkprovider.name': value}
41
        return Q('nested', path='linkprovider', query=Q('bool', must=[Q('match', **args)]))
42

  
43
    def create_publisher_query(self, value, start):
44
        args = {start + '.publisher.name': value}
45
        q = Q('nested', path=start + '.publisher', query=Q('bool', must=[Q('match', **args)]))
46
        return Q('nested', path=start, query=q)
47

  
48
    def list_datasources(self, ds_name=None):
49
        search_object = Search(using=self.connection, index=self.index_name).doc_type('scholix')
50
        if ds_name:
51
            search_object = search_object.query(self.create_dataSource_query(ds_name))
52
        else:
53
            search_object = search_object.query()
54
        search_object.aggs.bucket('all_datasources', 'nested', path='linkprovider').bucket('all_names', 'terms',
55
                                                                                           field='linkprovider.name',
56
                                                                                           size=100)
57

  
58
        response = search_object.execute()
59
        if ds_name:
60
            for item in response.aggs.all_datasources.all_names.buckets:
61
                if item.key == ds_name:
62
                    yield dict(name=item.key, totalRelationships=item.doc_count)
63

  
64
        else:
65
            for item in response.aggs.all_datasources.all_names.buckets:
66
                yield dict(name=item.key, totalRelationships=item.doc_count)    
67
        
68

  
69
    def list_publisher(self, start, pub_name=None):
70
        log.info("Started Index from host")
71
        search_object = Search(using=self.connection, index=self.index_name).doc_type('scholix')
72
        if pub_name:
73
            search_object = search_object.query(self.create_publisher_query(pub_name, start))
74
        else:
75
            search_object = search_object.query()
76
        search_object.aggs.bucket('all_targets', 'nested', path=start).bucket('all_t_pubs', 'nested',
77
                                                                              path=start + '.publisher').bucket(
78
            'all_pubs', 'terms',
79
            field=start + '.publisher.name',
80
            size=1000)
81

  
82
        response = search_object.execute()        
83
        for item in response.aggs.all_targets.all_t_pubs.all_pubs.buckets:
84
            if pub_name and item.key == pub_name:            
85
                yield dict(name=item.key, totalRelationships=item.doc_count)
86
            else:
87
                yield dict(name=item.key, totalRelationships=item.doc_count)
88
            
89
        
90

  
91
    def links(self, provider=None, s_pid=None, t_pid=None, s_publisher=None, t_publisher=None, s_pid_type=None,
92
              t_pid_type=None, target_Type=None, source_Type=None,page=0):       
93
        queries = []
94
        if provider:
95
            log.info("PROVIDER NOT NONE: {}".format(provider))
96
            queries.append(self.create_dataSource_query(provider))
97
        if s_pid:
98
            log.info("S_PID NOT NONE: {}".format(s_pid))
99
            queries.append(self.create_pid_query(s_pid, 'source'))
100
        if t_pid:
101
            queries.append(self.create_pid_query(t_pid, 'target'))
102
        if s_publisher:
103
            queries.append(self.create_publisher_query(s_publisher, 'source'))
104
        if t_publisher:
105
            queries.append(self.create_publisher_query(t_publisher, 'target'))
106
        if s_pid_type:
107
            queries.append(self.create_pidType_query(s_pid_type, 'source'))
108
        if t_pid_type:
109
            queries.append(self.create_pidType_query(t_pid_type, 'target'))
110
        if target_Type:
111
            if 'literature' == target_Type:
112
                target_Type = 'publication'
113
            queries.append(self.create_typology_query(target_Type,'target'))
114
        if source_Type:
115
            if 'literature' == source_Type:
116
                source_Type = 'publication'
117
            queries.append(self.create_typology_query(source_Type,'source'))
118

  
119
        
120
        q = None
121
        for item in queries:
122
            if not q:
123
                q = item
124
            else:
125
                q = q & item        
126
        log.debug("REQUEST CREATED {}".format(q))
127
        search_object = Search(using=self.connection, index=self.index_name).doc_type('scholix').query(q)
128
        log.debug("Page request size is {}".format(page))
129
        if page > 9999:
130
            return []
131

  
132
        return search_object[page:page + 100].execute()
133

  
134

  
135
    def realtionToPid(self, pid, pidType=None, datasource=None, typology=None, page=0):
136
        if pidType:
137
            query = self.create_pid_pidType_query(pidType.lower(), pid.lower())
138
        else:
139
            query = self.create_source_pid_query(pid.lower())
140
        filters = []
141
        if datasource and len(datasource):
142
            filters.append(self.create_dataSource_query(datasource))
143
        if typology and len(typology):
144
            filters.append(self.create_typology_query(typology,'target'))
145
        search_object = Search(using=self.connection, index=self.index_name).doc_type('scholix').query(query)
146

  
147
        if len(filters):
148
            search_object = search_object.filter(Q('bool', must=filters))
149
            if page > 9999:
150
                return []
151
        return search_object[page:page + 100].execute()
152

  
153
    def realtionToTypology(self, typology, page=0):
154
        search_object = Search(using=self.connection, index=self.index_name).doc_type('scholix').query(
155
            self.create_typology_query(typology,'target'))
156
        if page > 9999:
157
            return []
158
        return search_object[page:page + 100].execute()
modules/dli-api/branches/ES_7/eu/dnetlib/metricsMiddleWare.py
1
import time
2

  
3
from prometheus_client import Counter, Gauge, Histogram
4
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
5
from starlette.requests import Request
6
from starlette.responses import Response
7

  
8
REQUESTS = Counter("starlette_requests_total", "Total count of requests by method and path.", ["method", "path"])
9
RESPONSES = Counter(
10
    "scholexplorerAPI_responses_total",
11
    "Total count of responses by method, path and status codes.",
12
    ["method", "path", "status_code"],
13
)
14
REQUESTS_PROCESSING_TIME = Histogram(
15
    "scholexplorerAPI_requests_processing_time_seconds",
16
    "Histogram of requests processing time by path (in seconds)",
17
    ["method", "path"],
18
)
19
EXCEPTIONS = Counter(
20
    "scholexplorerAPI_exceptions_total",
21
    "Histogram of exceptions raised by path and exception type",
22
    ["method", "path", "exception_type"],
23
)
24
REQUESTS_IN_PROGRESS = Gauge(
25
    "scholexplorerAPI_requests_in_progress",
26
    "Gauge of requests by method and path currently being processed",
27
    ["method", "path"],
28
)
29

  
30

  
31
class PrometheusMiddleware(BaseHTTPMiddleware):
32
    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
33
        method = request.method
34
        path = request.url.path
35

  
36
        REQUESTS_IN_PROGRESS.labels(method=method, path=path).inc()
37
        REQUESTS.labels(method=method, path=path).inc()
38
        try:
39
            before_time = time.time()
40
            response = await call_next(request)
41
            after_time = time.time()
42
        except Exception as e:
43
            EXCEPTIONS.labels(method=method, path=path, exception_type=type(e).__name__).inc()
44
            raise e from None
45
        else:
46
            REQUESTS_PROCESSING_TIME.labels(method=method, path=path).observe(after_time - before_time)
47
            RESPONSES.labels(method=method, path=path, status_code=response.status_code).inc()
48
        finally:
49
            REQUESTS_IN_PROGRESS.labels(method=method, path=path).dec()
50

  
51
        return response
modules/dli-api/branches/ES_7/eu/dnetlib/metrics_utils.py
1
import os
2

  
3
from prometheus_client import CONTENT_TYPE_LATEST, REGISTRY, CollectorRegistry, generate_latest
4
from prometheus_client.multiprocess import MultiProcessCollector
5
from starlette.requests import Request
6
from starlette.responses import Response
7

  
8

  
9
def metrics(request: Request) -> Response:
10
    if "prometheus_multiproc_dir" in os.environ:
11
        registry = CollectorRegistry()
12
        MultiProcessCollector(registry)
13
    else:
14
        registry = REGISTRY
15

  
16
    return Response(generate_latest(registry), media_type=CONTENT_TYPE_LATEST)
modules/dli-api/branches/ES_7/main.py
1
from fastapi import FastAPI
2
from v2.api_v2 import subapi_v2
3
from v1.api import subapi_v1
4
from pympler import muppy, summary
5
import logging
6
from eu.dnetlib.metrics_utils import metrics 
7
from eu.dnetlib.metricsMiddleWare import PrometheusMiddleware
8
import sys
9

  
10

  
11
app = FastAPI()
12

  
13
log = logging.getLogger("scholexplorer")
14
log.setLevel(logging.INFO)
15
fh = logging.StreamHandler(sys.stdout) 
16
fh.setLevel(logging.INFO)
17
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
18
fh.setFormatter(formatter)
19
log.addHandler(fh)
20
log.info("Scholexplorer Api Restarted")
21

  
22

  
23

  
24
app.add_middleware(PrometheusMiddleware)
25
app.add_route("/metrics/", metrics)
26
app.mount("/v2", subapi_v2)
27
app.mount("/v1", subapi_v1)

Also available in: Unified diff