Revision 58197
Added by Sandro La Bruzzo about 4 years ago
modules/dli-api/branches/ES_7/requirements.txt | ||
---|---|---|
1 |
certifi==2019.3.9 |
|
2 |
chardet==3.0.4 |
|
3 |
Click==7.0 |
|
4 |
elasticsearch==5.5.3 |
|
5 |
elasticsearch-dsl==5.4.0 |
|
6 |
fastapi==0.20.0 |
|
7 |
h11==0.8.1 |
|
8 |
httptools==0.0.13 |
|
9 |
idna==2.8 |
|
10 |
memory-profiler==0.55.0 |
|
11 |
psutil==5.6.2 |
|
12 |
pydantic==0.23 |
|
13 |
Pympler==0.7 |
|
14 |
python-dateutil==2.8.0 |
|
15 |
requests==2.22.0 |
|
16 |
six==1.12.0 |
|
17 |
starlette==0.11.1 |
|
18 |
urllib3==1.25.2 |
|
19 |
uvicorn==0.7.1 |
|
20 |
uvloop==0.12.2 |
|
21 |
websockets==7.0 |
|
22 |
prometheus-client==0.5.0 |
modules/dli-api/branches/ES_7/v1/api.py | ||
---|---|---|
1 |
from fastapi import FastAPI, Query,HTTPException |
|
2 |
from v1.responseModel import ScholixType,convert_response |
|
3 |
import logging |
|
4 |
from typing import List |
|
5 |
from eu.dnetlib.ScholixConnector import ScholixConnector |
|
6 |
import logging |
|
7 |
|
|
8 |
log = logging.getLogger("scholexplorer") |
|
9 |
|
|
10 |
subapi_v1 = FastAPI(title="Scholexplorer API 1.0", |
|
11 |
version="2.0.0", |
|
12 |
description="scholexplorer API version 1.0",openapi_prefix="/v1", docs_url="/ui/") |
|
13 |
|
|
14 |
@subapi_v1.get("/linksFromDatasource", |
|
15 |
tags=["Scholix"], |
|
16 |
summary="Get all scholix relation collected from a datasource", |
|
17 |
description="return a list of scholix object collected from a specific datasource" , |
|
18 |
response_model=List[ScholixType] |
|
19 |
) |
|
20 |
def links_from_datasource(datasource: str = Query((...),title="datasource", description="Filter Scholix relationships collected from a LinkProvider"),page:int= Query(None, title="page", description="select page of result")): |
|
21 |
s = ScholixConnector() |
|
22 |
m_page = 0 |
|
23 |
if page: |
|
24 |
m_page= page*100 |
|
25 |
if m_page > 9999: |
|
26 |
raise HTTPException(status_code=400,detail="MAX NUMBER OF PAGE REACHED") |
|
27 |
return convert_response(s.links(provider=datasource, page= m_page)) |
|
28 |
|
|
29 |
|
|
30 |
@subapi_v1.get("/linksFromPid", |
|
31 |
tags=["Scholix"], |
|
32 |
summary="Retrieve all scholix links from a persistent identifier", |
|
33 |
description="The linksFromPid endpoint returns a list of scholix object related from a specific persistent identifier" , |
|
34 |
response_model=List[ScholixType] |
|
35 |
) |
|
36 |
def links_from_pid( |
|
37 |
pid: str = Query((...), title="pid", description="persistent Identifier"), |
|
38 |
pidType: str = Query(None, title="pidType", description="Persistent Identifier Type"), |
|
39 |
typologyTarget: str = Query(None, title="targetPidType", description="typology target filter should be publication, dataset or unknown"), |
|
40 |
datasourceTarget: str = Query(None, title="datasourceTarget", description="a datasource provenace filter of the target relation"), |
|
41 |
page:int= Query(None, title="page", description="select page of result")): |
|
42 |
s = ScholixConnector() |
|
43 |
m_page = 0 |
|
44 |
if page: |
|
45 |
m_page= page*100 |
|
46 |
if m_page > 9999: |
|
47 |
raise HTTPException(status_code=400,detail="MAX NUMBER OF PAGE REACHED") |
|
48 |
return convert_response(s.links(s_pid =pid,s_pid_type= pidType,source_Type= typologyTarget, provider = datasourceTarget) ) |
|
49 |
|
|
50 |
|
|
51 |
@subapi_v1.get("/linksFromPublisher", |
|
52 |
tags=["Scholix"], |
|
53 |
summary="Get all scholix relation collected from a publisher", |
|
54 |
description="return a list of scholix object published from a specific publisher" , |
|
55 |
response_model=List[ScholixType] |
|
56 |
) |
|
57 |
def links_from_publisher(publisher: str = Query((...),title="publisher", description="Filter Scholix relationships collected from a publisher"),page:int= Query(None, title="page", description="select page of result")): |
|
58 |
s = ScholixConnector() |
|
59 |
m_page = 0 |
|
60 |
if page: |
|
61 |
m_page= page*100 |
|
62 |
if m_page > 9999: |
|
63 |
raise HTTPException(status_code=400,detail="MAX NUMBER OF PAGE REACHED") |
|
64 |
return convert_response(s.links(t_publisher=publisher, page= m_page)) |
|
65 |
|
|
66 |
|
|
67 |
@subapi_v1.get("/listDatasources", |
|
68 |
tags=["Datasources"], |
|
69 |
summary="Get all datasources", |
|
70 |
description="returns a list of all datasources" , |
|
71 |
response_model=List[str] |
|
72 |
) |
|
73 |
def list_datasources(): |
|
74 |
s = ScholixConnector() |
|
75 |
|
|
76 |
for item in s.list_datasources(): |
|
77 |
yield item['name'] |
|
78 |
|
|
79 |
|
|
80 |
|
modules/dli-api/branches/ES_7/v1/responseModel.py | ||
---|---|---|
1 |
from pydantic import BaseModel, Schema |
|
2 |
from typing import Dict, List |
|
3 |
from datetime import datetime |
|
4 |
from fastapi import HTTPException |
|
5 |
import logging |
|
6 |
from time import time |
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
class LinkProvider(BaseModel): |
|
11 |
name:str = Schema(None, title= "The name of the Provider that provides the links", max_length= 300) |
|
12 |
totalRelationships:int=Schema(None, title= "The number of links that It provides") |
|
13 |
|
|
14 |
class LinkPublisher(BaseModel): |
|
15 |
name:str = Schema(None, title= "The name of the Publisher that provides the links", max_length= 300) |
|
16 |
totalRelationships:int=Schema(None, title= "The number of links that It provides") |
|
17 |
|
|
18 |
|
|
19 |
class IdentifierType(BaseModel): |
|
20 |
identifier: str = None |
|
21 |
schema_str:str = Schema(None, alias="schema") |
|
22 |
|
|
23 |
|
|
24 |
class ScholixProviderType(BaseModel): |
|
25 |
name:str |
|
26 |
identifiers:List[IdentifierType] = [] |
|
27 |
|
|
28 |
class RelationshipType(BaseModel): |
|
29 |
name:str |
|
30 |
schema_str:str = Schema(None, alias="schema") |
|
31 |
inverseRelationship:str = None |
|
32 |
|
|
33 |
class CreatorType(BaseModel): |
|
34 |
name: str |
|
35 |
identifiers:List[IdentifierType] = [] |
|
36 |
|
|
37 |
class ScholixObjectType(BaseModel): |
|
38 |
subtype:str = None |
|
39 |
type:str = None |
|
40 |
|
|
41 |
class ScholixItemType(BaseModel): |
|
42 |
identifiers:List[IdentifierType] = [] |
|
43 |
title:str = None |
|
44 |
objectType:str |
|
45 |
creators:List[CreatorType] = [] |
|
46 |
publisher:List[ScholixProviderType] =[] |
|
47 |
objectProvider: List[ScholixProviderType] =[] |
|
48 |
|
|
49 |
class ScholixType(BaseModel): |
|
50 |
linkProvider:List[ScholixProviderType] =[] |
|
51 |
publicationDate:str =None |
|
52 |
relationship:RelationshipType=None |
|
53 |
source:ScholixItemType=None |
|
54 |
target:ScholixItemType=None |
|
55 |
|
|
56 |
def convert_response(response): |
|
57 |
log = logging.getLogger("scholexplorer") |
|
58 |
|
|
59 |
|
|
60 |
for item in response.hits: |
|
61 |
result = item.__dict__['_d_'] |
|
62 |
result['linkProvider'] = result.pop('linkprovider') |
|
63 |
if 'creator' in result['source']: |
|
64 |
result['source']['creators']=result['source'].pop('creator') |
|
65 |
result['source']['identifiers']=result['source'].pop('identifier') |
|
66 |
result['source']['objectProvider'] = [s['provider'] for s in result['source'].get('collectedFrom',[])] |
|
67 |
if 'creator' in result['target']: |
|
68 |
result['target']['creators']=result['target'].pop('creator') |
|
69 |
result['target']['identifiers']=result['target'].pop('identifier') |
|
70 |
if 'collectedFrom' in result['target']: |
|
71 |
result['target']['objectProvider'] = [s['provider'] for s in result['target'].get('collectedFrom',[])] |
|
72 |
else: |
|
73 |
result['target']['objectProvider'] = [] |
|
74 |
yield result |
modules/dli-api/branches/ES_7/v2/responseModel.py | ||
---|---|---|
1 |
from pydantic import BaseModel, Schema |
|
2 |
from typing import Dict, List |
|
3 |
from datetime import datetime |
|
4 |
from fastapi import HTTPException |
|
5 |
import logging |
|
6 |
from time import time |
|
7 |
|
|
8 |
rels = dict(issupplementto="IsSupplementTo", issupplementedby="IsSupplementedBy", references="References", |
|
9 |
isreferencedby="IsReferencedBy") |
|
10 |
|
|
11 |
|
|
12 |
pid_resolver = { |
|
13 |
"pdb": "http://www.rcsb.org/pdb/explore/explore.do?structureId=%s", |
|
14 |
"ncbi-n": "http://www.ncbi.nlm.nih.gov/gquery/?term=%s", |
|
15 |
"ncbi": "http://www.ncbi.nlm.nih.gov/gquery/?term=%s", |
|
16 |
"pmid": "http://www.ncbi.nlm.nih.gov/pubmed/%s", |
|
17 |
"pmcid": "http://www.ncbi.nlm.nih.gov/pmc/articles/%s", |
|
18 |
"pubmedid": "http://www.ncbi.nlm.nih.gov/pubmed/%s", |
|
19 |
"doi": "http://dx.doi.org/%s", |
|
20 |
"genbank": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
21 |
"nuccore": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
22 |
"swiss-prot": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
23 |
"arrayexpress": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
24 |
"biomodels": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
25 |
"bmrb": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
26 |
"ena": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
27 |
"geo": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
28 |
"ensembl": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
29 |
"mgi": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
30 |
"bind": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
31 |
"pride": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
32 |
"ddbj": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
33 |
"bioproject": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
34 |
"embl": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
35 |
"sra": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank", |
|
36 |
} |
|
37 |
|
|
38 |
|
|
39 |
def resolveIdentifier(pid, pid_type): |
|
40 |
if pid_type != None: |
|
41 |
if pid_type.lower() in pid_resolver: |
|
42 |
return pid_resolver[pid_type.lower()] % pid |
|
43 |
else: |
|
44 |
return "http://identifiers.org/%s:%s" % (pid_type, pid) |
|
45 |
|
|
46 |
|
|
47 |
class LinkProvider(BaseModel): |
|
48 |
name:str = Schema(None, title= "The name of the Provider that provides the links", max_length= 300) |
|
49 |
totalRelationships:int=Schema(None, title= "The number of links that It provides") |
|
50 |
|
|
51 |
class LinkPublisher(BaseModel): |
|
52 |
name:str = Schema(None, title= "The name of the Publisher that provides the links", max_length= 300) |
|
53 |
totalRelationships:int=Schema(None, title= "The number of links that It provides") |
|
54 |
|
|
55 |
|
|
56 |
class IdentifierType(BaseModel): |
|
57 |
ID: str = None |
|
58 |
IDScheme:str = None |
|
59 |
IDURL:str= None |
|
60 |
|
|
61 |
class ScholixProviderType(BaseModel): |
|
62 |
name:str |
|
63 |
identifier:List[IdentifierType] = [] |
|
64 |
|
|
65 |
class RelationshipType(BaseModel): |
|
66 |
Name:str |
|
67 |
SubType:str |
|
68 |
SubTypeSchema:str = None |
|
69 |
|
|
70 |
class CreatorType(BaseModel): |
|
71 |
Name: str |
|
72 |
Identifier:List[IdentifierType] = [] |
|
73 |
|
|
74 |
|
|
75 |
|
|
76 |
class ScholixItemType(BaseModel): |
|
77 |
Identifier:List[IdentifierType] = [] |
|
78 |
Title:str = None |
|
79 |
Type:str |
|
80 |
Creator:List[CreatorType] = [] |
|
81 |
PublicationDate:str = None |
|
82 |
Publisher:List[ScholixProviderType] =[] |
|
83 |
|
|
84 |
class ScholixType(BaseModel): |
|
85 |
HarvestDate:str=None |
|
86 |
LicenseURL:str=None |
|
87 |
LinkProvider:List[ScholixProviderType] =[] |
|
88 |
LinkPublicationDate: str = None |
|
89 |
RelationshipType:RelationshipType |
|
90 |
source:ScholixItemType |
|
91 |
target:ScholixItemType |
|
92 |
|
|
93 |
|
|
94 |
class PageResultType(BaseModel): |
|
95 |
currentPage:int |
|
96 |
totalLinks:int |
|
97 |
totalPages:int |
|
98 |
result:List[ScholixType] = [] |
|
99 |
|
|
100 |
|
|
101 |
def get_scholix_resource(item): |
|
102 |
title = '' |
|
103 |
if 'title' in item: |
|
104 |
title = item.title |
|
105 |
if len(title): |
|
106 |
if title[0] == '"' and title[-1] == '"': |
|
107 |
title = title[1:-1] |
|
108 |
identifier = [dict(ID=x.identifier, IDScheme=x.schema, IDURL=resolveIdentifier(x.identifier, x.schema)) for x in |
|
109 |
item.identifier] |
|
110 |
identifier.append(dict(ID=item.dnetIdentifier, IDScheme='D-Net Identifier', IDURL='http://scholexplorer.openaire.eu/index.html#/detail/%s'%item.dnetIdentifier)) |
|
111 |
creator = [] |
|
112 |
if 'creator' in item: |
|
113 |
creator = [dict(Name=x.name) for x in item.creator] |
|
114 |
publicationDate = None |
|
115 |
if 'publicationDate' in item: |
|
116 |
publicationDate = item.publicationDate |
|
117 |
publisher = [] |
|
118 |
if 'publisher' in item: |
|
119 |
publisher = [dict(name= x.name) for x in item.publisher] |
|
120 |
c_type = item.objectType |
|
121 |
if item.objectType == 'publication': |
|
122 |
c_type = 'literature' |
|
123 |
|
|
124 |
resource = dict(Title=title, Identifier=identifier, Creator=creator, PublicationDate= publicationDate, Publisher = publisher, Type= c_type) |
|
125 |
|
|
126 |
return resource |
|
127 |
|
|
128 |
|
|
129 |
def convert_response(response): |
|
130 |
now = datetime.now() |
|
131 |
log = logging.getLogger("scholexplorer") |
|
132 |
start = time() |
|
133 |
for item in response.hits: |
|
134 |
current_item = {'LinkPublicationDate': now.strftime("%Y-%m-%d"), 'HarvestDate': now.strftime("%Y-%m-%d"), |
|
135 |
"LinkProvider": []} |
|
136 |
for linkProvider in item.linkprovider: |
|
137 |
current_item['LinkProvider'].append(ScholixProviderType(name=linkProvider.name, |
|
138 |
identifier=[IdentifierType(ID=x.identifier, IDScheme=x.schema) for x in |
|
139 |
linkProvider.identifiers])) |
|
140 |
|
|
141 |
rel_sub_type = rels.get(item.relationship.name.lower(), "IsRelatedTo") |
|
142 |
current_item['RelationshipType'] = dict(Name=rel_sub_type, SubType=item.relationship.name, |
|
143 |
SubTypeSchema=item.relationship.schema) |
|
144 |
|
|
145 |
current_item['source'] = get_scholix_resource(item.source) |
|
146 |
current_item['target'] = get_scholix_resource(item.target) |
|
147 |
|
|
148 |
yield current_item |
|
149 |
end = time() |
|
150 |
log.debug("response converted in {} ms".format(end-start)) |
|
151 |
|
|
152 |
def create_response(response, current_page): |
|
153 |
log = logging.getLogger("scholexplorer") |
|
154 |
if current_page > 9999: |
|
155 |
raise HTTPException(status_code=400,detail="MAX NUMBER OF PAGE REACHED") |
|
156 |
|
|
157 |
start = time() |
|
158 |
result = {'totalLinks': response.hits.total, 'currentPage': current_page /100, |
|
159 |
'totalPages': 1 + response.hits.total / 100, 'result': []} |
|
160 |
result['result'] = convert_response(response) |
|
161 |
end = time() |
|
162 |
|
|
163 |
log.debug("response created in {} ms".format(end-start)) |
|
164 |
return result |
modules/dli-api/branches/ES_7/v2/api_v2.py | ||
---|---|---|
1 |
from fastapi import FastAPI, Query,HTTPException |
|
2 |
from pydantic import BaseModel, Schema |
|
3 |
from typing import Dict, List |
|
4 |
from v2.responseModel import * |
|
5 |
from eu.dnetlib.ScholixConnector import ScholixConnector |
|
6 |
from datetime import datetime |
|
7 |
import time |
|
8 |
import logging |
|
9 |
|
|
10 |
|
|
11 |
subapi_v2 = FastAPI(title="Scholexplorer API 2.0", |
|
12 |
version="2.0.0", |
|
13 |
description="scholexplorer API version 2.0",openapi_prefix="/v2", docs_url="/ui/") |
|
14 |
|
|
15 |
req_fields = ['sourcePid', 'targetPid', 'sourcePublisher', 'targetPublisher', 'linkProvider'] |
|
16 |
|
|
17 |
@subapi_v2.get("/LinkProvider", response_model=List[LinkProvider], tags=['LinkProvider : Operation related to the Link Provider'], description="Return a list of link provider and relative number of relations",summary="Get All Link Providers") |
|
18 |
def linkProvider(name: str = Query(None, title="name", description="Filter the link provider by a name")): |
|
19 |
s = ScholixConnector() |
|
20 |
|
|
21 |
return s.list_datasources(name) |
|
22 |
|
|
23 |
@subapi_v2.get("/LinkPublisher/inSource", response_model=List[LinkPublisher], |
|
24 |
tags=['LinkPublisher : Operation related to the Link Publisher'], |
|
25 |
description="Return a List of all Publishers that provide source objects in Scholix links and the total number of links where the source object comes from this publisher", |
|
26 |
summary="Get All Publishers that provide source object") |
|
27 |
def linkPublisherInSource(name: str = Query(None, title="name", description="Filter the link publisher by a name")): |
|
28 |
s = ScholixConnector() |
|
29 |
return s.list_publisher('source',name) |
|
30 |
|
|
31 |
@subapi_v2.get("/LinkPublisher/inTarget", response_model=List[LinkPublisher], |
|
32 |
tags=['LinkPublisher : Operation related to the Link Publisher'], |
|
33 |
description="Return a List of all Publishers that provide target objects in Scholix links and the total number of links where the target object comes from this publisher", |
|
34 |
summary="Get All Publishers that provide target object") |
|
35 |
def linkPublisherInTarget(name: str = Query(None, title="name", description="Filter the link publisher by a name")): |
|
36 |
s = ScholixConnector() |
|
37 |
return s.list_publisher('target',name) |
|
38 |
|
|
39 |
|
|
40 |
@subapi_v2.get("/Links", response_model=PageResultType, tags=['Links : Operation related to the Scholix Links'], |
|
41 |
response_description="A list of List of scholix Links following the schema <a href='https://github.com/scholix/schema/tree/master/json/v3/schema.json'>https://github.com/scholix/schema/tree/master/json/v3/schema.json</a>", |
|
42 |
description="""Return a List of scholix Links, this method <b>require one of the following parameters </b> (<b>sourcePid, targetPid, sourcePublisher,\ |
|
43 |
\ targetPublisher, linkProvider</b>) all parameters can be combined""",summary="Get Scholix Links") |
|
44 |
def links( |
|
45 |
linkProvider: str = Query(None, title="linkProvider", description="Filter Scholix relationships collected from a LinkProvider"), |
|
46 |
targetPid: str = Query(None, title="targetPid", description="Filter Scholix relationships having a target pid"), |
|
47 |
targetPublisher: str = Query(None, title="targetPublisher", description="Filter Scholix relationships having a target published in a Publisher named targetPublisher"), |
|
48 |
targetPidType: str = Query(None, title="targetPidType", description="Filter Scholix relationships having a target pid type"), |
|
49 |
targetType: str = Query(None, title="targetType", description="Filter Scholix relationships having a target type (literature, dataset, unknown)"), |
|
50 |
sourceType: str = Query(None, title="sourceType", description="Filter Scholix relationships having a source type (literature, dataset, unknown)"), |
|
51 |
sourcePid: str = Query(None, title="sourcePid", description="Filter Scholix relationships having a source pid"), |
|
52 |
sourcePublisher: str = Query(None, title="sourcePublisher", description="Filter Scholix relationships having a source published in a Publisher named sourcePublisher"), |
|
53 |
sourcePidType: str = Query(None, title="sourcePidType", description="Filter Scholix relationships having a source pid type"), |
|
54 |
harvestedAfter: str = Query(None, title="harvestedAfter", description="Filter scholix Links having collected after this date"), |
|
55 |
page:int= Query(None, title="page", description="select page of result"), |
|
56 |
): |
|
57 |
|
|
58 |
log = logging.getLogger("scholexplorer") |
|
59 |
|
|
60 |
log.debug("request Link page") |
|
61 |
if not (sourcePid or targetPid or sourcePublisher or targetPublisher or linkProvider): |
|
62 |
raise HTTPException(status_code=400,detail='The method requires one of the following parameters: sourcePid, targetPid, sourcePublisher, targetPublisher, linkProvider') |
|
63 |
|
|
64 |
s = ScholixConnector() |
|
65 |
m_page = 0 |
|
66 |
if page: |
|
67 |
m_page= page*100 |
|
68 |
|
|
69 |
return create_response(s.links(provider = linkProvider, s_pid=sourcePid, t_pid=targetPid, s_publisher=sourcePublisher, |
|
70 |
t_publisher=targetPublisher, s_pid_type=sourcePidType, |
|
71 |
t_pid_type=targetPidType, page=m_page, target_Type=targetType, source_Type=sourceType),m_page) |
|
72 |
|
modules/dli-api/branches/ES_7/eu/dnetlib/util.py | ||
---|---|---|
1 |
import logging |
|
2 |
import os |
|
3 |
|
|
4 |
def get_index_properties(): |
|
5 |
if 'DLI_CONF_PATH' not in os.environ: |
|
6 |
logging.error("ENVIRONEMENT VARIABLE DLI_CONF_PATH DOES NOT EXISTS ") |
|
7 |
raise Exception("ENVIRONEMENT VARIABLE DLI_CONF_PATH DOES NOT EXISTS ") |
|
8 |
|
|
9 |
if not os.path.exists(os.environ['DLI_CONF_PATH']): |
|
10 |
logging.error("FILE {} DOES NOT EXISTS ".format(os.environ['DLI_CONF_PATH'])) |
|
11 |
raise Exception("FILE {} DOES NOT EXISTS ".format(os.environ['DLI_CONF_PATH'])) |
|
12 |
|
|
13 |
|
|
14 |
with open(os.environ['DLI_CONF_PATH']) as f: |
|
15 |
p = {} |
|
16 |
for line in f: |
|
17 |
if not line.startswith("#"): |
|
18 |
data = line.strip().split("=") |
|
19 |
p[data[0].strip()] = data[1].strip() |
|
20 |
return p |
|
21 |
|
modules/dli-api/branches/ES_7/eu/dnetlib/ScholixConnector.py | ||
---|---|---|
1 |
from elasticsearch import * |
|
2 |
from elasticsearch_dsl import * |
|
3 |
from os import path |
|
4 |
import os |
|
5 |
from eu.dnetlib.util import get_index_properties |
|
6 |
from elasticsearch_dsl.response import Response |
|
7 |
import logging |
|
8 |
|
|
9 |
log = logging.getLogger("scholexplorer") |
|
10 |
|
|
11 |
class ScholixConnector(object): |
|
12 |
|
|
13 |
__instance = None |
|
14 |
|
|
15 |
def __new__(cls): |
|
16 |
if ScholixConnector.__instance is None: |
|
17 |
ScholixConnector.__instance = object.__new__(cls) |
|
18 |
props = get_index_properties() |
|
19 |
index_name = props['api.index'] |
|
20 |
index_host = [x for x in props['es_index'].split(',')] |
|
21 |
#connections.create_connection(hosts=index_host, timeout=1000) |
|
22 |
ScholixConnector.__instance.connection = Elasticsearch(hosts=index_host, timeout=1000) |
|
23 |
ScholixConnector.__instance.index_host = index_host |
|
24 |
ScholixConnector.__instance.index_name = index_name |
|
25 |
return ScholixConnector.__instance |
|
26 |
|
|
27 |
def create_pidType_query(self, value, start): |
|
28 |
args = {start + '.identifier.schema': value} |
|
29 |
return Q('nested', path=start + '.identifier', query=Q('bool', must=[Q('match', **args)])) |
|
30 |
|
|
31 |
def create_pid_query(self, value, start): |
|
32 |
args = {start + '.identifier.identifier': value.lower()} |
|
33 |
return Q('nested', path=start + '.identifier', query=Q('bool', must=[Q('match', **args)])) |
|
34 |
|
|
35 |
def create_typology_query(self, value, start): |
|
36 |
args = {start + '.objectType': value} |
|
37 |
return Q('nested', path=start, query=Q('bool', must=[Q('match', **args)])) |
|
38 |
|
|
39 |
def create_dataSource_query(self, value): |
|
40 |
args = {'linkprovider.name': value} |
|
41 |
return Q('nested', path='linkprovider', query=Q('bool', must=[Q('match', **args)])) |
|
42 |
|
|
43 |
def create_publisher_query(self, value, start): |
|
44 |
args = {start + '.publisher.name': value} |
|
45 |
q = Q('nested', path=start + '.publisher', query=Q('bool', must=[Q('match', **args)])) |
|
46 |
return Q('nested', path=start, query=q) |
|
47 |
|
|
48 |
def list_datasources(self, ds_name=None): |
|
49 |
search_object = Search(using=self.connection, index=self.index_name).doc_type('scholix') |
|
50 |
if ds_name: |
|
51 |
search_object = search_object.query(self.create_dataSource_query(ds_name)) |
|
52 |
else: |
|
53 |
search_object = search_object.query() |
|
54 |
search_object.aggs.bucket('all_datasources', 'nested', path='linkprovider').bucket('all_names', 'terms', |
|
55 |
field='linkprovider.name', |
|
56 |
size=100) |
|
57 |
|
|
58 |
response = search_object.execute() |
|
59 |
if ds_name: |
|
60 |
for item in response.aggs.all_datasources.all_names.buckets: |
|
61 |
if item.key == ds_name: |
|
62 |
yield dict(name=item.key, totalRelationships=item.doc_count) |
|
63 |
|
|
64 |
else: |
|
65 |
for item in response.aggs.all_datasources.all_names.buckets: |
|
66 |
yield dict(name=item.key, totalRelationships=item.doc_count) |
|
67 |
|
|
68 |
|
|
69 |
def list_publisher(self, start, pub_name=None): |
|
70 |
log.info("Started Index from host") |
|
71 |
search_object = Search(using=self.connection, index=self.index_name).doc_type('scholix') |
|
72 |
if pub_name: |
|
73 |
search_object = search_object.query(self.create_publisher_query(pub_name, start)) |
|
74 |
else: |
|
75 |
search_object = search_object.query() |
|
76 |
search_object.aggs.bucket('all_targets', 'nested', path=start).bucket('all_t_pubs', 'nested', |
|
77 |
path=start + '.publisher').bucket( |
|
78 |
'all_pubs', 'terms', |
|
79 |
field=start + '.publisher.name', |
|
80 |
size=1000) |
|
81 |
|
|
82 |
response = search_object.execute() |
|
83 |
for item in response.aggs.all_targets.all_t_pubs.all_pubs.buckets: |
|
84 |
if pub_name and item.key == pub_name: |
|
85 |
yield dict(name=item.key, totalRelationships=item.doc_count) |
|
86 |
else: |
|
87 |
yield dict(name=item.key, totalRelationships=item.doc_count) |
|
88 |
|
|
89 |
|
|
90 |
|
|
91 |
def links(self, provider=None, s_pid=None, t_pid=None, s_publisher=None, t_publisher=None, s_pid_type=None, |
|
92 |
t_pid_type=None, target_Type=None, source_Type=None,page=0): |
|
93 |
queries = [] |
|
94 |
if provider: |
|
95 |
log.info("PROVIDER NOT NONE: {}".format(provider)) |
|
96 |
queries.append(self.create_dataSource_query(provider)) |
|
97 |
if s_pid: |
|
98 |
log.info("S_PID NOT NONE: {}".format(s_pid)) |
|
99 |
queries.append(self.create_pid_query(s_pid, 'source')) |
|
100 |
if t_pid: |
|
101 |
queries.append(self.create_pid_query(t_pid, 'target')) |
|
102 |
if s_publisher: |
|
103 |
queries.append(self.create_publisher_query(s_publisher, 'source')) |
|
104 |
if t_publisher: |
|
105 |
queries.append(self.create_publisher_query(t_publisher, 'target')) |
|
106 |
if s_pid_type: |
|
107 |
queries.append(self.create_pidType_query(s_pid_type, 'source')) |
|
108 |
if t_pid_type: |
|
109 |
queries.append(self.create_pidType_query(t_pid_type, 'target')) |
|
110 |
if target_Type: |
|
111 |
if 'literature' == target_Type: |
|
112 |
target_Type = 'publication' |
|
113 |
queries.append(self.create_typology_query(target_Type,'target')) |
|
114 |
if source_Type: |
|
115 |
if 'literature' == source_Type: |
|
116 |
source_Type = 'publication' |
|
117 |
queries.append(self.create_typology_query(source_Type,'source')) |
|
118 |
|
|
119 |
|
|
120 |
q = None |
|
121 |
for item in queries: |
|
122 |
if not q: |
|
123 |
q = item |
|
124 |
else: |
|
125 |
q = q & item |
|
126 |
log.debug("REQUEST CREATED {}".format(q)) |
|
127 |
search_object = Search(using=self.connection, index=self.index_name).doc_type('scholix').query(q) |
|
128 |
log.debug("Page request size is {}".format(page)) |
|
129 |
if page > 9999: |
|
130 |
return [] |
|
131 |
|
|
132 |
return search_object[page:page + 100].execute() |
|
133 |
|
|
134 |
|
|
135 |
def realtionToPid(self, pid, pidType=None, datasource=None, typology=None, page=0): |
|
136 |
if pidType: |
|
137 |
query = self.create_pid_pidType_query(pidType.lower(), pid.lower()) |
|
138 |
else: |
|
139 |
query = self.create_source_pid_query(pid.lower()) |
|
140 |
filters = [] |
|
141 |
if datasource and len(datasource): |
|
142 |
filters.append(self.create_dataSource_query(datasource)) |
|
143 |
if typology and len(typology): |
|
144 |
filters.append(self.create_typology_query(typology,'target')) |
|
145 |
search_object = Search(using=self.connection, index=self.index_name).doc_type('scholix').query(query) |
|
146 |
|
|
147 |
if len(filters): |
|
148 |
search_object = search_object.filter(Q('bool', must=filters)) |
|
149 |
if page > 9999: |
|
150 |
return [] |
|
151 |
return search_object[page:page + 100].execute() |
|
152 |
|
|
153 |
def realtionToTypology(self, typology, page=0): |
|
154 |
search_object = Search(using=self.connection, index=self.index_name).doc_type('scholix').query( |
|
155 |
self.create_typology_query(typology,'target')) |
|
156 |
if page > 9999: |
|
157 |
return [] |
|
158 |
return search_object[page:page + 100].execute() |
modules/dli-api/branches/ES_7/eu/dnetlib/metricsMiddleWare.py | ||
---|---|---|
1 |
import time |
|
2 |
|
|
3 |
from prometheus_client import Counter, Gauge, Histogram |
|
4 |
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint |
|
5 |
from starlette.requests import Request |
|
6 |
from starlette.responses import Response |
|
7 |
|
|
8 |
REQUESTS = Counter("starlette_requests_total", "Total count of requests by method and path.", ["method", "path"]) |
|
9 |
RESPONSES = Counter( |
|
10 |
"scholexplorerAPI_responses_total", |
|
11 |
"Total count of responses by method, path and status codes.", |
|
12 |
["method", "path", "status_code"], |
|
13 |
) |
|
14 |
REQUESTS_PROCESSING_TIME = Histogram( |
|
15 |
"scholexplorerAPI_requests_processing_time_seconds", |
|
16 |
"Histogram of requests processing time by path (in seconds)", |
|
17 |
["method", "path"], |
|
18 |
) |
|
19 |
EXCEPTIONS = Counter( |
|
20 |
"scholexplorerAPI_exceptions_total", |
|
21 |
"Histogram of exceptions raised by path and exception type", |
|
22 |
["method", "path", "exception_type"], |
|
23 |
) |
|
24 |
REQUESTS_IN_PROGRESS = Gauge( |
|
25 |
"scholexplorerAPI_requests_in_progress", |
|
26 |
"Gauge of requests by method and path currently being processed", |
|
27 |
["method", "path"], |
|
28 |
) |
|
29 |
|
|
30 |
|
|
31 |
class PrometheusMiddleware(BaseHTTPMiddleware): |
|
32 |
async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response: |
|
33 |
method = request.method |
|
34 |
path = request.url.path |
|
35 |
|
|
36 |
REQUESTS_IN_PROGRESS.labels(method=method, path=path).inc() |
|
37 |
REQUESTS.labels(method=method, path=path).inc() |
|
38 |
try: |
|
39 |
before_time = time.time() |
|
40 |
response = await call_next(request) |
|
41 |
after_time = time.time() |
|
42 |
except Exception as e: |
|
43 |
EXCEPTIONS.labels(method=method, path=path, exception_type=type(e).__name__).inc() |
|
44 |
raise e from None |
|
45 |
else: |
|
46 |
REQUESTS_PROCESSING_TIME.labels(method=method, path=path).observe(after_time - before_time) |
|
47 |
RESPONSES.labels(method=method, path=path, status_code=response.status_code).inc() |
|
48 |
finally: |
|
49 |
REQUESTS_IN_PROGRESS.labels(method=method, path=path).dec() |
|
50 |
|
|
51 |
return response |
modules/dli-api/branches/ES_7/eu/dnetlib/metrics_utils.py | ||
---|---|---|
1 |
import os |
|
2 |
|
|
3 |
from prometheus_client import CONTENT_TYPE_LATEST, REGISTRY, CollectorRegistry, generate_latest |
|
4 |
from prometheus_client.multiprocess import MultiProcessCollector |
|
5 |
from starlette.requests import Request |
|
6 |
from starlette.responses import Response |
|
7 |
|
|
8 |
|
|
9 |
def metrics(request: Request) -> Response: |
|
10 |
if "prometheus_multiproc_dir" in os.environ: |
|
11 |
registry = CollectorRegistry() |
|
12 |
MultiProcessCollector(registry) |
|
13 |
else: |
|
14 |
registry = REGISTRY |
|
15 |
|
|
16 |
return Response(generate_latest(registry), media_type=CONTENT_TYPE_LATEST) |
modules/dli-api/branches/ES_7/main.py | ||
---|---|---|
1 |
from fastapi import FastAPI |
|
2 |
from v2.api_v2 import subapi_v2 |
|
3 |
from v1.api import subapi_v1 |
|
4 |
from pympler import muppy, summary |
|
5 |
import logging |
|
6 |
from eu.dnetlib.metrics_utils import metrics |
|
7 |
from eu.dnetlib.metricsMiddleWare import PrometheusMiddleware |
|
8 |
import sys |
|
9 |
|
|
10 |
|
|
11 |
app = FastAPI() |
|
12 |
|
|
13 |
log = logging.getLogger("scholexplorer") |
|
14 |
log.setLevel(logging.INFO) |
|
15 |
fh = logging.StreamHandler(sys.stdout) |
|
16 |
fh.setLevel(logging.INFO) |
|
17 |
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') |
|
18 |
fh.setFormatter(formatter) |
|
19 |
log.addHandler(fh) |
|
20 |
log.info("Scholexplorer Api Restarted") |
|
21 |
|
|
22 |
|
|
23 |
|
|
24 |
app.add_middleware(PrometheusMiddleware) |
|
25 |
app.add_route("/metrics/", metrics) |
|
26 |
app.mount("/v2", subapi_v2) |
|
27 |
app.mount("/v1", subapi_v1) |
Also available in: Unified diff
created new branch for elasticsearch 7.5