Project

General

Profile

1
from pydantic import BaseModel, Schema
2
from typing import Dict, List
3
from datetime import datetime
4
from fastapi import HTTPException
5
import logging
6
from time import time
7

    
8
# Lookup from a lower-cased relationship name to its CamelCase relation name.
# Callers look keys up after ``.lower()``, so every key must stay lower-case.
rels = {
    "issupplementto": "IsSupplementTo",
    "issupplementedby": "IsSupplementedBy",
    "references": "References",
    "isreferencedby": "IsReferencedBy",
}
10

    
11

    
12
# URL templates keyed by lower-case identifier scheme. Each template holds a
# single ``%s`` placeholder that receives the identifier value.
# NOTE(review): every scheme from "swiss-prot" down to "sra" reuses the GenBank
# nucest URL; these look like placeholders rather than real resolvers -
# confirm before relying on them.
pid_resolver = {
    "pdb": "http://www.rcsb.org/pdb/explore/explore.do?structureId=%s",
    "ncbi-n": "http://www.ncbi.nlm.nih.gov/gquery/?term=%s",
    "ncbi": "http://www.ncbi.nlm.nih.gov/gquery/?term=%s",
    "pmid": "http://www.ncbi.nlm.nih.gov/pubmed/%s",
    "pmcid": "http://www.ncbi.nlm.nih.gov/pmc/articles/%s",
    "pubmedid": "http://www.ncbi.nlm.nih.gov/pubmed/%s",
    "doi": "http://dx.doi.org/%s",
    "genbank": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "nuccore": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "swiss-prot": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "arrayexpress": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "biomodels": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "bmrb": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "ena": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "geo": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "ensembl": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "mgi": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "bind": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "pride": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "ddbj": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "bioproject": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "embl": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
    "sra": "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank",
}
37

    
38

    
39
def resolveIdentifier(pid, pid_type):
    """Build a URL for an identifier from its scheme.

    Args:
        pid: The identifier value substituted into the URL template.
        pid_type: The identifier scheme, matched case-insensitively against
            the keys of ``pid_resolver``; may be ``None``.

    Returns:
        The filled-in ``pid_resolver`` template when the scheme is known,
        ``http://identifiers.org/<pid_type>:<pid>`` when it is not, and
        ``None`` when ``pid_type`` is ``None``.
    """
    if pid_type is None:
        return None
    template = pid_resolver.get(pid_type.lower())
    if template is None:
        # No dedicated template: fall back to identifiers.org, keeping the
        # scheme exactly as the caller spelled it.
        return "http://identifiers.org/%s:%s" % (pid_type, pid)
    # Wrap in a tuple so a tuple-valued pid cannot be unpacked by ``%``.
    return template % (pid,)
45

    
46

    
47
# Pairs a link provider's name with the number of relationships it supplies.
# Both fields default to None.
class LinkProvider(BaseModel):
    name: str = Schema(
        None,
        title="The name of the Provider that provides the links",
        max_length=300,
    )
    totalRelationships: int = Schema(
        None,
        title="The number of links that It provides",
    )
50

    
51
# Pairs a link publisher's name with the number of relationships it supplies.
# Both fields default to None.
class LinkPublisher(BaseModel):
    name: str = Schema(
        None,
        title="The name of the Publisher that provides the links",
        max_length=300,
    )
    totalRelationships: int = Schema(
        None,
        title="The number of links that It provides",
    )
54

    
55

    
56
# A single identifier: its value, its scheme and a URL for it.
# Every field is optional and defaults to None.
class IdentifierType(BaseModel):
    ID: str = None
    IDScheme: str = None
    IDURL: str = None
60

    
61
# A named provider together with the identifiers known for it.
# ``name`` defaults to None and ``identifier`` to an empty list.
class ScholixProviderType(BaseModel):
    name: str = None
    identifier: List[IdentifierType] = []
64

    
65
# The relationship of a link. ``Name`` is required; the sub-type and its
# schema are optional and default to None.
class RelationshipType(BaseModel):
    Name: str
    SubType: str = None
    SubTypeSchema: str = None
69

    
70
# A creator of an item. ``Name`` is required; ``Identifier`` defaults to an
# empty list.
class CreatorType(BaseModel):
    Name: str
    Identifier: List[IdentifierType] = []
73
    
74

    
75

    
76
# Description of one linked item (used for both ``source`` and ``target`` of
# ``ScholixType``). Only ``Type`` is required; list fields default to empty
# lists and the remaining fields to None.
class ScholixItemType(BaseModel):
    Identifier: List[IdentifierType] = []
    Title: str = None
    Type: str
    Creator: List[CreatorType] = []
    PublicationDate: str = None
    Publisher: List[ScholixProviderType] = []
83

    
84
# One link between a source and a target item. ``RelationshipType``,
# ``source`` and ``target`` are required; the other fields have defaults.
# Field order is significant: ``RelationshipType`` is annotation-only, so the
# name on the right-hand side still resolves to the module-level model class.
class ScholixType(BaseModel):
    HarvestDate: str = None
    LicenseURL: str = None
    LinkProvider: List[ScholixProviderType] = []
    LinkPublicationDate: str = None
    RelationshipType: RelationshipType
    source: ScholixItemType
    target: ScholixItemType
92
    
93

    
94
# One page of links plus paging counters. The three integer counters are
# required; ``result`` defaults to an empty list.
class PageResultType(BaseModel):
    currentPage: int
    totalLinks: int
    totalPages: int
    result: List[ScholixType] = []
99

    
100

    
101
def get_scholix_resource(item):
    """Convert one linked object into a plain dict describing the item.

    Args:
        item: Object supporting both ``'key' in item`` membership tests and
            attribute access (presumably a search-hit object - confirm against
            the caller). ``dnetIdentifier`` and ``objectType`` are always
            read; ``title``, ``identifier``, ``creator``, ``publicationDate``
            and ``publisher`` may be missing, and ``identifier``, ``creator``
            and ``publisher`` may also be ``None``.

    Returns:
        A dict with the keys ``Title``, ``Identifier``, ``Creator``,
        ``PublicationDate``, ``Publisher`` and ``Type``.
    """
    title = item.title if 'title' in item else ''
    # Drop one pair of surrounding double quotes, if present.
    if title and title[0] == '"' and title[-1] == '"':
        title = title[1:-1]

    # Guard the identifier list like creator/publisher below, so a missing or
    # None list no longer raises; the D-Net identifier is always emitted.
    raw_identifiers = []
    if 'identifier' in item and item.identifier is not None:
        raw_identifiers = item.identifier
    identifier = [
        dict(ID=x.identifier, IDScheme=x.schema,
             IDURL=resolveIdentifier(x.identifier, x.schema))
        for x in raw_identifiers
    ]
    identifier.append(dict(
        ID=item.dnetIdentifier,
        IDScheme='D-Net Identifier',
        IDURL='http://scholexplorer.openaire.eu/index.html#/detail/%s' % item.dnetIdentifier,
    ))

    creator = []
    if 'creator' in item and item.creator is not None:
        creator = [dict(Name=x.name) for x in item.creator]

    publicationDate = item.publicationDate if 'publicationDate' in item else None

    publisher = []
    if 'publisher' in item and item.publisher is not None:
        # Publishers without a name are skipped.
        publisher = [dict(name=x.name) for x in item.publisher if x.name is not None]

    # 'publication' objects are reported with the type 'literature'.
    c_type = 'literature' if item.objectType == 'publication' else item.objectType

    return dict(Title=title, Identifier=identifier, Creator=creator,
                PublicationDate=publicationDate, Publisher=publisher, Type=c_type)
127

    
128

    
129
def convert_response(response):
    """Lazily convert the hits of a search response into link dicts.

    This is a generator: nothing runs until it is iterated, and the timing
    debug message is only logged once every hit has been consumed.

    Args:
        response: Object whose ``hits`` attribute is iterated. Each hit must
            expose ``linkprovider``, ``relationship`` (with ``name`` and
            ``schema``), ``source`` and ``target``.

    Yields:
        One dict per hit with the keys ``LinkPublicationDate``,
        ``HarvestDate``, ``LinkProvider``, ``RelationshipType``, ``source``
        and ``target``.
    """
    log = logging.getLogger("scholexplorer")
    # Both date fields carry the conversion day; format it once, not per hit.
    today = datetime.now().strftime("%Y-%m-%d")
    start = time()
    for item in response.hits:
        providers = [
            ScholixProviderType(
                name=provider.name,
                identifier=[IdentifierType(ID=x.identifier, IDScheme=x.schema)
                            for x in provider.identifiers],
            )
            for provider in item.linkprovider
        ]
        relation_name = item.relationship.name
        # Names missing from ``rels`` are reported as the generic
        # "IsRelatedTo"; the original name is kept as the sub-type.
        relationship = dict(Name=rels.get(relation_name.lower(), "IsRelatedTo"),
                            SubType=relation_name,
                            SubTypeSchema=item.relationship.schema)
        yield {
            'LinkPublicationDate': today,
            'HarvestDate': today,
            'LinkProvider': providers,
            'RelationshipType': relationship,
            'source': get_scholix_resource(item.source),
            'target': get_scholix_resource(item.target),
        }
    # time() measures seconds; convert so the value matches the "ms" label.
    elapsed_ms = (time() - start) * 1000
    log.debug("response converted in %.3f ms", elapsed_ms)
151

    
152
def create_response(response, current_page):
    """Build the paged result envelope for a search response.

    Args:
        response: Search response; ``response.hits.total.value`` supplies the
            total number of links, and the response is handed to
            ``convert_response`` for the per-link conversion.
        current_page: Value checked against the 9999 limit and divided by 100
            to obtain the page number (so it appears to be a record offset
            with 100 links per page - confirm against the caller).

    Returns:
        A dict with the keys ``totalLinks``, ``currentPage``, ``totalPages``
        and ``result``. ``result`` is whatever ``convert_response`` returns
        (a lazy generator in this module), so no link has been converted yet
        when this function returns.

    Raises:
        HTTPException: With status 400 when ``current_page`` exceeds 9999.
    """
    log = logging.getLogger("scholexplorer")
    if current_page > 9999:
        raise HTTPException(status_code=400, detail="MAX NUMBER OF PAGE REACHED")

    start = time()
    total_links = response.hits.total.value
    result = {
        'totalLinks': total_links,
        # Floor division keeps the counters integral; "/" would yield floats
        # such as 2.5 for fields that PageResultType declares as int.
        'currentPage': current_page // 100,
        # NOTE(review): this reports one page too many when total_links is an
        # exact multiple of 100 (including 0). Left as is so existing output
        # does not change - confirm whether a ceiling division is wanted.
        'totalPages': 1 + total_links // 100,
        'result': convert_response(response),
    }
    # Only the envelope construction is timed here; the links themselves are
    # converted later, when 'result' is iterated. time() measures seconds, so
    # convert to match the "ms" label.
    elapsed_ms = (time() - start) * 1000
    log.debug("response created in %.3f ms", elapsed_ms)
    return result
(3-3/3)