Project

General

Profile

1
from pydantic import BaseModel, Schema
2
from typing import Dict, List
3
from datetime import datetime
4
from fastapi import HTTPException
5
import logging
6
from time import time
7

    
8
# Maps a lower-cased relationship name to its canonical Scholix spelling.
# Names missing from this table are handled by the caller's fallback.
rels = {
    "issupplementto": "IsSupplementTo",
    "issupplementedby": "IsSupplementedBy",
    "references": "References",
    "isreferencedby": "IsReferencedBy",
}
10

    
11

    
12
# URL template shared by every scheme listed in _NUCEST_SCHEMES.
# NOTE(review): schemes such as "swiss-prot", "arrayexpress" or "pride" all
# resolve to the NCBI nucest/genbank URL here, which looks like a placeholder
# rather than a real resolver for those databases -- confirm with the owner.
_NUCEST_GENBANK_URL = "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank"
_NUCEST_SCHEMES = (
    "genbank",
    "nuccore",
    "swiss-prot",
    "arrayexpress",
    "biomodels",
    "bmrb",
    "ena",
    "geo",
    "ensembl",
    "mgi",
    "bind",
    "pride",
    "ddbj",
    "bioproject",
    "embl",
    "sra",
)

# Maps a lower-case identifier scheme to a %-style URL template with a single
# "%s" placeholder for the identifier value.
pid_resolver = {
    "pdb": "http://www.rcsb.org/pdb/explore/explore.do?structureId=%s",
    "ncbi-n": "http://www.ncbi.nlm.nih.gov/gquery/?term=%s",
    "ncbi": "http://www.ncbi.nlm.nih.gov/gquery/?term=%s",
    "pmid": "http://www.ncbi.nlm.nih.gov/pubmed/%s",
    "pmcid": "http://www.ncbi.nlm.nih.gov/pmc/articles/%s",
    "pubmedid": "http://www.ncbi.nlm.nih.gov/pubmed/%s",
    "doi": "http://dx.doi.org/%s",
    **dict.fromkeys(_NUCEST_SCHEMES, _NUCEST_GENBANK_URL),
    # A "url" identifier is already a URL and is passed through unchanged.
    "url": "%s",
}
38

    
39

    
40
def resolveIdentifier(pid, pid_type):
    """Return a resolvable URL for the identifier *pid* of scheme *pid_type*.

    The scheme is looked up case-insensitively in ``pid_resolver``; schemes
    that are not listed there fall back to an identifiers.org URL built from
    the scheme exactly as given (not lower-cased).

    Args:
        pid: The identifier value substituted into the URL template.
        pid_type: The identifier scheme (e.g. ``"doi"``), or ``None``.

    Returns:
        The URL string, or ``None`` when *pid_type* is ``None``.
    """
    # Identity test instead of ``!= None`` so a custom ``__ne__`` cannot
    # influence the check.
    if pid_type is None:
        return None

    scheme = pid_type.lower()
    if scheme in pid_resolver:
        return pid_resolver[scheme] % pid
    return "http://identifiers.org/%s:%s" % (pid_type, pid)
46

    
47

    
48
class LinkProvider(BaseModel):
    # Summary of a link provider: its name and the number of links it offers.
    # Documented with comments (not a docstring) so the model's __doc__ is
    # left exactly as it was.

    # Provider name; defaults to None, limited to 300 characters.
    name: str = Schema(
        None,
        title="The name of the Provider that provides the links",
        max_length=300,
    )
    # Number of links supplied by this provider; defaults to None.
    totalRelationships: int = Schema(
        None,
        title="The number of links that It provides",
    )
51

    
52
class LinkPublisher(BaseModel):
    # Summary of a link publisher: its name and the number of links it offers.
    # Documented with comments (not a docstring) so the model's __doc__ is
    # left exactly as it was.

    # Publisher name; defaults to None, limited to 300 characters.
    name: str = Schema(
        None,
        title="The name of the Publisher that provides the links",
        max_length=300,
    )
    # Number of links supplied by this publisher; defaults to None.
    totalRelationships: int = Schema(
        None,
        title="The number of links that It provides",
    )
55

    
56

    
57
class IdentifierType(BaseModel):
    # One identifier of a Scholix entity. All three fields default to None.
    # Documented with comments (not a docstring) so the model's __doc__ is
    # left exactly as it was.

    # The identifier value itself.
    ID: str = None
    # The scheme the identifier belongs to.
    IDScheme: str = None
    # A URL for the identifier.
    IDURL: str = None
61

    
62
class ScholixProviderType(BaseModel):
    # A named provider/publisher together with its identifiers.
    # Documented with comments (not a docstring) so the model's __doc__ is
    # left exactly as it was.

    # Display name; defaults to None.
    name: str = None
    # Identifiers attached to the provider; defaults to an empty list.
    identifier: List[IdentifierType] = []
65

    
66
class RelationshipType(BaseModel):
    # The relationship carried by a link.
    # Documented with comments (not a docstring) so the model's __doc__ is
    # left exactly as it was.

    # Relationship name (required).
    Name: str
    # Optional sub-type of the relationship; defaults to None.
    SubType: str = None
    # Optional schema the sub-type is drawn from; defaults to None.
    SubTypeSchema: str = None
70

    
71
class CreatorType(BaseModel):
    # A creator of a Scholix item.
    # Documented with comments (not a docstring) so the model's __doc__ is
    # left exactly as it was.

    # Creator name (required).
    Name: str
    # Identifiers of the creator; defaults to an empty list.
    Identifier: List[IdentifierType] = []
74
    
75

    
76

    
77
class ScholixItemType(BaseModel):
    # One end (source or target) of a Scholix link.
    # Documented with comments (not a docstring) so the model's __doc__ is
    # left exactly as it was.

    # Identifiers of the item; defaults to an empty list.
    Identifier: List[IdentifierType] = []
    # Item title; defaults to None.
    Title: str = None
    # Item type (required).
    Type: str
    # Creators of the item; defaults to an empty list.
    Creator: List[CreatorType] = []
    # Publication date as a string; defaults to None.
    PublicationDate: str = None
    # Publishers of the item; defaults to an empty list.
    Publisher: List[ScholixProviderType] = []
84

    
85
class ScholixType(BaseModel):
    # A single Scholix link between a source item and a target item.
    # Documented with comments (not a docstring) so the model's __doc__ is
    # left exactly as it was.

    # Harvest date as a string; defaults to None.
    HarvestDate: str = None
    # License URL; defaults to None.
    LicenseURL: str = None
    # Providers of the link; defaults to an empty list.
    LinkProvider: List[ScholixProviderType] = []
    # Publication date of the link as a string; defaults to None.
    LinkPublicationDate: str = None
    # Relationship between source and target (required). The field shares its
    # name with the model class it is annotated with.
    RelationshipType: RelationshipType
    # The two ends of the link (both required).
    source: ScholixItemType
    target: ScholixItemType
93
    
94

    
95
class PageResultType(BaseModel):
    # One page of link results plus paging counters.
    # Documented with comments (not a docstring) so the model's __doc__ is
    # left exactly as it was.

    # Paging counters (all required integers).
    currentPage: int
    totalLinks: int
    totalPages: int
    # Links on this page; defaults to an empty list.
    result: List[ScholixType] = []
100

    
101

    
102
def get_scholix_resource(item):
    """Build the plain-dict description of one end of a link.

    *item* must support ``in`` membership tests for its optional fields
    ('title', 'creator', 'publicationDate', 'publisher') and attribute access
    for all fields. ``identifier``, ``dnetIdentifier`` and ``objectType`` are
    read unconditionally.

    Returns:
        A dict with the keys ``Title``, ``Identifier``, ``Creator``,
        ``PublicationDate``, ``Publisher`` and ``Type``.
    """
    # A missing title becomes ''; a title that is present but None stays None.
    title = item.title if 'title' in item else ''
    # Strip one pair of enclosing double quotes, if the title has them.
    if title is not None and len(title) and title[0] == '"' and title[-1] == '"':
        title = title[1:-1]

    identifiers = []
    for pid in item.identifier:
        identifiers.append({'ID': pid.identifier, 'IDScheme': pid.schema, 'IDURL': pid.url})
    # The D-Net identifier is always appended last, with a detail-page URL.
    dnet_id = item.dnetIdentifier
    identifiers.append({
        'ID': dnet_id,
        'IDScheme': 'D-Net Identifier',
        'IDURL': 'http://scholexplorer.openaire.eu/index.html#/detail/%s' % dnet_id,
    })

    has_creators = 'creator' in item and item.creator is not None
    creators = [{'Name': person.name} for person in item.creator] if has_creators else []

    publication_date = item.publicationDate if 'publicationDate' in item else None

    has_publishers = 'publisher' in item and item.publisher is not None
    # Publishers without a name are dropped.
    publishers = (
        [{'name': pub.name} for pub in item.publisher if pub.name is not None]
        if has_publishers
        else []
    )

    # 'publication' objects are reported as 'literature'; other types pass through.
    object_type = item.objectType
    resource_type = 'literature' if object_type == 'publication' else object_type

    return {
        'Title': title,
        'Identifier': identifiers,
        'Creator': creators,
        'PublicationDate': publication_date,
        'Publisher': publishers,
        'Type': resource_type,
    }
128

    
129

    
130
def convert_response(response):
    """Lazily convert each hit of *response* into a Scholix link dict.

    This is a generator: nothing is converted until the caller iterates, and
    the timing line is only logged once the generator is exhausted.

    Args:
        response: A search response exposing ``hits``; each hit provides
            ``publicationDate``, ``linkprovider``, ``relationship``,
            ``source`` and ``target``.

    Yields:
        One dict per hit with the keys ``LinkPublicationDate``,
        ``HarvestDate``, ``LinkProvider``, ``RelationshipType``, ``source``
        and ``target``.
    """
    log = logging.getLogger("scholexplorer")
    start = time()
    for item in response.hits:
        # NOTE(review): HarvestDate is filled from publicationDate, the same
        # value as LinkPublicationDate -- confirm this is intended.
        current_item = {
            'LinkPublicationDate': item.publicationDate,
            'HarvestDate': item.publicationDate,
            'LinkProvider': [
                ScholixProviderType(
                    name=provider.name,
                    identifier=[
                        IdentifierType(ID=x.identifier, IDScheme=x.schema)
                        for x in provider.identifiers
                    ],
                )
                for provider in item.linkprovider
            ],
        }

        # Names missing from ``rels`` are reported as the generic
        # "IsRelatedTo"; the original name is preserved as the SubType.
        relationship = item.relationship
        current_item['RelationshipType'] = dict(
            Name=rels.get(relationship.name.lower(), "IsRelatedTo"),
            SubType=relationship.name,
            SubTypeSchema=relationship.schema,
        )

        current_item['source'] = get_scholix_resource(item.source)
        current_item['target'] = get_scholix_resource(item.target)

        yield current_item

    # time() returns seconds; scale so the value matches the "ms" label.
    elapsed_ms = (time() - start) * 1000
    log.debug("response converted in %s ms", elapsed_ms)
152

    
153
def create_response(response, current_page):
    """Assemble the paged result dict for a search *response*.

    Args:
        response: A search response exposing ``hits.total.value``; it is also
            handed to ``convert_response``.
        current_page: Paging position supplied by the caller; it is divided
            by 100 to obtain ``currentPage``.

    Returns:
        A dict with ``totalLinks``, ``currentPage``, ``totalPages`` and
        ``result``. ``result`` is the lazy generator returned by
        ``convert_response``, so the links are converted only when iterated.

    Raises:
        HTTPException: status 400 when *current_page* is greater than 9999.
    """
    log = logging.getLogger("scholexplorer")
    if current_page > 9999:
        raise HTTPException(status_code=400, detail="MAX NUMBER OF PAGE REACHED")

    start = time()

    total_links = response.hits.total.value
    # Floor division keeps the counters integral; true division produced
    # floats such as 2.0 or 3.5 for these page numbers.
    # NOTE(review): ``1 + total // 100`` reports one page more than needed
    # when the total is an exact multiple of 100 -- confirm whether that is
    # intended before changing it.
    result = {
        'totalLinks': total_links,
        'currentPage': current_page // 100,
        'totalPages': 1 + total_links // 100,
        'result': convert_response(response),
    }

    # time() returns seconds; scale so the value matches the "ms" label.
    # Only the set-up is timed here, since the conversion itself is lazy.
    elapsed_ms = (time() - start) * 1000
    log.debug("response created in %s ms", elapsed_ms)
    return result
(3-3/3)