Project

General

Profile

1 43618 sandro.lab
package eu.dnetlib.dli;
2 34805 sandro.lab
3 46990 sandro.lab
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.PostConstruct;

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.gson.Gson;
import eu.dnetlib.data.transform.Ontologies;
import eu.dnetlib.data.transform.OntologyLoader;
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
import eu.dnetlib.pid.resolver.model.PID;
import eu.dnetlib.rmi.enabling.ISLookUpException;
import eu.dnetlib.rmi.enabling.ISLookUpService;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.beans.factory.annotation.Autowired;
26 34805 sandro.lab
27 35554 sandro.lab
public class DLIUtils {
28 34805 sandro.lab
29 51054 sandro.lab
    /**
     * Datasources keyed by namespace prefix. Each value pairs the datasource id (left) with its English name
     * (right); the map is filled by {@link #generateDSMap()}.
     */
    public final static Map<String, Pair<String, String>> datasources = new HashMap<>();

    /** URL templates keyed by PID type; each template contains a single {@code %s} placeholder. */
    public static final Map<String, String> resolvedTypes = new HashMap<>();

    static {
        resolvedTypes.put("pdb", "http://www.rcsb.org/pdb/explore/explore.do?structureId=%s");
        resolvedTypes.put("ncbi-n", "http://www.ncbi.nlm.nih.gov/gquery/?term=%s");
        resolvedTypes.put("pmid", "http://www.ncbi.nlm.nih.gov/pubmed/%s");
        resolvedTypes.put("pmcid", "http://www.ncbi.nlm.nih.gov/pmc/articles/%s");
        resolvedTypes.put("pubmedid", "http://www.ncbi.nlm.nih.gov/pubmed/%s");
        resolvedTypes.put("doi", "http://dx.doi.org/%s");

        // NOTE(review): every type below shares the same nucest/GenBank template, exactly as in the original
        // table. That looks like a placeholder for several of them (e.g. "swiss-prot", "pride") - confirm.
        final String genbankTemplate = "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank";
        final String[] genbankTypes = {
                "genbank", "nuccore", "swiss-prot", "arrayexpress", "biomodels", "bmrb", "ena", "geo",
                "ensembl", "mgi", "bind", "pride", "ddbj", "bioproject", "embl", "sra"
        };
        for (final String type : genbankTypes) {
            resolvedTypes.put(type, genbankTemplate);
        }
    }

    /** Ontologies used by {@link #getInverse(String)}, which loads them on first use when still null. */
    public static Ontologies ontologies;

    /** Instance backing the static helpers; assigned by {@link #registerInstance()} or {@link #setInstance}. */
    private static DLIUtils instance;

    /** Relation pairs, created on demand by {@link #getRelationMap()}. */
    private static BiMap<String, String> relations;

    /** Content of the data-center.json resource, parsed by {@link #registerInstance()}. */
    private Map<String, String> dataciteDatasource;

    @Autowired
    private UniqueServiceLocator serviceLocator;
66 44355 claudio.at
67 51054 sandro.lab
    private static BiMap<String, String> getRelationMap() {
68 35554 sandro.lab
69 51054 sandro.lab
        if (relations == null) {
70
            relations = HashBiMap.create();
71
            relations.put("IsCitedBy", "Cites");
72
            relations.put("IsSupplementTo", "IsSupplementedBy");
73
            relations.put("IsReferencedBy", "References");
74
        }
75
        return relations;
76
    }
77 34805 sandro.lab
78 51054 sandro.lab
    public static String getNameFromDataSourcePrefix(final String datasourcePrefix) throws ISLookUpException {
79
        if (datasources.keySet().size() == 0) {
80
            generateDSMap();
81
        }
82
        if (!datasources.containsKey(datasourcePrefix))
83
            return "";
84
        return datasources.get(datasourcePrefix).getRight();
85
    }
86 44352 sandro.lab
87 51054 sandro.lab
    public static String getIdFromDataSourcePrefix(final String datasourcePrefix) throws ISLookUpException {
88
        if (datasources.keySet().size() == 0) {
89
            generateDSMap();
90
        }
91
        if (!datasources.containsKey(datasourcePrefix))
92
            return "";
93
        return datasources.get(datasourcePrefix).getLeft();
94
    }
95 44352 sandro.lab
96 46990 sandro.lab
    public static String getPublisherName(final String publisher) {
97
        if (instance.getDataciteDatasource() != null) {
98
            return instance.getDataciteDatasource().get(publisher.trim().toLowerCase());
99
        }
100
        return "";
101
    }
102
103 51054 sandro.lab
    public static void generateDSMap() throws ISLookUpException {
104
        if (datasources.keySet().size() > 0)
105
            return;
106 44352 sandro.lab
107 51054 sandro.lab
        final String query = "for $x in collection('/db/DRIVER/RepositoryServiceResources/RepositoryServiceResourceType') "
108
                + "return concat($x//FIELD[./key='NamespacePrefix']/value/text(),'@--@',$x//FIELD[./key='DataSourceId']/value/text(),'@--@',$x//ENGLISH_NAME )";
109 45717 sandro.lab
        final ISLookUpService lookupService = instance.getServiceLocator().getService(ISLookUpService.class);
110
        final List<String> results = lookupService.quickSearchProfile(query);
111
        datasources.clear();
112 51054 sandro.lab
        if (results != null)
113
            results.forEach(it -> {
114
                final String[] splitted = it.split("@--@");
115
                if (splitted.length == 3) {
116
                    datasources.put(splitted[0], new ImmutablePair<>(splitted[1], splitted[2]));
117
                }
118
            });
119
    }
120 44352 sandro.lab
121 51054 sandro.lab
    public static String inferPidType(final String pid, final String pidType) {
122
        if (pidType != null && !pidType.toLowerCase().equals("doi")) {
123
            if (pid != null && pid.contains("http://dx.doi.org/") || pid.contains("http://doi.org/"))
124
                return "doi";
125
        }
126
        return pidType;
127
    }
128 49054 sandro.lab
129 51054 sandro.lab
    public static String fixPID(String input) {
130
        if (input != null) {
131
            return input.replace("http://dx.doi.org/", "").replace("http://doi.org/", "");
132
        }
133
        return null;
134
    }
135 34937 sandro.lab
136 51637 sandro.lab
137
    public static String geussPidType(final String pidType, final String pid ) {
138
        if (isValidDoi(pid)!= null){
139
            return "doi";
140
        }
141
        return pidType;
142
    }
143
144
145
    public static String geussPidValue(final String pid) {
146
        if (isValidDoi(pid)!= null){
147
            return isValidDoi(pid);
148
        }
149
        return pid;
150
    }
151
152
153 54884 sandro.lab
    public static PID createCorrectPID(final String pid, final String pidType, final String resolvedUrl) {
154
        PID correctPID = createCorrectPID(pid, pidType);
155
        correctPID.setResolvedUrl(resolvedUrl);
156
        return correctPID;
157
    }
158 51637 sandro.lab
159 52724 sandro.lab
    public static PID createCorrectPID(final String pid, final String pidType) {
160
        final String validDoi = isValidDoi(pid);
161
        if (validDoi!= null) {
162
            return new PID(validDoi.toLowerCase(), "doi");
163
        }
164
        return new PID(pid, pidType);
165
    }
166 51637 sandro.lab
167
    public static String isValidDoi(final String url) {
168
169
170 54884 sandro.lab
        final String regex = "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&'])\\S)+)";
171 51637 sandro.lab
172
173
        final Pattern pattern = Pattern.compile(regex);
174
        final Matcher matcher = pattern.matcher(url);
175
176
        if (matcher.find())
177
            return matcher.group(0);
178
179
180
        return null;
181
182
    }
183
184 45803 sandro.lab
    public static String normalizeRelation(final String relation) {
185 45628 sandro.lab
        if (relation == null || StringUtils.isEmpty(relation)) {
186
            return null;
187
        }
188
        return Character.toLowerCase(relation.charAt(0)) + relation.substring(1);
189 35554 sandro.lab
190 45628 sandro.lab
    }
191
192
    public static String getInverse(final String relation) throws Exception {
193
        if (ontologies == null) {
194
            ontologies = OntologyLoader.loadOntologiesFromIS();
195
        }
196
        final String normalizedRelation = normalizeRelation(relation);
197
198
199
        try {
200 51054 sandro.lab
            return ontologies.getTerms(normalizedRelation).stream().findFirst().get().getInverseCode();
201 45628 sandro.lab
        } catch (Throwable e) {
202
            System.out.println("Relation not found = " + normalizedRelation);
203
            return "related";
204
        }
205
    }
206 39927 sandro.lab
207 45427 sandro.lab
208 51054 sandro.lab
    public static String generateIdentifier(final String pid, final String pidtype) {
209
        if (StringUtils.isBlank(pid) || StringUtils.isBlank(pidtype))
210
            throw new RuntimeException("Error pid or pidtype cannot be null");
211
        return DnetXsltFunctions.md5(String.format("%s::%s", pid.toLowerCase().trim(), pidtype.toLowerCase().trim()));
212
    }
213 44352 sandro.lab
214 45717 sandro.lab
    /**
215
     * This method is used only for test Scope
216
     *
217
     * @param mockInstance
218
     */
219
    public static void setInstance(final DLIUtils mockInstance) {
220
        instance = mockInstance;
221
    }
222
223 51054 sandro.lab
    @PostConstruct
224 46990 sandro.lab
    public void registerInstance() throws Exception {
225
        instance = this;
226
        final InputStream inputStream = this.getClass().getResourceAsStream("/eu/dnetlib/transformation/data-center.json");
227
        dataciteDatasource = new Gson().fromJson(IOUtils.toString(inputStream), Map.class);
228
    }
229 44352 sandro.lab
230 51054 sandro.lab
    public UniqueServiceLocator getServiceLocator() {
231
        return serviceLocator;
232
    }
233 44352 sandro.lab
234 51054 sandro.lab
    public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
235
        this.serviceLocator = serviceLocator;
236
    }
237 46990 sandro.lab
238
    public Map<String, String> getDataciteDatasource() {
239
        return dataciteDatasource;
240
    }
241 34805 sandro.lab
}