Project

General

Profile

1
package eu.dnetlib.dli;
2

    
3
import java.io.InputStream;
4
import java.util.HashMap;
5
import java.util.List;
6
import java.util.Map;
7
import javax.annotation.PostConstruct;
8

    
9
import com.google.common.collect.BiMap;
10
import com.google.common.collect.HashBiMap;
11
import com.google.gson.Gson;
12
import eu.dnetlib.data.transform.Ontologies;
13
import eu.dnetlib.data.transform.OntologyLoader;
14
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
15
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
16
import eu.dnetlib.pid.resolver.model.PID;
17
import eu.dnetlib.rmi.enabling.ISLookUpException;
18
import eu.dnetlib.rmi.enabling.ISLookUpService;
19
import org.apache.commons.io.IOUtils;
20
import org.apache.commons.lang3.StringUtils;
21
import org.apache.commons.lang3.tuple.ImmutablePair;
22
import org.apache.commons.lang3.tuple.Pair;
23
import org.springframework.beans.factory.annotation.Autowired;
24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
26

    
27
public class DLIUtils {
28

    
29
    /**
     * Cache of datasources keyed by namespace prefix. Each value pairs the datasource id (left)
     * with its English name (right). Filled by {@link #generateDSMap()}.
     */
    public final static Map<String, Pair<String, String>> datasources = new HashMap<>();
30
    public static final Map<String, String> resolvedTypes = new HashMap<String, String>() {
31
        {
32
            put("pdb", "http://www.rcsb.org/pdb/explore/explore.do?structureId=%s");
33
            put("ncbi-n", "http://www.ncbi.nlm.nih.gov/gquery/?term=%s");
34
            put("pmid", "http://www.ncbi.nlm.nih.gov/pubmed/%s");
35
            put("pmcid", "http://www.ncbi.nlm.nih.gov/pmc/articles/%s");
36
            put("pubmedid", "http://www.ncbi.nlm.nih.gov/pubmed/%s");
37
            put("doi", "http://dx.doi.org/%s");
38
            put("genbank", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
39
            put("nuccore", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
40
            put("swiss-prot", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
41
            put("arrayexpress", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
42
            put("biomodels", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
43
            put("bmrb", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
44
            put("ena", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
45
            put("genbank", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
46
            put("geo", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
47
            put("ensembl", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
48
            put("mgi", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
49
            put("bind", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
50
            put("pride", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
51
            put("ddbj", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
52
            put("bioproject", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
53
            put("embl", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
54
            put("sra", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
55
        }
56

    
57
    };
58
    /**
     * Ontologies consulted by {@link #getInverse(String)}; loaded there through
     * {@code OntologyLoader.loadOntologiesFromIS()} on first use while this field is still null.
     */
    public static Ontologies ontologies;
59
    /**
     * Instance the static helpers delegate to. Assigned by {@link #registerInstance()} and by
     * {@link #setInstance(DLIUtils)}; null until one of them has run.
     */
    private static DLIUtils instance;
60
    /** Relation map created lazily by {@code getRelationMap()}; null until its first call. */
    private static BiMap<String, String> relations;
61

    
62
    /**
     * Map parsed in {@link #registerInstance()} from the classpath resource
     * {@code /eu/dnetlib/transformation/data-center.json}; {@link #getPublisherName(String)}
     * looks it up with a trimmed, lower-cased publisher string.
     */
    private Map<String, String> dataciteDatasource;
63

    
64
    @Autowired
65
    // Locator used by generateDSMap() to obtain the ISLookUpService; also settable via setServiceLocator().
    private UniqueServiceLocator serviceLocator;
66

    
67
    private static BiMap<String, String> getRelationMap() {
68

    
69
        if (relations == null) {
70
            relations = HashBiMap.create();
71
            relations.put("IsCitedBy", "Cites");
72
            relations.put("IsSupplementTo", "IsSupplementedBy");
73
            relations.put("IsReferencedBy", "References");
74
        }
75
        return relations;
76
    }
77

    
78
    public static String getNameFromDataSourcePrefix(final String datasourcePrefix) throws ISLookUpException {
79
        if (datasources.keySet().size() == 0) {
80
            generateDSMap();
81
        }
82
        if (!datasources.containsKey(datasourcePrefix))
83
            return "";
84
        return datasources.get(datasourcePrefix).getRight();
85
    }
86

    
87
    public static String getIdFromDataSourcePrefix(final String datasourcePrefix) throws ISLookUpException {
88
        if (datasources.keySet().size() == 0) {
89
            generateDSMap();
90
        }
91
        if (!datasources.containsKey(datasourcePrefix))
92
            return "";
93
        return datasources.get(datasourcePrefix).getLeft();
94
    }
95

    
96
    public static String getPublisherName(final String publisher) {
97
        if (instance.getDataciteDatasource() != null) {
98
            return instance.getDataciteDatasource().get(publisher.trim().toLowerCase());
99
        }
100
        return "";
101
    }
102

    
103
    public static void generateDSMap() throws ISLookUpException {
104
        if (datasources.keySet().size() > 0)
105
            return;
106

    
107
        final String query = "for $x in collection('/db/DRIVER/RepositoryServiceResources/RepositoryServiceResourceType') "
108
                + "return concat($x//FIELD[./key='NamespacePrefix']/value/text(),'@--@',$x//FIELD[./key='DataSourceId']/value/text(),'@--@',$x//ENGLISH_NAME )";
109
        final ISLookUpService lookupService = instance.getServiceLocator().getService(ISLookUpService.class);
110
        final List<String> results = lookupService.quickSearchProfile(query);
111
        datasources.clear();
112
        if (results != null)
113
            results.forEach(it -> {
114
                final String[] splitted = it.split("@--@");
115
                if (splitted.length == 3) {
116
                    datasources.put(splitted[0], new ImmutablePair<>(splitted[1], splitted[2]));
117
                }
118
            });
119
    }
120

    
121
    public static String inferPidType(final String pid, final String pidType) {
122
        if (pidType != null && !pidType.toLowerCase().equals("doi")) {
123
            if (pid != null && pid.contains("http://dx.doi.org/") || pid.contains("http://doi.org/"))
124
                return "doi";
125
        }
126
        return pidType;
127
    }
128

    
129
    public static String fixPID(String input) {
130
        if (input != null) {
131
            return input.replace("http://dx.doi.org/", "").replace("http://doi.org/", "");
132
        }
133
        return null;
134
    }
135

    
136

    
137
    public static String geussPidType(final String pidType, final String pid ) {
138
        if (isValidDoi(pid)!= null){
139
            return "doi";
140
        }
141
        return pidType;
142
    }
143

    
144

    
145
    public static String geussPidValue(final String pid) {
146
        if (isValidDoi(pid)!= null){
147
            return isValidDoi(pid);
148
        }
149
        return pid;
150
    }
151

    
152

    
153
    public static PID createCorrectPID(final String pid, final String pidType, final String resolvedUrl) {
154
        PID correctPID = createCorrectPID(pid, pidType);
155
        correctPID.setResolvedUrl(resolvedUrl);
156
        return correctPID;
157
    }
158

    
159
    public static PID createCorrectPID(final String pid, final String pidType) {
160
        final String validDoi = isValidDoi(pid);
161
        if (validDoi!= null) {
162
            return new PID(validDoi.toLowerCase(), "doi");
163
        }
164
        return new PID(pid, pidType);
165
    }
166

    
167
    public static String isValidDoi(final String url) {
168

    
169

    
170
        final String regex = "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&'])\\S)+)";
171

    
172

    
173
        final Pattern pattern = Pattern.compile(regex);
174
        final Matcher matcher = pattern.matcher(url);
175

    
176
        if (matcher.find())
177
            return matcher.group(0);
178

    
179

    
180
        return null;
181

    
182
    }
183

    
184
    public static String normalizeRelation(final String relation) {
185
        if (relation == null || StringUtils.isEmpty(relation)) {
186
            return null;
187
        }
188
        return Character.toLowerCase(relation.charAt(0)) + relation.substring(1);
189

    
190
    }
191

    
192
    public static String getInverse(final String relation) throws Exception {
193
        if (ontologies == null) {
194
            ontologies = OntologyLoader.loadOntologiesFromIS();
195
        }
196
        final String normalizedRelation = normalizeRelation(relation);
197

    
198

    
199
        try {
200
            return ontologies.getTerms(normalizedRelation).stream().findFirst().get().getInverseCode();
201
        } catch (Throwable e) {
202
            System.out.println("Relation not found = " + normalizedRelation);
203
            return "related";
204
        }
205
    }
206

    
207

    
208
    public static String generateIdentifier(final String pid, final String pidtype) {
209
        if (StringUtils.isBlank(pid) || StringUtils.isBlank(pidtype))
210
            throw new RuntimeException("Error pid or pidtype cannot be null");
211
        return DnetXsltFunctions.md5(String.format("%s::%s", pid.toLowerCase().trim(), pidtype.toLowerCase().trim()));
212
    }
213

    
214
    /**
215
     * This method is used only for test Scope
216
     *
217
     * @param mockInstance
218
     */
219
    public static void setInstance(final DLIUtils mockInstance) {
220
        instance = mockInstance;
221
    }
222

    
223
    @PostConstruct
224
    public void registerInstance() throws Exception {
225
        instance = this;
226
        final InputStream inputStream = this.getClass().getResourceAsStream("/eu/dnetlib/transformation/data-center.json");
227
        dataciteDatasource = new Gson().fromJson(IOUtils.toString(inputStream), Map.class);
228
    }
229

    
230
    public UniqueServiceLocator getServiceLocator() {
231
        return serviceLocator;
232
    }
233

    
234
    public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
235
        this.serviceLocator = serviceLocator;
236
    }
237

    
238
    public Map<String, String> getDataciteDatasource() {
239
        return dataciteDatasource;
240
    }
241
}
    (1-1/1)