1
|
package eu.dnetlib.dli;
|
2
|
|
3
|
import java.io.InputStream;
|
4
|
import java.util.HashMap;
|
5
|
import java.util.List;
|
6
|
import java.util.Map;
|
7
|
import javax.annotation.PostConstruct;
|
8
|
|
9
|
import com.google.common.collect.BiMap;
|
10
|
import com.google.common.collect.HashBiMap;
|
11
|
import com.google.gson.Gson;
|
12
|
import eu.dnetlib.data.transform.Ontologies;
|
13
|
import eu.dnetlib.data.transform.OntologyLoader;
|
14
|
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
|
15
|
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
|
16
|
import eu.dnetlib.pid.resolver.model.PID;
|
17
|
import eu.dnetlib.rmi.enabling.ISLookUpException;
|
18
|
import eu.dnetlib.rmi.enabling.ISLookUpService;
|
19
|
import org.apache.commons.io.IOUtils;
|
20
|
import org.apache.commons.lang3.StringUtils;
|
21
|
import org.apache.commons.lang3.tuple.ImmutablePair;
|
22
|
import org.apache.commons.lang3.tuple.Pair;
|
23
|
import org.springframework.beans.factory.annotation.Autowired;
|
24
|
import java.util.regex.Matcher;
|
25
|
import java.util.regex.Pattern;
|
26
|
|
27
|
public class DLIUtils {
|
28
|
|
29
|
/**
 * Cache of datasource descriptors keyed by namespace prefix. Each value pairs the
 * datasource id (left) with the datasource English name (right); entries are
 * added by {@code generateDSMap()}.
 */
public static final Map<String, Pair<String, String>> datasources = new HashMap<>();
|
30
|
public static final Map<String, String> resolvedTypes = new HashMap<String, String>() {
|
31
|
{
|
32
|
put("pdb", "http://www.rcsb.org/pdb/explore/explore.do?structureId=%s");
|
33
|
put("ncbi-n", "http://www.ncbi.nlm.nih.gov/gquery/?term=%s");
|
34
|
put("pmid", "http://www.ncbi.nlm.nih.gov/pubmed/%s");
|
35
|
put("pmcid", "http://www.ncbi.nlm.nih.gov/pmc/articles/%s");
|
36
|
put("pubmedid", "http://www.ncbi.nlm.nih.gov/pubmed/%s");
|
37
|
put("doi", "http://dx.doi.org/%s");
|
38
|
put("genbank", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
39
|
put("nuccore", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
40
|
put("swiss-prot", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
41
|
put("arrayexpress", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
42
|
put("biomodels", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
43
|
put("bmrb", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
44
|
put("ena", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
45
|
put("genbank", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
46
|
put("geo", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
47
|
put("ensembl", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
48
|
put("mgi", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
49
|
put("bind", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
50
|
put("pride", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
51
|
put("ddbj", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
52
|
put("bioproject", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
53
|
put("embl", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
54
|
put("sra", "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank");
|
55
|
}
|
56
|
|
57
|
};
|
58
|
public static Ontologies ontologies;
|
59
|
private static DLIUtils instance;
|
60
|
private static BiMap<String, String> relations;
|
61
|
|
62
|
private Map<String, String> dataciteDatasource;
|
63
|
|
64
|
@Autowired
|
65
|
private UniqueServiceLocator serviceLocator;
|
66
|
|
67
|
private static BiMap<String, String> getRelationMap() {
|
68
|
|
69
|
if (relations == null) {
|
70
|
relations = HashBiMap.create();
|
71
|
relations.put("IsCitedBy", "Cites");
|
72
|
relations.put("IsSupplementTo", "IsSupplementedBy");
|
73
|
relations.put("IsReferencedBy", "References");
|
74
|
}
|
75
|
return relations;
|
76
|
}
|
77
|
|
78
|
public static String getNameFromDataSourcePrefix(final String datasourcePrefix) throws ISLookUpException {
|
79
|
if (datasources.keySet().size() == 0) {
|
80
|
generateDSMap();
|
81
|
}
|
82
|
if (!datasources.containsKey(datasourcePrefix))
|
83
|
return "";
|
84
|
return datasources.get(datasourcePrefix).getRight();
|
85
|
}
|
86
|
|
87
|
public static String getIdFromDataSourcePrefix(final String datasourcePrefix) throws ISLookUpException {
|
88
|
if (datasources.keySet().size() == 0) {
|
89
|
generateDSMap();
|
90
|
}
|
91
|
if (!datasources.containsKey(datasourcePrefix))
|
92
|
return "";
|
93
|
return datasources.get(datasourcePrefix).getLeft();
|
94
|
}
|
95
|
|
96
|
public static String getPublisherName(final String publisher) {
|
97
|
if (instance.getDataciteDatasource() != null) {
|
98
|
return instance.getDataciteDatasource().get(publisher.trim().toLowerCase());
|
99
|
}
|
100
|
return "";
|
101
|
}
|
102
|
|
103
|
public static void generateDSMap() throws ISLookUpException {
|
104
|
if (datasources.keySet().size() > 0)
|
105
|
return;
|
106
|
|
107
|
final String query = "for $x in collection('/db/DRIVER/RepositoryServiceResources/RepositoryServiceResourceType') "
|
108
|
+ "return concat($x//FIELD[./key='NamespacePrefix']/value/text(),'@--@',$x//FIELD[./key='DataSourceId']/value/text(),'@--@',$x//ENGLISH_NAME )";
|
109
|
final ISLookUpService lookupService = instance.getServiceLocator().getService(ISLookUpService.class);
|
110
|
final List<String> results = lookupService.quickSearchProfile(query);
|
111
|
datasources.clear();
|
112
|
if (results != null)
|
113
|
results.forEach(it -> {
|
114
|
final String[] splitted = it.split("@--@");
|
115
|
if (splitted.length == 3) {
|
116
|
datasources.put(splitted[0], new ImmutablePair<>(splitted[1], splitted[2]));
|
117
|
}
|
118
|
});
|
119
|
}
|
120
|
|
121
|
public static String inferPidType(final String pid, final String pidType) {
|
122
|
if (pidType != null && !pidType.toLowerCase().equals("doi")) {
|
123
|
if (pid != null && pid.contains("http://dx.doi.org/") || pid.contains("http://doi.org/"))
|
124
|
return "doi";
|
125
|
}
|
126
|
return pidType;
|
127
|
}
|
128
|
|
129
|
public static String fixPID(String input) {
|
130
|
if (input != null) {
|
131
|
return input.replace("http://dx.doi.org/", "").replace("http://doi.org/", "");
|
132
|
}
|
133
|
return null;
|
134
|
}
|
135
|
|
136
|
|
137
|
public static String geussPidType(final String pidType, final String pid ) {
|
138
|
if (isValidDoi(pid)!= null){
|
139
|
return "doi";
|
140
|
}
|
141
|
return pidType;
|
142
|
}
|
143
|
|
144
|
|
145
|
public static String geussPidValue(final String pid) {
|
146
|
if (isValidDoi(pid)!= null){
|
147
|
return isValidDoi(pid);
|
148
|
}
|
149
|
return pid;
|
150
|
}
|
151
|
|
152
|
|
153
|
public static PID createCorrectPID(final String pid, final String pidType, final String resolvedUrl) {
|
154
|
PID correctPID = createCorrectPID(pid, pidType);
|
155
|
correctPID.setResolvedUrl(resolvedUrl);
|
156
|
return correctPID;
|
157
|
}
|
158
|
|
159
|
public static PID createCorrectPID(final String pid, final String pidType) {
|
160
|
final String validDoi = isValidDoi(pid);
|
161
|
if (validDoi!= null) {
|
162
|
return new PID(validDoi.toLowerCase(), "doi");
|
163
|
}
|
164
|
return new PID(pid, pidType);
|
165
|
}
|
166
|
|
167
|
public static String isValidDoi(final String url) {
|
168
|
|
169
|
|
170
|
final String regex = "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&'])\\S)+)";
|
171
|
|
172
|
|
173
|
final Pattern pattern = Pattern.compile(regex);
|
174
|
final Matcher matcher = pattern.matcher(url);
|
175
|
|
176
|
if (matcher.find())
|
177
|
return matcher.group(0);
|
178
|
|
179
|
|
180
|
return null;
|
181
|
|
182
|
}
|
183
|
|
184
|
public static String normalizeRelation(final String relation) {
|
185
|
if (relation == null || StringUtils.isEmpty(relation)) {
|
186
|
return null;
|
187
|
}
|
188
|
return Character.toLowerCase(relation.charAt(0)) + relation.substring(1);
|
189
|
|
190
|
}
|
191
|
|
192
|
public static String getInverse(final String relation) throws Exception {
|
193
|
if (ontologies == null) {
|
194
|
ontologies = OntologyLoader.loadOntologiesFromIS();
|
195
|
}
|
196
|
final String normalizedRelation = normalizeRelation(relation);
|
197
|
|
198
|
|
199
|
try {
|
200
|
return ontologies.getTerms(normalizedRelation).stream().findFirst().get().getInverseCode();
|
201
|
} catch (Throwable e) {
|
202
|
System.out.println("Relation not found = " + normalizedRelation);
|
203
|
return "related";
|
204
|
}
|
205
|
}
|
206
|
|
207
|
|
208
|
public static String generateIdentifier(final String pid, final String pidtype) {
|
209
|
if (StringUtils.isBlank(pid) || StringUtils.isBlank(pidtype))
|
210
|
throw new RuntimeException("Error pid or pidtype cannot be null");
|
211
|
return DnetXsltFunctions.md5(String.format("%s::%s", pid.toLowerCase().trim(), pidtype.toLowerCase().trim()));
|
212
|
}
|
213
|
|
214
|
/**
|
215
|
* This method is used only for test Scope
|
216
|
*
|
217
|
* @param mockInstance
|
218
|
*/
|
219
|
public static void setInstance(final DLIUtils mockInstance) {
|
220
|
instance = mockInstance;
|
221
|
}
|
222
|
|
223
|
@PostConstruct
|
224
|
public void registerInstance() throws Exception {
|
225
|
instance = this;
|
226
|
final InputStream inputStream = this.getClass().getResourceAsStream("/eu/dnetlib/transformation/data-center.json");
|
227
|
dataciteDatasource = new Gson().fromJson(IOUtils.toString(inputStream), Map.class);
|
228
|
}
|
229
|
|
230
|
public UniqueServiceLocator getServiceLocator() {
|
231
|
return serviceLocator;
|
232
|
}
|
233
|
|
234
|
public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
|
235
|
this.serviceLocator = serviceLocator;
|
236
|
}
|
237
|
|
238
|
public Map<String, String> getDataciteDatasource() {
|
239
|
return dataciteDatasource;
|
240
|
}
|
241
|
}
|