1 |
43618
|
sandro.lab
|
package eu.dnetlib.dli;
|
2 |
34805
|
sandro.lab
|
|
3 |
46990
|
sandro.lab
|
import java.io.InputStream;
|
4 |
44352
|
sandro.lab
|
import java.util.HashMap;
|
5 |
|
|
import java.util.List;
|
6 |
|
|
import java.util.Map;
|
7 |
|
|
import javax.annotation.PostConstruct;
|
8 |
|
|
|
9 |
34805
|
sandro.lab
|
import com.google.common.collect.BiMap;
|
10 |
|
|
import com.google.common.collect.HashBiMap;
|
11 |
46990
|
sandro.lab
|
import com.google.gson.Gson;
|
12 |
45427
|
sandro.lab
|
import eu.dnetlib.data.transform.Ontologies;
|
13 |
|
|
import eu.dnetlib.data.transform.OntologyLoader;
|
14 |
44352
|
sandro.lab
|
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
|
15 |
35554
|
sandro.lab
|
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
|
16 |
52724
|
sandro.lab
|
import eu.dnetlib.pid.resolver.model.PID;
|
17 |
44352
|
sandro.lab
|
import eu.dnetlib.rmi.enabling.ISLookUpException;
|
18 |
|
|
import eu.dnetlib.rmi.enabling.ISLookUpService;
|
19 |
46990
|
sandro.lab
|
import org.apache.commons.io.IOUtils;
|
20 |
43618
|
sandro.lab
|
import org.apache.commons.lang3.StringUtils;
|
21 |
44352
|
sandro.lab
|
import org.apache.commons.lang3.tuple.ImmutablePair;
|
22 |
|
|
import org.apache.commons.lang3.tuple.Pair;
|
23 |
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
24 |
51637
|
sandro.lab
|
import java.util.regex.Matcher;
|
25 |
|
|
import java.util.regex.Pattern;
|
26 |
34805
|
sandro.lab
|
|
27 |
35554
|
sandro.lab
|
public class DLIUtils {
|
28 |
34805
|
sandro.lab
|
|
29 |
51054
|
sandro.lab
|
/**
 * Datasource cache keyed by namespace prefix. Each value is the pair
 * (datasource id, English name). Filled on demand by {@link #generateDSMap()}.
 */
public static final Map<String, Pair<String, String>> datasources = new HashMap<>();

/**
 * URL templates keyed by pid type. Every template contains one {@code %s}
 * placeholder.
 * NOTE(review): most of the biological database types share the same
 * "nucest ... report=genbank" template - confirm this is intentional.
 */
public static final Map<String, String> resolvedTypes = new HashMap<>();

// A plain static initializer replaces the former double-brace initialization, which
// produced an anonymous HashMap subclass. The duplicated "genbank" entry is listed once.
static {
    final String genbankTemplate = "http://www.ncbi.nlm.nih.gov/nucest/%s?report=genbank";

    resolvedTypes.put("pdb", "http://www.rcsb.org/pdb/explore/explore.do?structureId=%s");
    resolvedTypes.put("ncbi-n", "http://www.ncbi.nlm.nih.gov/gquery/?term=%s");
    resolvedTypes.put("pmid", "http://www.ncbi.nlm.nih.gov/pubmed/%s");
    resolvedTypes.put("pmcid", "http://www.ncbi.nlm.nih.gov/pmc/articles/%s");
    resolvedTypes.put("pubmedid", "http://www.ncbi.nlm.nih.gov/pubmed/%s");
    resolvedTypes.put("doi", "http://dx.doi.org/%s");
    resolvedTypes.put("genbank", genbankTemplate);
    resolvedTypes.put("nuccore", genbankTemplate);
    resolvedTypes.put("swiss-prot", genbankTemplate);
    resolvedTypes.put("arrayexpress", genbankTemplate);
    resolvedTypes.put("biomodels", genbankTemplate);
    resolvedTypes.put("bmrb", genbankTemplate);
    resolvedTypes.put("ena", genbankTemplate);
    resolvedTypes.put("geo", genbankTemplate);
    resolvedTypes.put("ensembl", genbankTemplate);
    resolvedTypes.put("mgi", genbankTemplate);
    resolvedTypes.put("bind", genbankTemplate);
    resolvedTypes.put("pride", genbankTemplate);
    resolvedTypes.put("ddbj", genbankTemplate);
    resolvedTypes.put("bioproject", genbankTemplate);
    resolvedTypes.put("embl", genbankTemplate);
    resolvedTypes.put("sra", genbankTemplate);
}

/** Ontologies used by {@link #getInverse(String)}; loaded on first use when still null. */
public static Ontologies ontologies;

/** Instance registered by {@link #registerInstance()} or {@link #setInstance(DLIUtils)}. */
private static DLIUtils instance;

/** Relation map created on first call of {@link #getRelationMap()}. */
private static BiMap<String, String> relations;

/** Map parsed from the data-center.json classpath resource in {@link #registerInstance()}. */
private Map<String, String> dataciteDatasource;

@Autowired
private UniqueServiceLocator serviceLocator;
|
66 |
44355
|
claudio.at
|
|
67 |
51054
|
sandro.lab
|
private static BiMap<String, String> getRelationMap() {
|
68 |
35554
|
sandro.lab
|
|
69 |
51054
|
sandro.lab
|
if (relations == null) {
|
70 |
|
|
relations = HashBiMap.create();
|
71 |
|
|
relations.put("IsCitedBy", "Cites");
|
72 |
|
|
relations.put("IsSupplementTo", "IsSupplementedBy");
|
73 |
|
|
relations.put("IsReferencedBy", "References");
|
74 |
|
|
}
|
75 |
|
|
return relations;
|
76 |
|
|
}
|
77 |
34805
|
sandro.lab
|
|
78 |
51054
|
sandro.lab
|
public static String getNameFromDataSourcePrefix(final String datasourcePrefix) throws ISLookUpException {
|
79 |
|
|
if (datasources.keySet().size() == 0) {
|
80 |
|
|
generateDSMap();
|
81 |
|
|
}
|
82 |
|
|
if (!datasources.containsKey(datasourcePrefix))
|
83 |
|
|
return "";
|
84 |
|
|
return datasources.get(datasourcePrefix).getRight();
|
85 |
|
|
}
|
86 |
44352
|
sandro.lab
|
|
87 |
51054
|
sandro.lab
|
public static String getIdFromDataSourcePrefix(final String datasourcePrefix) throws ISLookUpException {
|
88 |
|
|
if (datasources.keySet().size() == 0) {
|
89 |
|
|
generateDSMap();
|
90 |
|
|
}
|
91 |
|
|
if (!datasources.containsKey(datasourcePrefix))
|
92 |
|
|
return "";
|
93 |
|
|
return datasources.get(datasourcePrefix).getLeft();
|
94 |
|
|
}
|
95 |
44352
|
sandro.lab
|
|
96 |
46990
|
sandro.lab
|
public static String getPublisherName(final String publisher) {
|
97 |
|
|
if (instance.getDataciteDatasource() != null) {
|
98 |
|
|
return instance.getDataciteDatasource().get(publisher.trim().toLowerCase());
|
99 |
|
|
}
|
100 |
|
|
return "";
|
101 |
|
|
}
|
102 |
|
|
|
103 |
51054
|
sandro.lab
|
public static void generateDSMap() throws ISLookUpException {
|
104 |
|
|
if (datasources.keySet().size() > 0)
|
105 |
|
|
return;
|
106 |
44352
|
sandro.lab
|
|
107 |
51054
|
sandro.lab
|
final String query = "for $x in collection('/db/DRIVER/RepositoryServiceResources/RepositoryServiceResourceType') "
|
108 |
|
|
+ "return concat($x//FIELD[./key='NamespacePrefix']/value/text(),'@--@',$x//FIELD[./key='DataSourceId']/value/text(),'@--@',$x//ENGLISH_NAME )";
|
109 |
45717
|
sandro.lab
|
final ISLookUpService lookupService = instance.getServiceLocator().getService(ISLookUpService.class);
|
110 |
|
|
final List<String> results = lookupService.quickSearchProfile(query);
|
111 |
|
|
datasources.clear();
|
112 |
51054
|
sandro.lab
|
if (results != null)
|
113 |
|
|
results.forEach(it -> {
|
114 |
|
|
final String[] splitted = it.split("@--@");
|
115 |
|
|
if (splitted.length == 3) {
|
116 |
|
|
datasources.put(splitted[0], new ImmutablePair<>(splitted[1], splitted[2]));
|
117 |
|
|
}
|
118 |
|
|
});
|
119 |
|
|
}
|
120 |
44352
|
sandro.lab
|
|
121 |
51054
|
sandro.lab
|
public static String inferPidType(final String pid, final String pidType) {
|
122 |
|
|
if (pidType != null && !pidType.toLowerCase().equals("doi")) {
|
123 |
|
|
if (pid != null && pid.contains("http://dx.doi.org/") || pid.contains("http://doi.org/"))
|
124 |
|
|
return "doi";
|
125 |
|
|
}
|
126 |
|
|
return pidType;
|
127 |
|
|
}
|
128 |
49054
|
sandro.lab
|
|
129 |
51054
|
sandro.lab
|
public static String fixPID(String input) {
|
130 |
|
|
if (input != null) {
|
131 |
|
|
return input.replace("http://dx.doi.org/", "").replace("http://doi.org/", "");
|
132 |
|
|
}
|
133 |
|
|
return null;
|
134 |
|
|
}
|
135 |
34937
|
sandro.lab
|
|
136 |
51637
|
sandro.lab
|
|
137 |
|
|
public static String geussPidType(final String pidType, final String pid ) {
|
138 |
|
|
if (isValidDoi(pid)!= null){
|
139 |
|
|
return "doi";
|
140 |
|
|
}
|
141 |
|
|
return pidType;
|
142 |
|
|
}
|
143 |
|
|
|
144 |
|
|
|
145 |
|
|
public static String geussPidValue(final String pid) {
|
146 |
|
|
if (isValidDoi(pid)!= null){
|
147 |
|
|
return isValidDoi(pid);
|
148 |
|
|
}
|
149 |
|
|
return pid;
|
150 |
|
|
}
|
151 |
|
|
|
152 |
|
|
|
153 |
54884
|
sandro.lab
|
public static PID createCorrectPID(final String pid, final String pidType, final String resolvedUrl) {
|
154 |
|
|
PID correctPID = createCorrectPID(pid, pidType);
|
155 |
|
|
correctPID.setResolvedUrl(resolvedUrl);
|
156 |
|
|
return correctPID;
|
157 |
|
|
}
|
158 |
51637
|
sandro.lab
|
|
159 |
52724
|
sandro.lab
|
public static PID createCorrectPID(final String pid, final String pidType) {
|
160 |
|
|
final String validDoi = isValidDoi(pid);
|
161 |
|
|
if (validDoi!= null) {
|
162 |
|
|
return new PID(validDoi.toLowerCase(), "doi");
|
163 |
|
|
}
|
164 |
|
|
return new PID(pid, pidType);
|
165 |
|
|
}
|
166 |
51637
|
sandro.lab
|
|
167 |
|
|
public static String isValidDoi(final String url) {
|
168 |
|
|
|
169 |
|
|
|
170 |
54884
|
sandro.lab
|
final String regex = "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&'])\\S)+)";
|
171 |
51637
|
sandro.lab
|
|
172 |
|
|
|
173 |
|
|
final Pattern pattern = Pattern.compile(regex);
|
174 |
|
|
final Matcher matcher = pattern.matcher(url);
|
175 |
|
|
|
176 |
|
|
if (matcher.find())
|
177 |
|
|
return matcher.group(0);
|
178 |
|
|
|
179 |
|
|
|
180 |
|
|
return null;
|
181 |
|
|
|
182 |
|
|
}
|
183 |
|
|
|
184 |
45803
|
sandro.lab
|
public static String normalizeRelation(final String relation) {
|
185 |
45628
|
sandro.lab
|
if (relation == null || StringUtils.isEmpty(relation)) {
|
186 |
|
|
return null;
|
187 |
|
|
}
|
188 |
|
|
return Character.toLowerCase(relation.charAt(0)) + relation.substring(1);
|
189 |
35554
|
sandro.lab
|
|
190 |
45628
|
sandro.lab
|
}
|
191 |
|
|
|
192 |
|
|
public static String getInverse(final String relation) throws Exception {
|
193 |
|
|
if (ontologies == null) {
|
194 |
|
|
ontologies = OntologyLoader.loadOntologiesFromIS();
|
195 |
|
|
}
|
196 |
|
|
final String normalizedRelation = normalizeRelation(relation);
|
197 |
|
|
|
198 |
|
|
|
199 |
|
|
try {
|
200 |
51054
|
sandro.lab
|
return ontologies.getTerms(normalizedRelation).stream().findFirst().get().getInverseCode();
|
201 |
45628
|
sandro.lab
|
} catch (Throwable e) {
|
202 |
|
|
System.out.println("Relation not found = " + normalizedRelation);
|
203 |
|
|
return "related";
|
204 |
|
|
}
|
205 |
|
|
}
|
206 |
39927
|
sandro.lab
|
|
207 |
45427
|
sandro.lab
|
|
208 |
51054
|
sandro.lab
|
public static String generateIdentifier(final String pid, final String pidtype) {
|
209 |
|
|
if (StringUtils.isBlank(pid) || StringUtils.isBlank(pidtype))
|
210 |
|
|
throw new RuntimeException("Error pid or pidtype cannot be null");
|
211 |
|
|
return DnetXsltFunctions.md5(String.format("%s::%s", pid.toLowerCase().trim(), pidtype.toLowerCase().trim()));
|
212 |
|
|
}
|
213 |
44352
|
sandro.lab
|
|
214 |
45717
|
sandro.lab
|
/**
|
215 |
|
|
* This method is used only for test Scope
|
216 |
|
|
*
|
217 |
|
|
* @param mockInstance
|
218 |
|
|
*/
|
219 |
|
|
public static void setInstance(final DLIUtils mockInstance) {
|
220 |
|
|
instance = mockInstance;
|
221 |
|
|
}
|
222 |
|
|
|
223 |
51054
|
sandro.lab
|
@PostConstruct
|
224 |
46990
|
sandro.lab
|
public void registerInstance() throws Exception {
|
225 |
|
|
instance = this;
|
226 |
|
|
final InputStream inputStream = this.getClass().getResourceAsStream("/eu/dnetlib/transformation/data-center.json");
|
227 |
|
|
dataciteDatasource = new Gson().fromJson(IOUtils.toString(inputStream), Map.class);
|
228 |
|
|
}
|
229 |
44352
|
sandro.lab
|
|
230 |
51054
|
sandro.lab
|
public UniqueServiceLocator getServiceLocator() {
|
231 |
|
|
return serviceLocator;
|
232 |
|
|
}
|
233 |
44352
|
sandro.lab
|
|
234 |
51054
|
sandro.lab
|
public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
|
235 |
|
|
this.serviceLocator = serviceLocator;
|
236 |
|
|
}
|
237 |
46990
|
sandro.lab
|
|
238 |
|
|
public Map<String, String> getDataciteDatasource() {
|
239 |
|
|
return dataciteDatasource;
|
240 |
|
|
}
|
241 |
34805
|
sandro.lab
|
}
|