1
|
package eu.dnetlib.openaire.directindex.api;
|
2
|
|
3
|
import java.io.StringWriter;
|
4
|
import java.util.ArrayList;
|
5
|
import java.util.Collection;
|
6
|
import java.util.HashMap;
|
7
|
import java.util.HashSet;
|
8
|
import java.util.List;
|
9
|
import java.util.Map;
|
10
|
import java.util.stream.Collectors;
|
11
|
|
12
|
import org.apache.commons.lang.StringUtils;
|
13
|
import org.springframework.web.client.RestClientException;
|
14
|
import org.springframework.web.client.RestTemplate;
|
15
|
|
16
|
import com.google.common.collect.Sets;
|
17
|
|
18
|
import eu.dnetlib.miscutils.functional.hash.Hashing;
|
19
|
import eu.dnetlib.openaire.directindex.objects.DatasourceEntry;
|
20
|
import eu.dnetlib.openaire.directindex.objects.DsmSearchRequest;
|
21
|
import eu.dnetlib.openaire.directindex.objects.DsmSearchResponse;
|
22
|
import eu.dnetlib.openaire.directindex.objects.ZenodoContextList;
|
23
|
import eu.dnetlib.openaire.directindex.utils.CorrectionUtils;
|
24
|
|
25
|
/**
|
26
|
* Created by michele on 15/01/16.
|
27
|
*/
|
28
|
public class OpenAIRESubmitterUtils {
|
29
|
|
30
|
private static final org.apache.commons.logging.Log log = org.apache.commons.logging.LogFactory.getLog(OpenAIRESubmitterUtils.class);
|
31
|
private static final String ZENODO_COMMUNITY = "zenodo.org/communities/";
|
32
|
private final String community_api;
|
33
|
private final String dsm_api;
|
34
|
private final CorrectionUtils correctionUtils;
|
35
|
|
36
|
/**
 * Builds the helper from the two service base URLs and the project-id corrector.
 *
 * @param community_api
 *            prefix of the URL queried when translating Zenodo communities
 * @param dsm_api
 *            prefix of the URL queried when searching for a datasource
 * @param correctionUtils
 *            corrector applied to generated project ids; a null value disables the correction step
 */
public OpenAIRESubmitterUtils(final String community_api, final String dsm_api, final CorrectionUtils correctionUtils) {
    this.correctionUtils = correctionUtils;
    this.dsm_api = dsm_api;
    this.community_api = community_api;
}
|
41
|
|
42
|
public DatasourceEntry findDatasource(final String dsId) {
|
43
|
final RestTemplate rt = new RestTemplate();
|
44
|
final String url = dsm_api + "/searchdetails/0/1?requestSortBy=id&order=ASCENDING";
|
45
|
try {
|
46
|
final DsmSearchResponse res = rt.postForObject(url, new DsmSearchRequest(dsId), DsmSearchResponse.class);
|
47
|
return res.getDatasourceInfo()
|
48
|
.stream()
|
49
|
.map(ds -> new DatasourceEntry(ds.getId(), ds.getOfficialname(), ds.getNamespaceprefix()))
|
50
|
.findFirst()
|
51
|
.orElse(null);
|
52
|
} catch (final RestClientException rce) {
|
53
|
log.error("Unable to get object for " + url);
|
54
|
return null;
|
55
|
}
|
56
|
}
|
57
|
|
58
|
public Map<String, String> calculateProjectInfo(final String link) {
|
59
|
final Map<String, String> info = new HashMap<>();
|
60
|
final String[] arr = link.split("/");
|
61
|
// info:eu-repo/grantAgreement/EC/FP7/244909/EU/Making Capabilities Work/WorkAble
|
62
|
|
63
|
if (arr.length > 4) {
|
64
|
final String acronym = arr.length > 7 ? arr[7] : "";
|
65
|
final String title = arr.length > 6 ? StringUtils.isNotBlank(arr[6]) ? arr[6] : acronym : "";
|
66
|
final String jurisdiction = arr.length > 5 ? arr[5] : "";
|
67
|
final String funderId = calculateFunderId(arr[2], arr[3]);
|
68
|
final String funderShortName = fixFunderShortName(arr[2]);
|
69
|
final String fundingName = fixFundingName(funderShortName, arr[3]);
|
70
|
info.put("id", calculateProjectId(arr[2], arr[3], arr[4]));
|
71
|
info.put("funderShortName", fixFunderShortName(arr[2]));
|
72
|
info.put("fundingName", fundingName);
|
73
|
info.put("code", unescape(arr[4]));
|
74
|
info.put("jurisdiction", jurisdiction);
|
75
|
info.put("title", title);
|
76
|
info.put("acronym", acronym);
|
77
|
info.put("funderId", funderId);
|
78
|
info.put("funderName", calculateFunderName(arr[2]));
|
79
|
if (StringUtils.isNotBlank(arr[3])) {
|
80
|
info.put("fundingId", funderId + "::" + fundingName);
|
81
|
}
|
82
|
}
|
83
|
return info;
|
84
|
}
|
85
|
|
86
|
// TODO: remove me when Zenodo ingests the good UKRI projects
|
87
|
protected String fixFunderShortName(final String funderShortName) {
|
88
|
switch (funderShortName) {
|
89
|
case "RCUK":
|
90
|
return "UKRI";
|
91
|
default:
|
92
|
return funderShortName;
|
93
|
}
|
94
|
}
|
95
|
|
96
|
protected String calculateFunderPrefix(final String funderShortName, final String funding) {
|
97
|
switch (funderShortName.toLowerCase()) {
|
98
|
case "chist-era":
|
99
|
return "chistera____::";
|
100
|
case "conicyt":
|
101
|
return "conicytf____::";
|
102
|
case "dfg":
|
103
|
return "dfgf________::";
|
104
|
case "ec":
|
105
|
switch (funding.toLowerCase()) {
|
106
|
case "fp7":
|
107
|
return "corda_______::";
|
108
|
case "h2020":
|
109
|
return "corda__h2020::";
|
110
|
default:
|
111
|
return "corda_____he::";
|
112
|
}
|
113
|
case "eea":
|
114
|
return "euenvagency_::";
|
115
|
case "hrzz":
|
116
|
case "mzos":
|
117
|
return "irb_hr______::";
|
118
|
case "tara":
|
119
|
return "taraexp_____::";
|
120
|
case "tubitak":
|
121
|
return "tubitakf____::";
|
122
|
case "rcuk":
|
123
|
return "ukri________::";
|
124
|
default:
|
125
|
String prefix = funderShortName.toLowerCase();
|
126
|
// ensure we have 12 chars
|
127
|
while (prefix.length() < 12) {
|
128
|
prefix += "_";
|
129
|
}
|
130
|
return prefix + "::";
|
131
|
}
|
132
|
}
|
133
|
|
134
|
protected String calculateProjectId(final String funderShortName, final String funding, final String code) {
|
135
|
final String suffix = Hashing.md5(unescape(code));
|
136
|
final String funderPrefix = calculateFunderPrefix(funderShortName, funding);
|
137
|
final String projectId = funderPrefix + suffix;
|
138
|
// Some projectIds of FCT and NWO have been generated in a wrong way and must be remapped
|
139
|
return correctionUtils != null ? correctionUtils.correctProjectId(projectId) : projectId;
|
140
|
}
|
141
|
|
142
|
private String unescape(final String code) {
|
143
|
return code.replace("%2F", "/");
|
144
|
}
|
145
|
|
146
|
protected String calculateFunderId(final String funderShortName, final String funding) {
|
147
|
switch (funderShortName.toLowerCase()) {
|
148
|
case "ec":
|
149
|
return "ec__________::EC";
|
150
|
default:
|
151
|
final String fixedFunderShortName = fixFunderShortName(funderShortName);
|
152
|
final String prefix = calculateFunderPrefix(fixedFunderShortName, funding);
|
153
|
return prefix + fixedFunderShortName.toUpperCase();
|
154
|
}
|
155
|
}
|
156
|
|
157
|
protected String calculateFunderName(final String funderShortName) {
|
158
|
|
159
|
switch (funderShortName.toLowerCase()) {
|
160
|
case "aff":
|
161
|
case "aka":
|
162
|
return "Academy of Finland";
|
163
|
case "anr":
|
164
|
return "French National Research Agency (ANR)";
|
165
|
case "arc":
|
166
|
return "Australian Research Council (ARC)";
|
167
|
case "asap":
|
168
|
return "Aligning Science Across Parkinson's";
|
169
|
case "chist-era":
|
170
|
return "CHIST-ERA";
|
171
|
case "cihr":
|
172
|
return "Canadian Institutes of Health Research";
|
173
|
case "conicyt":
|
174
|
return "Comisión Nacional de Investigación Científica y Tecnológica";
|
175
|
case "dfg":
|
176
|
return "Deutsche Forschungsgemeinschaft";
|
177
|
case "ec":
|
178
|
return "European Commission";
|
179
|
case "eea":
|
180
|
return "European Environment Agency";
|
181
|
case "fct":
|
182
|
return "Fundação para a Ciência e a Tecnologia, I.P.";
|
183
|
case "fwf":
|
184
|
return "Austrian Science Fund (FWF)";
|
185
|
case "gsrt":
|
186
|
return "General Secretariat of Research and Technology (GSRT)";
|
187
|
case "hrzz":
|
188
|
return "Croatian Science Foundation (CSF)";
|
189
|
case "innoviris":
|
190
|
return "INNOVIRIS";
|
191
|
case "mestd":
|
192
|
return "Ministry of Education, Science and Technological Development of Republic of Serbia";
|
193
|
case "miur":
|
194
|
return "Ministero dell'Istruzione dell'Università e della Ricerca";
|
195
|
case "mzos":
|
196
|
return "Ministry of Science, Education and Sports of the Republic of Croatia (MSES)";
|
197
|
case "nhmrc":
|
198
|
return "National Health and Medical Research Council (NHMRC)";
|
199
|
case "nih":
|
200
|
return "National Institutes of Health";
|
201
|
case "nsf":
|
202
|
return "National Science Foundation";
|
203
|
case "nserc":
|
204
|
return "Natural Sciences and Engineering Research Council of Canada";
|
205
|
case "nwo":
|
206
|
return "Netherlands Organisation for Scientific Research (NWO)";
|
207
|
case "rcuk":
|
208
|
case "ukri":
|
209
|
return "UK Research and Innovation";
|
210
|
case "rif":
|
211
|
case "rpf":
|
212
|
return "Research and Innovation Foundation";
|
213
|
case "rsf":
|
214
|
return "Russian Science Foundation";
|
215
|
case "sfi":
|
216
|
return "Science Foundation Ireland";
|
217
|
case "sgov":
|
218
|
return "Gobierno de España";
|
219
|
case "snsf":
|
220
|
return "Swiss National Science Foundation";
|
221
|
case "sshrc":
|
222
|
return "Social Sciences and Humanities Research Council";
|
223
|
case "tara":
|
224
|
return "Tara Expeditions Foundation";
|
225
|
case "tubitak":
|
226
|
return "Türkiye Bilimsel ve Teknolojik Araştırma Kurumu";
|
227
|
case "wt":
|
228
|
return "Wellcome Trust";
|
229
|
default:
|
230
|
log.error("Funder short name '" + funderShortName + "' not managed");
|
231
|
return "";
|
232
|
}
|
233
|
}
|
234
|
|
235
|
protected String fixFundingName(final String funderShortName, final String fundingName) {
|
236
|
switch (funderShortName) {
|
237
|
case "EC":
|
238
|
if (fundingName.toLowerCase().startsWith("horizon 2020")) { return "H2020"; }
|
239
|
if (fundingName.toLowerCase().startsWith("horizon europe")) { return "HE"; }
|
240
|
default:
|
241
|
return fundingName;
|
242
|
}
|
243
|
}
|
244
|
|
245
|
public Collection<String> translateZenodoCommunity(final String community) {
|
246
|
if (community.contains(ZENODO_COMMUNITY)) {
|
247
|
final String context = community.substring(community.lastIndexOf("/") + 1);
|
248
|
final RestTemplate rt = new RestTemplate();
|
249
|
try {
|
250
|
return new HashSet<>(rt.getForObject(community_api + context + "/openairecommunities", ZenodoContextList.class)
|
251
|
.getOpenAirecommunitylist());
|
252
|
} catch (final RestClientException rce) {
|
253
|
log.error("Unable to get object for " + community_api + context + "/openairecommunities");
|
254
|
log.error(rce.getMessage());
|
255
|
return new HashSet<>();
|
256
|
}
|
257
|
} else {
|
258
|
return Sets.newHashSet(community);
|
259
|
}
|
260
|
}
|
261
|
|
262
|
private ContextInfo createContextInfo(final String[] arr, final int pos, final Map<String, String> labelMap) {
|
263
|
final StringWriter id = new StringWriter();
|
264
|
id.write(arr[0]);
|
265
|
for (int i = 0; i < pos; i++) {
|
266
|
id.write("::");
|
267
|
id.write(arr[i + 1]);
|
268
|
}
|
269
|
final String label = labelMap.get(id.toString());
|
270
|
final String elem = pos == 0 ? "context" : pos == 1 ? "category" : "concept";
|
271
|
final ContextInfo info = new ContextInfo(elem, id.toString(), label);
|
272
|
if (pos + 1 < arr.length) {
|
273
|
info.getChildren().add(createContextInfo(arr, pos + 1, labelMap));
|
274
|
}
|
275
|
return info;
|
276
|
}
|
277
|
|
278
|
public List<ContextInfo> processContexts(final List<String> zenodoCommunities, final Map<String, String> labelMap) {
|
279
|
return zenodoCommunities.stream()
|
280
|
.map(c -> translateZenodoCommunity(c))
|
281
|
.flatMap(coll -> coll.stream())
|
282
|
.map(ctx -> createContextInfo(ctx.split("::"), 0, labelMap))
|
283
|
.filter(info -> StringUtils.isNotBlank(info.getLabel()))
|
284
|
.collect(Collectors.toList());
|
285
|
}
|
286
|
|
287
|
/**
 * Node of a context tree: an element name, an identifier, a label and the list of child nodes.
 */
public class ContextInfo {

    /** Name of the element represented by this node; "context" marks a root (see {@link #isRoot()}). */
    private String elem;

    /** Identifier of the node. */
    private String id;

    /** Label of the node, may be null. */
    private String label;

    /** Child nodes, initially empty. */
    private List<ContextInfo> children = new ArrayList<>();

    /**
     * Creates a node without children.
     *
     * @param elem
     *            the element name
     * @param id
     *            the node identifier
     * @param label
     *            the node label
     */
    public ContextInfo(final String elem,
        final String id, final String label) {
        this.elem = elem;
        this.id = id;
        this.label = label;
    }

    /** @return the element name */
    public String getElem() {
        return elem;
    }

    /**
     * @param elem
     *            the new element name
     */
    public void setElem(final String elem) {
        this.elem = elem;
    }

    /** @return the node identifier */
    public String getId() {
        return id;
    }

    /**
     * @param id
     *            the new node identifier
     */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the live (modifiable) list of child nodes */
    public List<ContextInfo> getChildren() {
        return children;
    }

    /**
     * @param children
     *            the new list of child nodes
     */
    public void setChildren(final List<ContextInfo> children) {
        this.children = children;
    }

    /**
     * @return true when the element name is "context"; false for any other value, including null
     */
    public boolean isRoot() {
        // Constant first: the element name can be null (constructor and setter accept it)
        return "context".equals(elem);
    }

    /** @return the node label */
    public String getLabel() {
        return label;
    }

    /**
     * @param label
     *            the new node label
     */
    public void setLabel(final String label) {
        this.label = label;
    }
}
|
337
|
|
338
|
}
|