1
|
package eu.dnetlib.parthenos.catalogue;
|
2
|
|
3
|
import java.io.BufferedOutputStream;
|
4
|
import java.io.ByteArrayOutputStream;
|
5
|
import java.io.IOException;
|
6
|
import java.net.URISyntaxException;
|
7
|
import java.util.Iterator;
|
8
|
|
9
|
import com.fasterxml.jackson.core.JsonEncoding;
|
10
|
import com.fasterxml.jackson.core.JsonFactory;
|
11
|
import com.fasterxml.jackson.core.JsonGenerator;
|
12
|
import com.google.common.base.Joiner;
|
13
|
import eu.dnetlib.parthenos.jrr.ParthenosRegistryResource;
|
14
|
import eu.dnetlib.parthenos.publisher.ParthenosPublisherException;
|
15
|
import eu.dnetlib.parthenos.rdf.ResourceReader;
|
16
|
import org.apache.commons.lang3.StringUtils;
|
17
|
import org.apache.commons.logging.Log;
|
18
|
import org.apache.commons.logging.LogFactory;
|
19
|
import org.apache.jena.rdf.model.Resource;
|
20
|
import org.springframework.beans.factory.annotation.Autowired;
|
21
|
import org.springframework.stereotype.Component;
|
22
|
|
23
|
import static eu.dnetlib.parthenos.CRM.*;
|
24
|
|
25
|
/**
|
26
|
* Created by Alessia Bardi on 21/11/2017.
|
27
|
*
|
28
|
* @author Alessia Bardi
|
29
|
*/
|
30
|
@Component
|
31
|
public class CatalogueRegistrator {
|
32
|
|
33
|
private static final Log log = LogFactory.getLog(CatalogueRegistrator.class);
|
34
|
private final String PARTHENOS_BASE_URL = "http://parthenos.d4science.org";
|
35
|
private final String CATALOGUE_NAME_REFIX = "parthenos_";
|
36
|
|
37
|
@Autowired
|
38
|
private ResourceReader resourceReader;
|
39
|
|
40
|
@Autowired
|
41
|
private CatalogueAPIClient catalogueAPIClient;
|
42
|
|
43
|
private String groupTemplate = "{"
|
44
|
+ " \"name\":\"%s\","
|
45
|
+ " \"id\":\"%s\","
|
46
|
+ " \"title\": \"%s\""
|
47
|
//+ " \"description\": \"%s\""
|
48
|
+ "}";
|
49
|
|
50
|
|
51
|
|
52
|
public String register(final Resource resource, final Resource type) throws IOException, ParthenosPublisherException, URISyntaxException {
|
53
|
String resURI = resource.getURI();
|
54
|
log.debug(String.format("Catalogue --> Processing resource : %s with type: %s", resURI, type.getLocalName()));
|
55
|
//For the catalogue: Must be purely lowercase alphanumeric (ascii) characters and these symbols: -_
|
56
|
//Replacing all non-alphanumeric characters with empty strings
|
57
|
String resCatName = getNameForCatalogue(resURI.substring(resURI.lastIndexOf("/") + 1));
|
58
|
ParthenosRegistryResource prr = catalogueAPIClient.getRegistered(resCatName);
|
59
|
if(prr != null){
|
60
|
//TODO: what to do if already registered?
|
61
|
log.debug(resCatName+ " is already registered");
|
62
|
return prr.getUuid();
|
63
|
}
|
64
|
else {
|
65
|
//resource not yet registered
|
66
|
ensureGroups(resource);
|
67
|
String json;
|
68
|
switch (type.getLocalName()) {
|
69
|
case "E7_Activity":
|
70
|
json = getJsonForActivity(resource, resCatName);
|
71
|
break;
|
72
|
case "E39_Actor":
|
73
|
json = getJsonForActor(resource, resCatName);
|
74
|
break;
|
75
|
case "E70_Thing":
|
76
|
json = getJsonForThing(resource, resCatName);
|
77
|
break;
|
78
|
case "E29_Design_or_Procedure":
|
79
|
json = getJsonForDesignProcedure(resource, resCatName);
|
80
|
break;
|
81
|
default:
|
82
|
throw new IllegalArgumentException(String.format("Type " + type.getLocalName() + " not supported"));
|
83
|
}
|
84
|
String uuid = catalogueAPIClient.doRegister(json, resCatName);
|
85
|
if(StringUtils.isBlank(uuid)){
|
86
|
log.warn(String.format("%s could not be registered", resURI));
|
87
|
}
|
88
|
else {
|
89
|
log.debug(String.format("%s registered on the catalogue with uuid: %s", resURI, uuid));
|
90
|
}
|
91
|
return uuid;
|
92
|
}
|
93
|
|
94
|
}
|
95
|
|
96
|
protected boolean purge(final String resCatName) throws URISyntaxException, IOException {
|
97
|
return catalogueAPIClient.purgeItem(resCatName);
|
98
|
}
|
99
|
|
100
|
/**
|
101
|
* The catalogue accepts names which are strings between 2 and 100 characters long, containing only lowercase alphanumeric characters, - and _
|
102
|
* @param name the original name
|
103
|
* @return a string which is name adapted for the catalogue
|
104
|
*/
|
105
|
protected String getNameForCatalogue(final String name){
|
106
|
String n = name.replaceAll("[^A-Za-z0-9]","_").toLowerCase();
|
107
|
if(n.length() > 100){
|
108
|
n = n.substring(0,99);
|
109
|
}
|
110
|
if(n.length() < 2){
|
111
|
n = CATALOGUE_NAME_REFIX+n;
|
112
|
}
|
113
|
return n;
|
114
|
}
|
115
|
|
116
|
/**
|
117
|
* Ensure that providers referred in the Resource are available as "groups" in the registry.
|
118
|
* @param res Resource
|
119
|
*/
|
120
|
protected void ensureGroups(final Resource res) throws ParthenosPublisherException, IOException, URISyntaxException {
|
121
|
log.debug("Ensuring groups exist");
|
122
|
Iterator<String> providerNames = resourceReader.getProviderNames(res);
|
123
|
while(providerNames.hasNext()){
|
124
|
String name = providerNames.next();
|
125
|
if(StringUtils.isNotBlank(name)){
|
126
|
String groupName = getNameForCatalogue(name);
|
127
|
if(!catalogueAPIClient.groupExist(groupName)){
|
128
|
String groupJson = String.format(groupTemplate, groupName, groupName, name);
|
129
|
catalogueAPIClient.registerGroup(groupJson, groupName);
|
130
|
log.info("NEW GROUP REGISTERED: "+groupName);
|
131
|
}
|
132
|
}
|
133
|
}
|
134
|
}
|
135
|
|
136
|
protected String getJsonForActivity(final Resource res, final String resNameForCatalogue) throws IOException {
|
137
|
JsonFactory jsonFactory = new JsonFactory();
|
138
|
final ByteArrayOutputStream out = new ByteArrayOutputStream();
|
139
|
BufferedOutputStream bos = new BufferedOutputStream(out);
|
140
|
JsonGenerator jg = jsonFactory.createGenerator(bos, JsonEncoding.UTF8);
|
141
|
jg.writeStartObject();
|
142
|
writeCommonFields(jg, res, resNameForCatalogue);
|
143
|
//TODO: from which linked E39_Actor?
|
144
|
//for project we can take it from the maintainer team: is this correct also for other Things (like Services and Software?)
|
145
|
jg.writeStringField("maintainer", Joiner.on(", ").join(resourceReader.getMaintainerUrls(res)));
|
146
|
//TODO: it should be better to identify email contacts rather than generic contact labels of maintainer
|
147
|
//jg.writeStringField("maintainer_email", Joiner.on(", ").join(resourceReader.getMaintainerContacts(res)));
|
148
|
|
149
|
jg.writeArrayFieldStart("extras");
|
150
|
addExtra(jg, "system:type", E7_Activity.getLocalName());
|
151
|
//specific class
|
152
|
addExtra(jg, "instance of", resourceReader.findSpecificType(res, E7_Activity).getLocalName());
|
153
|
if (res.getURI().startsWith(PARTHENOS_BASE_URL))
|
154
|
addExtra(jg, "Parthenos URL", res.getURI());
|
155
|
//for Services
|
156
|
addExtra(jg, "competence", resourceReader.getCompetence(res));
|
157
|
addExtra(jg, "provided by", Joiner.on(", ").join(resourceReader.getProviderNames(res)));
|
158
|
addExtra(jg, "declared begin/end of operation", Joiner.on(", ").join(resourceReader.getDeclarativeTimes(res)));
|
159
|
//TODO: where to get it?
|
160
|
addExtra(jg, "last confirmation", "");
|
161
|
//TODO: where to get it?
|
162
|
addExtra(jg, "date of registration", "");
|
163
|
addExtra(jg, "availability", resourceReader.getAvailability(res));
|
164
|
//TODO: where to get it?
|
165
|
addExtra(jg, "condition of use", resourceReader.getConditionOfUse(res));
|
166
|
addExtra(jg, "contact points", Joiner.on(", ").join(resourceReader.getProviderContactPoints(res)));
|
167
|
addExtra(jg, "activity type", Joiner.on(", ").join(resourceReader.getActivityTypes(res)));
|
168
|
addExtra(jg, "hosts", Joiner.on(", ").join(resourceReader.getHostedStuff(res)));
|
169
|
addExtra(jg, "online access point", Joiner.on(", ").join(resourceReader.getAccessPoints(res)));
|
170
|
addExtra(jg, "protocol", Joiner.on(", ").join(resourceReader.getProtocols(res)));
|
171
|
addExtra(jg,"curates", Joiner.on(", ").join(resourceReader.getCuratedObjects(res)));
|
172
|
//TODO: where to get it?
|
173
|
addExtra(jg, "runs on request", "");
|
174
|
addExtra(jg, "delivers on request", Joiner.on(", ").join(resourceReader.getDeliversOnRequest(res)));
|
175
|
addExtra(jg, "uses curation plan", Joiner.on(", ").join(resourceReader.getCurationPlans(res)));
|
176
|
|
177
|
//for Projects
|
178
|
addExtra(jg, "offers", Joiner.on(", ").join(resourceReader.getOfferedServiceUrls(res)));
|
179
|
addExtra(jg, "started", Joiner.on(", ").join(resourceReader.getStartTimes(res)));
|
180
|
|
181
|
jg.writeEndArray(); //end extras
|
182
|
|
183
|
jg.writeEndObject();
|
184
|
jg.close();
|
185
|
return out.toString("UTF-8");
|
186
|
}
|
187
|
|
188
|
protected String getJsonForActor(final Resource res, final String resNameForCatalogue) throws IOException {
|
189
|
JsonFactory jsonFactory = new JsonFactory();
|
190
|
final ByteArrayOutputStream out = new ByteArrayOutputStream();
|
191
|
BufferedOutputStream bos = new BufferedOutputStream(out);
|
192
|
JsonGenerator jg = jsonFactory.createGenerator(bos, JsonEncoding.UTF8);
|
193
|
jg.writeStartObject();
|
194
|
writeCommonFields(jg, res, resNameForCatalogue);
|
195
|
|
196
|
jg.writeArrayFieldStart("extras");
|
197
|
addExtra(jg, "system:type", E39_Actor.getLocalName());
|
198
|
//specific class
|
199
|
addExtra(jg, "instance of", resourceReader.findSpecificType(res, E39_Actor).getLocalName());
|
200
|
if (res.getURI().startsWith(PARTHENOS_BASE_URL))
|
201
|
addExtra(jg, "Parthenos URL", res.getURI());
|
202
|
addExtra(jg, "has member", Joiner.on(", ").join(resourceReader.getMemberUrls(res)));
|
203
|
addExtra(jg, "is member of", Joiner.on(", ").join(resourceReader.isMemberOf(res)));
|
204
|
addExtra(jg, "has contact point", Joiner.on(", ").join(resourceReader.getResourceDirectContactPoints(res)));
|
205
|
addExtra(jg, "provides", Joiner.on(", ").join(resourceReader.getProvidedServiceUrls(res)));
|
206
|
jg.writeEndArray();
|
207
|
|
208
|
jg.writeEndObject();
|
209
|
jg.close();
|
210
|
return out.toString("UTF-8");
|
211
|
}
|
212
|
|
213
|
protected String getJsonForThing(final Resource res, final String resNameForCatalogue) throws IOException {
|
214
|
JsonFactory jsonFactory = new JsonFactory();
|
215
|
final ByteArrayOutputStream out = new ByteArrayOutputStream();
|
216
|
BufferedOutputStream bos = new BufferedOutputStream(out);
|
217
|
JsonGenerator jg = jsonFactory.createGenerator(bos, JsonEncoding.UTF8);
|
218
|
jg.writeStartObject();
|
219
|
writeCommonFields(jg, res, resNameForCatalogue);
|
220
|
|
221
|
jg.writeArrayFieldStart("extras");
|
222
|
addExtra(jg, "system:type", E70_Thing.getLocalName());
|
223
|
//specific class
|
224
|
addExtra(jg, "instance of", resourceReader.findSpecificType(res, E70_Thing).getLocalName());
|
225
|
if (res.getURI().startsWith(PARTHENOS_BASE_URL)) {
|
226
|
addExtra(jg, "Parthenos URL", res.getURI());
|
227
|
}
|
228
|
//TODO: things include digital objects, software, datasets, schema. Guess we should know what to add here.
|
229
|
addExtra(jg, "is part of", Joiner.on(", ").join(resourceReader.getIsPartOfUrls(res)));
|
230
|
addExtra(jg, "has part", Joiner.on(", ").join(resourceReader.getHasPartUrls(res)));
|
231
|
addExtra(jg, "curated by", Joiner.on(", ").join(resourceReader.getCuratorUrls(res)));
|
232
|
addExtra(jg, "curation plan", Joiner.on(", ").join(resourceReader.getCurationPlans(res)));
|
233
|
addExtra(jg, "hosted by", Joiner.on(", ").join(resourceReader.getHostedBys(res)));
|
234
|
//TODO where to get the encoding types? Should we through the Creation event?
|
235
|
//addExtra(jg, "encoding type", Joiner.on(", ").join());
|
236
|
//TODO where to get the schema/formats? Should we through the Creation event?
|
237
|
//addExtra(jg, "encoding type", Joiner.on(", ").join());
|
238
|
//TODO where to get the creator? Should we through the Creation event?
|
239
|
//addExtra(jg, "creator", Joiner.on(", ").join());
|
240
|
addExtra(jg, "subject", Joiner.on(", ").join(resourceReader.getSubjects(res)));
|
241
|
addExtra(jg, "temporal coverage", Joiner.on(", ").join(resourceReader.getTemporalCoverages(res)));
|
242
|
addExtra(jg, "spatial coverage", Joiner.on(", ").join(resourceReader.getSpatialCoverages(res)));
|
243
|
|
244
|
jg.writeEndArray();
|
245
|
|
246
|
jg.writeEndObject();
|
247
|
jg.close();
|
248
|
return out.toString("UTF-8");
|
249
|
}
|
250
|
|
251
|
protected String getJsonForDesignProcedure(final Resource res, final String resNameForCatalogue) throws IOException {
|
252
|
JsonFactory jsonFactory = new JsonFactory();
|
253
|
final ByteArrayOutputStream out = new ByteArrayOutputStream();
|
254
|
BufferedOutputStream bos = new BufferedOutputStream(out);
|
255
|
JsonGenerator jg = jsonFactory.createGenerator(bos, JsonEncoding.UTF8);
|
256
|
jg.writeStartObject();
|
257
|
writeCommonFields(jg, res, resNameForCatalogue);
|
258
|
|
259
|
jg.writeArrayFieldStart("extras");
|
260
|
addExtra(jg, "system:type", E29_Design_or_Procedure.getLocalName());
|
261
|
//specific class
|
262
|
addExtra(jg, "instance of", resourceReader.findSpecificType(res, E29_Design_or_Procedure).getLocalName());
|
263
|
if (res.getURI().startsWith(PARTHENOS_BASE_URL)) {
|
264
|
addExtra(jg, "Parthenos URL", res.getURI());
|
265
|
}
|
266
|
//TODO: add additional metadata for E29_Design_or_Procedure, if any
|
267
|
jg.writeEndArray();
|
268
|
|
269
|
jg.writeEndObject();
|
270
|
jg.close();
|
271
|
return out.toString("UTF-8");
|
272
|
}
|
273
|
|
274
|
protected void addExtra(final JsonGenerator jg, final String key, final String value) throws IOException {
|
275
|
jg.writeStartObject();
|
276
|
jg.writeStringField("key", key);
|
277
|
jg.writeStringField("value", value);
|
278
|
jg.writeEndObject();
|
279
|
}
|
280
|
|
281
|
protected void writeCommonFields(final JsonGenerator jg, final Resource res, final String resNameForCatalogue) throws IOException {
|
282
|
//end of URI
|
283
|
jg.writeStringField("name", resNameForCatalogue);
|
284
|
//default license
|
285
|
jg.writeStringField("license_id", "notspecified");
|
286
|
String title = resourceReader.getTitle(res);
|
287
|
if (StringUtils.isBlank(title))
|
288
|
title = resNameForCatalogue;
|
289
|
jg.writeStringField("title", title);
|
290
|
//description
|
291
|
jg.writeStringField("notes", resourceReader.getDescription(res));
|
292
|
//the names of all superclasses of the entity
|
293
|
jg.writeArrayFieldStart("tags");
|
294
|
Iterator<String> classNames = resourceReader.getRDFClassNames(res);
|
295
|
while (classNames.hasNext()) {
|
296
|
jg.writeStartObject();
|
297
|
jg.writeStringField("name", classNames.next());
|
298
|
jg.writeEndObject();
|
299
|
}
|
300
|
jg.writeEndArray();
|
301
|
//RI from which the entity has been collected, the source from which the RI collected the entity (if available).
|
302
|
//TODO: other Actors to add as catalogue group?
|
303
|
Iterator<String> providers = resourceReader.getProviderNames(res);
|
304
|
jg.writeArrayFieldStart("groups");
|
305
|
while(providers.hasNext()){
|
306
|
String provider = providers.next();
|
307
|
jg.writeStartObject();
|
308
|
jg.writeStringField("name", provider);
|
309
|
jg.writeEndObject();
|
310
|
}
|
311
|
jg.writeEndArray();
|
312
|
}
|
313
|
|
314
|
|
315
|
public ResourceReader getResourceReader() {
|
316
|
return resourceReader;
|
317
|
}
|
318
|
|
319
|
public void setResourceReader(final ResourceReader resourceReader) {
|
320
|
this.resourceReader = resourceReader;
|
321
|
}
|
322
|
|
323
|
public CatalogueAPIClient getCatalogueAPIClient() {
|
324
|
return catalogueAPIClient;
|
325
|
}
|
326
|
|
327
|
public void setCatalogueAPIClient(final CatalogueAPIClient catalogueAPIClient) {
|
328
|
this.catalogueAPIClient = catalogueAPIClient;
|
329
|
}
|
330
|
}
|