Project

General

Profile

1
package eu.dnetlib.parthenos.catalogue;
2

    
3
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.Locale;

import com.fasterxml.jackson.core.JsonEncoding;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.io.JsonStringEncoder;
import com.google.common.base.Joiner;
import eu.dnetlib.parthenos.jrr.ParthenosRegistryResource;
import eu.dnetlib.parthenos.publisher.ParthenosPublisherException;
import eu.dnetlib.parthenos.rdf.ResourceReader;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.jena.rdf.model.Resource;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import static eu.dnetlib.parthenos.CRM.*;
24

    
25
/**
 * Registers PARTHENOS resources, and the groups of their providers, on the catalogue.
 *
 * Created by Alessia Bardi on 21/11/2017.
 *
 * @author Alessia Bardi
 */
30
@Component
31
public class CatalogueRegistrator {
32

    
33
	private static final Log log = LogFactory.getLog(CatalogueRegistrator.class);
34
	private final String PARTHENOS_BASE_URL = "http://parthenos.d4science.org";
35
	private final String CATALOGUE_NAME_REFIX = "parthenos_";
36

    
37
	@Autowired
38
	private ResourceReader resourceReader;
39

    
40
	@Autowired
41
	private CatalogueAPIClient catalogueAPIClient;
42

    
43
	private String groupTemplate = "{"
44
			+ "  \"name\":\"%s\","
45
			+ "  \"id\":\"%s\","
46
			+ "  \"title\": \"%s\""
47
			//+ "  \"description\": \"%s\""
48
			+ "}";
49

    
50

    
51

    
52
	public String register(final Resource resource, final Resource type) throws IOException, ParthenosPublisherException, URISyntaxException {
53
		String resURI = resource.getURI();
54
		log.debug(String.format("Catalogue --> Processing resource : %s with type: %s", resURI, type.getLocalName()));
55
		//For the catalogue: Must be purely lowercase alphanumeric (ascii) characters and these symbols: -_
56
		//Replacing all non-alphanumeric characters with empty strings
57
		String resCatName = getNameForCatalogue(resURI.substring(resURI.lastIndexOf("/") + 1));
58
		ParthenosRegistryResource prr = catalogueAPIClient.getRegistered(resCatName);
59
		if(prr != null){
60
			//TODO: what to do if already registered?
61
			log.debug(resCatName+ " is already registered");
62
			return prr.getUuid();
63
		}
64
		else {
65
			//resource not yet registered
66
			ensureGroups(resource);
67
			String json;
68
			switch (type.getLocalName()) {
69
			case "E7_Activity":
70
				json = getJsonForActivity(resource, resCatName);
71
				break;
72
			case "E39_Actor":
73
				json = getJsonForActor(resource, resCatName);
74
				break;
75
			case "E70_Thing":
76
				json = getJsonForThing(resource, resCatName);
77
				break;
78
			case "E29_Design_or_Procedure":
79
				json = getJsonForDesignProcedure(resource, resCatName);
80
				break;
81
			default:
82
				throw new IllegalArgumentException(String.format("Type " + type.getLocalName() + " not supported"));
83
			}
84
			String uuid = catalogueAPIClient.doRegister(json, resCatName);
85
			if(StringUtils.isBlank(uuid)){
86
				log.warn(String.format("%s could not be registered", resURI));
87
			}
88
			else {
89
				log.debug(String.format("%s registered on the catalogue with uuid: %s", resURI, uuid));
90
			}
91
			return uuid;
92
		}
93

    
94
	}
95

    
96
	protected boolean purge(final String resCatName) throws URISyntaxException, IOException {
97
		return catalogueAPIClient.purgeItem(resCatName);
98
	}
99

    
100
	/**
101
	 * The catalogue accepts names which are strings between 2 and 100 characters long, containing only lowercase alphanumeric characters, - and _
102
	 * @param name the original name
103
	 * @return a string which is name adapted for the catalogue
104
	 */
105
	protected String getNameForCatalogue(final String name){
106
		String n = name.replaceAll("[^A-Za-z0-9]","_").toLowerCase();
107
		if(n.length() > 100){
108
			n = n.substring(0,99);
109
		}
110
		if(n.length() < 2){
111
			n = CATALOGUE_NAME_REFIX+n;
112
		}
113
		return n;
114
	}
115

    
116
	/**
117
	 * Ensure that providers referred in the Resource are available as "groups" in the registry.
118
	 * @param res Resource
119
	 */
120
	protected void ensureGroups(final Resource res) throws ParthenosPublisherException, IOException, URISyntaxException {
121
		log.debug("Ensuring groups exist");
122
		Iterator<String> providerNames = resourceReader.getProviderNames(res);
123
		while(providerNames.hasNext()){
124
			String name = providerNames.next();
125
			if(StringUtils.isNotBlank(name)){
126
				String groupName = getNameForCatalogue(name);
127
				if(!catalogueAPIClient.groupExist(groupName)){
128
					String groupJson = String.format(groupTemplate, groupName, groupName, name);
129
					catalogueAPIClient.registerGroup(groupJson, groupName);
130
					log.info("NEW GROUP REGISTERED: "+groupName);
131
				}
132
			}
133
		}
134
	}
135

    
136
	protected String getJsonForActivity(final Resource res, final String resNameForCatalogue) throws IOException {
137
		JsonFactory jsonFactory = new JsonFactory();
138
		final ByteArrayOutputStream out = new ByteArrayOutputStream();
139
		BufferedOutputStream bos = new BufferedOutputStream(out);
140
		JsonGenerator jg = jsonFactory.createGenerator(bos, JsonEncoding.UTF8);
141
		jg.writeStartObject();
142
		writeCommonFields(jg, res, resNameForCatalogue);
143
		//TODO: from which linked E39_Actor?
144
		//for project we can take it from the maintainer team: is this correct also for other Things (like Services and Software?)
145
		jg.writeStringField("maintainer", Joiner.on(", ").join(resourceReader.getMaintainerUrls(res)));
146
		//TODO: it should be better to identify email contacts rather than generic contact labels of maintainer
147
		//jg.writeStringField("maintainer_email", Joiner.on(", ").join(resourceReader.getMaintainerContacts(res)));
148

    
149
		jg.writeArrayFieldStart("extras");
150
		addExtra(jg, "system:type", E7_Activity.getLocalName());
151
		//specific class
152
		addExtra(jg, "instance of", resourceReader.findSpecificType(res, E7_Activity).getLocalName());
153
		if (res.getURI().startsWith(PARTHENOS_BASE_URL))
154
			addExtra(jg, "Parthenos URL", res.getURI());
155
		//for Services
156
		addExtra(jg, "competence", resourceReader.getCompetence(res));
157
		addExtra(jg, "provided by", Joiner.on(", ").join(resourceReader.getProviderNames(res)));
158
		addExtra(jg, "declared begin/end of operation", Joiner.on(", ").join(resourceReader.getDeclarativeTimes(res)));
159
		//TODO: where to get it?
160
		addExtra(jg, "last confirmation", "");
161
		//TODO: where to get it?
162
		addExtra(jg, "date of registration", "");
163
		addExtra(jg, "availability", resourceReader.getAvailability(res));
164
		//TODO: where to get it?
165
		addExtra(jg, "condition of use", resourceReader.getConditionOfUse(res));
166
		addExtra(jg, "contact points", Joiner.on(", ").join(resourceReader.getProviderContactPoints(res)));
167
		addExtra(jg, "activity type", Joiner.on(", ").join(resourceReader.getActivityTypes(res)));
168
		addExtra(jg, "hosts", Joiner.on(", ").join(resourceReader.getHostedStuff(res)));
169
		addExtra(jg, "online access point", Joiner.on(", ").join(resourceReader.getAccessPoints(res)));
170
		addExtra(jg, "protocol", Joiner.on(", ").join(resourceReader.getProtocols(res)));
171
		addExtra(jg,"curates", Joiner.on(", ").join(resourceReader.getCuratedObjects(res)));
172
		//TODO: where to get it?
173
		addExtra(jg, "runs on request", "");
174
		addExtra(jg, "delivers on request", Joiner.on(", ").join(resourceReader.getDeliversOnRequest(res)));
175
		addExtra(jg, "uses curation plan", Joiner.on(", ").join(resourceReader.getCurationPlans(res)));
176

    
177
		//for Projects
178
		addExtra(jg, "offers", Joiner.on(", ").join(resourceReader.getOfferedServiceUrls(res)));
179
		addExtra(jg, "started", Joiner.on(", ").join(resourceReader.getStartTimes(res)));
180

    
181
		jg.writeEndArray(); //end extras
182

    
183
		jg.writeEndObject();
184
		jg.close();
185
		return out.toString("UTF-8");
186
	}
187

    
188
	protected String getJsonForActor(final Resource res, final String resNameForCatalogue) throws IOException {
189
		JsonFactory jsonFactory = new JsonFactory();
190
		final ByteArrayOutputStream out = new ByteArrayOutputStream();
191
		BufferedOutputStream bos = new BufferedOutputStream(out);
192
		JsonGenerator jg = jsonFactory.createGenerator(bos, JsonEncoding.UTF8);
193
		jg.writeStartObject();
194
		writeCommonFields(jg, res, resNameForCatalogue);
195

    
196
		jg.writeArrayFieldStart("extras");
197
		addExtra(jg, "system:type", E39_Actor.getLocalName());
198
		//specific class
199
		addExtra(jg, "instance of", resourceReader.findSpecificType(res, E39_Actor).getLocalName());
200
		if (res.getURI().startsWith(PARTHENOS_BASE_URL))
201
			addExtra(jg, "Parthenos URL", res.getURI());
202
		addExtra(jg, "has member", Joiner.on(", ").join(resourceReader.getMemberUrls(res)));
203
		addExtra(jg, "is member of", Joiner.on(", ").join(resourceReader.isMemberOf(res)));
204
		addExtra(jg, "has contact point", Joiner.on(", ").join(resourceReader.getResourceDirectContactPoints(res)));
205
		addExtra(jg, "provides", Joiner.on(", ").join(resourceReader.getProvidedServiceUrls(res)));
206
		jg.writeEndArray();
207

    
208
		jg.writeEndObject();
209
		jg.close();
210
		return out.toString("UTF-8");
211
	}
212

    
213
	protected String getJsonForThing(final Resource res, final String resNameForCatalogue) throws IOException {
214
		JsonFactory jsonFactory = new JsonFactory();
215
		final ByteArrayOutputStream out = new ByteArrayOutputStream();
216
		BufferedOutputStream bos = new BufferedOutputStream(out);
217
		JsonGenerator jg = jsonFactory.createGenerator(bos, JsonEncoding.UTF8);
218
		jg.writeStartObject();
219
		writeCommonFields(jg, res, resNameForCatalogue);
220

    
221
		jg.writeArrayFieldStart("extras");
222
		addExtra(jg, "system:type", E70_Thing.getLocalName());
223
		//specific class
224
		addExtra(jg, "instance of", resourceReader.findSpecificType(res, E70_Thing).getLocalName());
225
		if (res.getURI().startsWith(PARTHENOS_BASE_URL)) {
226
			addExtra(jg, "Parthenos URL", res.getURI());
227
		}
228
		//TODO: things include digital objects, software, datasets, schema. Guess we should know what to add here.
229
		addExtra(jg, "is part of", Joiner.on(", ").join(resourceReader.getIsPartOfUrls(res)));
230
		addExtra(jg, "has part", Joiner.on(", ").join(resourceReader.getHasPartUrls(res)));
231
		addExtra(jg, "curated by", Joiner.on(", ").join(resourceReader.getCuratorUrls(res)));
232
		addExtra(jg, "curation plan", Joiner.on(", ").join(resourceReader.getCurationPlans(res)));
233
		addExtra(jg, "hosted by", Joiner.on(", ").join(resourceReader.getHostedBys(res)));
234
		//TODO where to get the encoding types? Should we through the Creation event?
235
		//addExtra(jg, "encoding type", Joiner.on(", ").join());
236
		//TODO where to get the schema/formats? Should we through the Creation event?
237
		//addExtra(jg, "encoding type", Joiner.on(", ").join());
238
		//TODO where to get the creator? Should we through the Creation event?
239
		//addExtra(jg, "creator", Joiner.on(", ").join());
240
		addExtra(jg, "subject", Joiner.on(", ").join(resourceReader.getSubjects(res)));
241
		addExtra(jg, "temporal coverage", Joiner.on(", ").join(resourceReader.getTemporalCoverages(res)));
242
		addExtra(jg, "spatial coverage", Joiner.on(", ").join(resourceReader.getSpatialCoverages(res)));
243

    
244
		jg.writeEndArray();
245

    
246
		jg.writeEndObject();
247
		jg.close();
248
		return out.toString("UTF-8");
249
	}
250

    
251
	protected String getJsonForDesignProcedure(final Resource res, final String resNameForCatalogue) throws IOException {
252
		JsonFactory jsonFactory = new JsonFactory();
253
		final ByteArrayOutputStream out = new ByteArrayOutputStream();
254
		BufferedOutputStream bos = new BufferedOutputStream(out);
255
		JsonGenerator jg = jsonFactory.createGenerator(bos, JsonEncoding.UTF8);
256
		jg.writeStartObject();
257
		writeCommonFields(jg, res, resNameForCatalogue);
258

    
259
		jg.writeArrayFieldStart("extras");
260
		addExtra(jg, "system:type", E29_Design_or_Procedure.getLocalName());
261
		//specific class
262
		addExtra(jg, "instance of", resourceReader.findSpecificType(res, E29_Design_or_Procedure).getLocalName());
263
		if (res.getURI().startsWith(PARTHENOS_BASE_URL)) {
264
			addExtra(jg, "Parthenos URL", res.getURI());
265
		}
266
		//TODO: add additional metadata for E29_Design_or_Procedure, if any
267
		jg.writeEndArray();
268

    
269
		jg.writeEndObject();
270
		jg.close();
271
		return out.toString("UTF-8");
272
	}
273

    
274
	protected void addExtra(final JsonGenerator jg, final String key, final String value) throws IOException {
275
		jg.writeStartObject();
276
		jg.writeStringField("key", key);
277
		jg.writeStringField("value", value);
278
		jg.writeEndObject();
279
	}
280

    
281
	protected void writeCommonFields(final JsonGenerator jg, final Resource res, final String resNameForCatalogue) throws IOException {
282
		//end of URI
283
		jg.writeStringField("name", resNameForCatalogue);
284
		//default license
285
		jg.writeStringField("license_id", "notspecified");
286
		String title = resourceReader.getTitle(res);
287
		if (StringUtils.isBlank(title))
288
			title = resNameForCatalogue;
289
		jg.writeStringField("title", title);
290
		//description
291
		jg.writeStringField("notes", resourceReader.getDescription(res));
292
		//the names of all superclasses of the entity
293
		jg.writeArrayFieldStart("tags");
294
		Iterator<String> classNames = resourceReader.getRDFClassNames(res);
295
		while (classNames.hasNext()) {
296
			jg.writeStartObject();
297
			jg.writeStringField("name", classNames.next());
298
			jg.writeEndObject();
299
		}
300
		jg.writeEndArray();
301
		//RI from which the entity has been collected, the source from which the RI collected the entity (if available).
302
		//TODO: other Actors to add as catalogue group?
303
		Iterator<String> providers = resourceReader.getProviderNames(res);
304
		jg.writeArrayFieldStart("groups");
305
		while(providers.hasNext()){
306
			String provider = providers.next();
307
			jg.writeStartObject();
308
			jg.writeStringField("name", provider);
309
			jg.writeEndObject();
310
		}
311
		jg.writeEndArray();
312
	}
313

    
314

    
315
	public ResourceReader getResourceReader() {
316
		return resourceReader;
317
	}
318

    
319
	public void setResourceReader(final ResourceReader resourceReader) {
320
		this.resourceReader = resourceReader;
321
	}
322

    
323
	public CatalogueAPIClient getCatalogueAPIClient() {
324
		return catalogueAPIClient;
325
	}
326

    
327
	public void setCatalogueAPIClient(final CatalogueAPIClient catalogueAPIClient) {
328
		this.catalogueAPIClient = catalogueAPIClient;
329
	}
330
}
(3-3/3)