Revision 49180
Added by Alessia Bardi over 6 years ago
GCubeResourceGenerator.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.parthenos.registry; |
2 | 2 |
|
3 |
import java.io.IOException; |
|
4 |
import java.io.StringWriter; |
|
5 |
import java.util.List; |
|
3 | 6 |
import java.util.Map; |
7 |
import java.util.Set; |
|
4 | 8 |
|
9 |
import com.fasterxml.jackson.core.JsonFactory; |
|
10 |
import com.fasterxml.jackson.core.JsonGenerator; |
|
5 | 11 |
import com.google.common.collect.Lists; |
6 | 12 |
import com.google.common.collect.Maps; |
7 |
import eu.dnetlib.parthenos.publisher.SaxonHelper; |
|
8 |
import net.sf.saxon.s9api.*; |
|
13 |
import com.google.common.collect.Sets; |
|
14 |
import eu.dnetlib.parthenos.CRM; |
|
15 |
import eu.dnetlib.parthenos.CRMdig; |
|
16 |
import eu.dnetlib.parthenos.CRMpe; |
|
17 |
import org.apache.commons.io.IOUtils; |
|
9 | 18 |
import org.apache.commons.lang3.StringUtils; |
10 | 19 |
import org.apache.commons.logging.Log; |
11 | 20 |
import org.apache.commons.logging.LogFactory; |
12 |
import org.gcube.informationsystem.model.ER; |
|
13 |
import org.springframework.beans.factory.annotation.Autowired; |
|
21 |
import org.apache.jena.assembler.AssemblerHelp; |
|
22 |
import org.apache.jena.assembler.exceptions.AmbiguousSpecificTypeException; |
|
23 |
import org.apache.jena.ontology.OntModel; |
|
24 |
import org.apache.jena.ontology.OntModelSpec; |
|
25 |
import org.apache.jena.rdf.model.*; |
|
26 |
import org.apache.jena.vocabulary.RDF; |
|
27 |
import org.apache.jena.vocabulary.RDFS; |
|
28 |
import org.gcube.informationsystem.model.entity.facet.IdentifierFacet.IdentificationType; |
|
14 | 29 |
import org.springframework.stereotype.Component; |
15 | 30 |
|
16 | 31 |
/** |
... | ... | |
23 | 38 |
|
24 | 39 |
private static final Log log = LogFactory.getLog(GCubeResourceGenerator.class); |
25 | 40 |
|
26 |
private static final String OAI_NAMESPACE_URI = "http://www.openarchives.org/OAI/2.0/"; |
|
27 |
private static final String DRI_NAMESPACE_URI = "http://www.driver-repository.eu/namespace/dri"; |
|
28 |
private static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; |
|
41 |
private OntModel baseModel; |
|
29 | 42 |
|
30 |
@Autowired |
|
31 |
private SaxonHelper saxonHelper; |
|
32 |
private XPathSelector xpathSelectorRDFDescription; |
|
33 |
private XPathSelector xpathSelectorObjIdentifier; |
|
43 |
/**
 * Creates the generator and fills the shared base ontology model with the
 * CRMpe, CRM and CRMdig RDFS schemas.
 * <p>
 * The model is created with {@link OntModelSpec#RDFS_MEM_TRANS_INF} and each schema is loaded
 * with {@code baseModel.read(...)}.
 * NOTE(review): the *.RDFS_URL constants are presumably remote URLs, in which case the schemas
 * are fetched at construction time -- confirm against the CRM/CRMpe/CRMdig vocabulary classes.
 * </p>
 */
public GCubeResourceGenerator() {
    baseModel = ModelFactory.createOntologyModel(OntModelSpec.RDFS_MEM_TRANS_INF);
    baseModel.read(CRMpe.RDFS_URL);
    baseModel.read(CRM.RDFS_URL);
    baseModel.read(CRMdig.RDFS_URL);
}
|
34 | 50 |
|
51 |
protected InfModel loadBaseModel(){ |
|
52 |
return ModelFactory.createRDFSModel(baseModel); |
|
53 |
} |
|
54 |
|
|
35 | 55 |
/** |
36 |
* Generates the Gcube ER instances (Entity [Resource/Facet] and Relation) to be registered from a given RDF record serialised in RDF/XML PLAIN format. |
|
56 |
* Generates the Gcube ER instances (Entity [Resource/Facet] and Relation) as json strings to be registered from a given RDF record serialised in RDF/XML PLAIN format.
|
|
37 | 57 |
* @param rdfRecord RDF record in RDF/XML PLAIN format |
38 |
* @return an Iterable of GCube ER or an empty Iterable if the record is blank or is without objIdentifier
|
|
58 |
* @return an Iterable of json strings or an empty Iterable if the record is blank
|
|
39 | 59 |
*/ |
40 |
public Iterable<ER> getGCubeER(final String rdfRecord) throws SaxonApiException { |
|
60 |
public Iterable<String> getGCubeER(final String rdfRecord) throws IOException { |
|
61 |
List<String> jsonResources = Lists.newArrayList(); |
|
41 | 62 |
//This map contains the pair (rdf:about, registryUUID) registered so far, useful I guess to create relationships between resources/facets. |
42 | 63 |
Map<String,String> resourceIdentifiers = Maps.newHashMap(); |
43 | 64 |
if (StringUtils.isBlank(rdfRecord)) { |
44 | 65 |
log.warn("Got empty record"); |
45 | 66 |
return Lists.newArrayList(); |
46 | 67 |
} |
47 |
//let's avoid to parse from scratch for every xpath |
|
48 |
XdmNode document = this.saxonHelper.help().parseXML(rdfRecord); |
|
49 |
xpathSelectorObjIdentifier.setContextItem(document); |
|
50 |
String objIdentifier = xpathSelectorObjIdentifier.evaluateSingle().getStringValue(); |
|
51 |
if (StringUtils.isBlank(objIdentifier)) { |
|
52 |
log.warn("Got record with no objIdentifier -- skipping"); |
|
53 |
return Lists.newArrayList(); |
|
54 |
} |
|
55 |
xpathSelectorRDFDescription.setContextItem(document); |
|
56 |
for(XdmItem xdmItem : xpathSelectorRDFDescription){ |
|
57 |
XdmNode node = (XdmNode) xdmItem; |
|
58 | 68 |
|
69 |
InfModel model = loadBaseModel(); |
|
70 |
model.read(IOUtils.toInputStream(rdfRecord, "UTF-8"), CRMpe.NS); |
|
59 | 71 |
|
72 |
JsonFactory jsonFactory = new JsonFactory(); |
|
73 |
Set<String> uriProcessed = Sets.newHashSet(); |
|
74 |
|
|
75 |
//let's start with services |
|
76 |
ResIterator iter = model.listResourcesWithProperty(RDF.type, CRMpe.PE1_Service); |
|
77 |
while (iter.hasNext()) { |
|
78 |
Resource res = iter.nextResource(); |
|
79 |
String resourceURI = res.getURI(); |
|
80 |
if (!uriProcessed.contains(resourceURI)) { |
|
81 |
log.debug("Processing "+resourceURI); |
|
82 |
Resource specificType = findSpecificType(res, CRMpe.PE1_Service); |
|
83 |
StringWriter sw = new StringWriter(); |
|
84 |
JsonGenerator jg = jsonFactory.createGenerator(sw); |
|
85 |
jg.writeStartObject(); |
|
86 |
|
|
87 |
jg.writeStringField("@class", specificType.getURI()); |
|
88 |
|
|
89 |
//******THE FACETS *******// |
|
90 |
jg.writeArrayFieldStart("consistsOf"); |
|
91 |
//list of facets |
|
92 |
|
|
93 |
writeIdentifierFacet(jg, resourceURI); |
|
94 |
writeInfoFacet(jg, getTitleFromRDFResource(res), getDescriptionFromRDFResource(res), getCompetenceFromRDFResource(res), |
|
95 |
getAvailabilityFromRDFResource(res)); |
|
96 |
writeEventFacet(jg); |
|
97 |
writeRightsFacet(jg, res); |
|
98 |
writeContactReferenceFacet(jg, res); |
|
99 |
writeDesignatedAccessPointFacet(jg, res); |
|
100 |
|
|
101 |
jg.writeEndArray(); |
|
102 |
|
|
103 |
// ******* RELATIONSHIPS *****// |
|
104 |
|
|
105 |
jg.writeEndObject(); |
|
106 |
jg.close(); |
|
107 |
String json = sw.toString(); |
|
108 |
log.debug(json); |
|
109 |
jsonResources.add(json); |
|
110 |
uriProcessed.add(resourceURI); |
|
111 |
} else { |
|
112 |
log.debug(resourceURI+" already processed, now skipping it"); |
|
113 |
} |
|
60 | 114 |
} |
115 |
return jsonResources; |
|
116 |
} |
|
61 | 117 |
|
118 |
/** |
|
119 |
* Finds the most specific type of res. |
|
120 |
* @param res Resource you want to find the most specific type |
|
121 |
* @param fallbackType Resource representing the type to return if there is no type or if we get AmbiguousSpecificTypeException |
|
122 |
* @return Resource: the most specific type, if any. fallbackType otherwise |
|
123 |
*/ |
|
124 |
protected Resource findSpecificType(final Resource res, final Resource fallbackType){ |
|
125 |
Resource specType = fallbackType; |
|
126 |
try{ |
|
127 |
specType = AssemblerHelp.findSpecificType(res, fallbackType); |
|
128 |
}catch(AmbiguousSpecificTypeException e){ |
|
129 |
log.warn(res.getURI()+": "+e.getMessage()); |
|
130 |
} |
|
131 |
return specType; |
|
132 |
} |
|
133 |
/* |
|
134 |
{"@class": "IsIdentifiedBy", |
|
135 |
"target": { |
|
136 |
"value": "http://pippo", |
|
137 |
"type:'URL", |
|
138 |
"@class": "IdentifierFacet", |
|
139 |
} |
|
140 |
} |
|
141 |
*/ |
|
142 |
protected void writeIdentifierFacet(final JsonGenerator jg, final String identifier) throws IOException { |
|
143 |
jg.writeStartObject(); |
|
144 |
jg.writeStringField("@class", "P1_is_identified_by"); |
|
145 |
jg.writeObjectFieldStart("target"); |
|
146 |
jg.writeStringField("@class", "IdentifierFacet"); |
|
147 |
jg.writeStringField("value", identifier); |
|
148 |
jg.writeStringField("type", IdentificationType.URI.name()); |
|
149 |
jg.writeEndObject(); |
|
150 |
jg.writeEndObject(); |
|
151 |
} |
|
62 | 152 |
|
63 |
return null; |
|
153 |
protected void writeInfoFacet(final JsonGenerator jg, final String title, final String description, final String competence, final String availability) throws IOException { |
|
154 |
jg.writeStartObject(); |
|
155 |
jg.writeStringField("@class", "PE_Info_Facet"); |
|
156 |
jg.writeObjectFieldStart("target"); |
|
157 |
jg.writeStringField("title", title); |
|
158 |
jg.writeStringField("description", description); |
|
159 |
jg.writeStringField("competence", competence); |
|
160 |
//TODO: uncomment this when George adds the rel to the model, see method getAvailabilityFromRDFResource below |
|
161 |
//jg.writeStringField("availability", availability); |
|
162 |
jg.writeEndObject(); |
|
163 |
jg.writeEndObject(); |
|
64 | 164 |
} |
65 | 165 |
|
66 |
private String extractFromRecord(final String record, final XPathSelector xPathSelector) { |
|
67 |
try { |
|
68 |
return this.saxonHelper.help().setSerializerProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector); |
|
69 |
} catch (SaxonApiException e) { |
|
70 |
throw new RuntimeException("Cannot extract content ", e); |
|
166 |
|
|
167 |
protected void writeEventFacet(final JsonGenerator jg){ |
|
168 |
//TODO: implement me. get begin/end of operation from PP42_has_declarative_time |
|
169 |
} |
|
170 |
|
|
171 |
protected void writeRightsFacet(final JsonGenerator jg, final Resource res) throws IOException { |
|
172 |
//TODO: implement me. E30_Right facet extends from licenseFacet but it is not correct (textUrl is mandatory, we can't use it in Parthenos) |
|
173 |
} |
|
174 |
|
|
175 |
protected void writeContactReferenceFacet(final JsonGenerator jg, final Resource resource) throws IOException { |
|
176 |
//PP2_provided_by |
|
177 |
final Statement s = resource.getProperty(CRMpe.PP2_provided_by); |
|
178 |
|
|
179 |
if(s!=null){ |
|
180 |
String appellation = ""; |
|
181 |
String description = ""; |
|
182 |
String legalAddress = ""; |
|
183 |
String email =""; |
|
184 |
String website = ""; |
|
185 |
String address = ""; |
|
186 |
String phoneNumber =""; |
|
187 |
|
|
188 |
Resource provider = s.getResource(); |
|
189 |
if(provider != null){ |
|
190 |
//more contact point per provider |
|
191 |
StmtIterator contactPointsStm = provider.listProperties(CRM.P76_has_contact_point); |
|
192 |
while(contactPointsStm.hasNext()){ |
|
193 |
Resource cp = contactPointsStm.nextStatement().getResource(); |
|
194 |
appellation = getLabelFromRDFResource(cp); |
|
195 |
description = getDescriptionFromRDFResource(cp); |
|
196 |
//TODO: where to find email and legaladdress, address, phoneNumber, website? |
|
197 |
jg.writeStartObject(); |
|
198 |
jg.writeStringField("@class", "PE_Contact_Reference_Facet"); |
|
199 |
jg.writeObjectFieldStart("target"); |
|
200 |
jg.writeStringField("appellation", appellation); |
|
201 |
jg.writeStringField("description", description); |
|
202 |
jg.writeStringField("legalAddress", legalAddress); |
|
203 |
jg.writeStringField("email", email); |
|
204 |
jg.writeStringField("website", website); |
|
205 |
jg.writeStringField("address", address); |
|
206 |
jg.writeStringField("phoneNumber", phoneNumber); |
|
207 |
jg.writeEndObject(); |
|
208 |
jg.writeEndObject(); |
|
209 |
} |
|
210 |
} |
|
71 | 211 |
} |
212 |
|
|
72 | 213 |
} |
73 | 214 |
|
74 |
private void prepareXpathSelectors() throws SaxonApiException { |
|
75 |
Map<String, String> namespaces = Maps.newHashMap(); |
|
76 |
namespaces.put("oai", OAI_NAMESPACE_URI); |
|
77 |
namespaces.put("dri", DRI_NAMESPACE_URI); |
|
78 |
namespaces.put("rdf", RDF_NAMESPACE_URI); |
|
79 |
xpathSelectorObjIdentifier = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:objIdentifier/text()", namespaces); |
|
80 |
xpathSelectorRDFDescription = this.saxonHelper.help().prepareXPathSelector("//oai:metadata/rdf:RDF/rdf:Description", namespaces); |
|
215 |
protected void writeDesignatedAccessPointFacet(final JsonGenerator jg, final Resource resource) throws IOException { |
|
216 |
//PP28_has_designated_access_point |
|
217 |
StmtIterator apStms = resource.listProperties(CRMpe.PP28_has_designated_access_point); |
|
218 |
while(apStms.hasNext()){ |
|
219 |
String entryName = getLabelFromRDFResource(resource); |
|
220 |
//(mandatory) |
|
221 |
String endpoint= ""; |
|
222 |
String protocol= ""; |
|
223 |
String description= getDescriptionFromRDFResource(resource); |
|
224 |
//TODO: authorization is a ValueSchema, I do not understand how to use it and how to map it |
|
225 |
String authorization= ""; |
|
226 |
Resource ap = apStms.next().getResource(); |
|
227 |
endpoint = ap.getURI(); |
|
228 |
//TODO: where to get protocol and authorization? |
|
229 |
jg.writeStartObject(); |
|
230 |
jg.writeStringField("@class", "PE29_Access_Point"); |
|
231 |
jg.writeObjectFieldStart("target"); |
|
232 |
jg.writeStringField("entryName", entryName); |
|
233 |
jg.writeStringField("description", description); |
|
234 |
jg.writeStringField("endpoint", endpoint); |
|
235 |
jg.writeStringField("protocol", protocol); |
|
236 |
//TODO: authorization is a ValueSchema, I do not understand how to use it and how to map it |
|
237 |
//jg.writeStringField("authorization", authorization); |
|
238 |
jg.writeEndObject(); |
|
239 |
jg.writeEndObject(); |
|
240 |
|
|
241 |
} |
|
242 |
|
|
81 | 243 |
} |
82 | 244 |
|
83 | 245 |
|
84 | 246 |
|
85 |
public GCubeResourceGenerator() throws SaxonApiException { |
|
86 |
prepareXpathSelectors(); |
|
247 |
|
|
248 |
|
|
249 |
protected String getTitleFromRDFResource(final Resource resource){ |
|
250 |
String title = ""; |
|
251 |
final Statement s = resource.getProperty(CRM.P1_is_identified_by); |
|
252 |
if(s != null){ |
|
253 |
Resource titleRes = s.getResource(); |
|
254 |
if(titleRes != null && (titleRes.hasProperty(RDF.type, CRM.E35_Title) || titleRes.hasProperty(RDF.type, CRM.E41_Appellation) )){ |
|
255 |
title = getLabelFromRDFResource(titleRes); |
|
256 |
} |
|
257 |
} |
|
258 |
return title; |
|
87 | 259 |
} |
88 | 260 |
|
89 |
public SaxonHelper getSaxonHelper() { |
|
90 |
return saxonHelper; |
|
261 |
protected String getDescriptionFromRDFResource(final Resource resource){ |
|
262 |
if(resource.hasProperty(CRM.P3_has_note)){ |
|
263 |
return resource.getProperty(CRM.P3_has_note).getString(); |
|
264 |
} |
|
265 |
else return ""; |
|
91 | 266 |
} |
92 | 267 |
|
93 |
public void setSaxonHelper(final SaxonHelper saxonHelper) { |
|
94 |
this.saxonHelper = saxonHelper; |
|
268 |
protected String getLabelFromRDFResource(final Resource resource){ |
|
269 |
if(resource.hasProperty(RDFS.label)){ |
|
270 |
return resource.getProperty(RDFS.label).getString(); |
|
271 |
} |
|
272 |
else return ""; |
|
95 | 273 |
} |
274 |
|
|
275 |
protected String getCompetenceFromRDFResource(final Resource resource){ |
|
276 |
String comp = ""; |
|
277 |
if(resource.hasProperty(CRMpe.PP45_has_competency)){ |
|
278 |
Resource compRes = resource.getProperty(CRMpe.PP45_has_competency).getResource(); |
|
279 |
if(compRes.hasProperty(RDFS.label)) |
|
280 |
comp = compRes.getProperty(RDFS.label).getString(); |
|
281 |
} |
|
282 |
return comp; |
|
283 |
} |
|
284 |
|
|
285 |
protected String getAvailabilityFromRDFResource(final Resource resource){ |
|
286 |
//TODO: implement this when George adds the rel to the model |
|
287 |
return ""; |
|
288 |
} |
|
289 |
|
|
290 |
|
|
291 |
|
|
292 |
|
|
293 |
|
|
294 |
|
|
295 |
|
|
296 |
|
|
96 | 297 |
} |
Also available in: Unified diff
First mapping for services and tests