1
|
package eu.dnetlib.parthenos.virtuoso;
|
2
|
|
3
|
import java.util.Map;
|
4
|
|
5
|
import eu.dnetlib.parthenos.publisher.ParthenosPublisherException;
|
6
|
import eu.dnetlib.parthenos.publisher.SaxonHelper;
|
7
|
import net.sf.saxon.s9api.SaxonApiException;
|
8
|
import net.sf.saxon.s9api.Serializer;
|
9
|
import net.sf.saxon.s9api.XPathSelector;
|
10
|
import org.apache.commons.io.IOUtils;
|
11
|
import org.apache.commons.lang3.StringUtils;
|
12
|
import org.apache.commons.logging.Log;
|
13
|
import org.apache.commons.logging.LogFactory;
|
14
|
import org.apache.jena.datatypes.xsd.XSDDatatype;
|
15
|
import org.apache.jena.ext.com.google.common.collect.Maps;
|
16
|
import org.apache.jena.rdf.model.*;
|
17
|
import virtuoso.jena.driver.VirtModel;
|
18
|
|
19
|
/**
|
20
|
* Created by Alessia Bardi on 12/07/2017.
|
21
|
*
|
22
|
* @author Alessia Bardi
|
23
|
*/
|
24
|
public class VirtuosoClient {
|
25
|
|
26
|
private static final Log log = LogFactory.getLog(VirtuosoClient.class);
|
27
|
|
28
|
private static final String OAI_NAMESPACE_URI = "http://www.openarchives.org/OAI/2.0/";
|
29
|
private static final String DRI_NAMESPACE_URI = "http://www.driver-repository.eu/namespace/dri";
|
30
|
private static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
31
|
|
32
|
private static final String PROVENANCE_GRAPH = "provenance";
|
33
|
public static Property IS_API_OF = ResourceFactory.createProperty("dnet", "isApiOf");
|
34
|
public static Property COLL_FROM = ResourceFactory.createProperty("dnet", "collectedFrom");
|
35
|
public static Property COLL_IN_DATE = ResourceFactory.createProperty("dnet", "collectedInDate");
|
36
|
public static Property TRANS_IN_DATE = ResourceFactory.createProperty("dnet", "transformedInDate");
|
37
|
|
38
|
private SaxonHelper saxonHelper;
|
39
|
private XPathSelector xpathSelectorObjIdentifier;
|
40
|
private XPathSelector xpathSelectorCollectionDate;
|
41
|
private XPathSelector xpathSelectorTransformationDate;
|
42
|
private XPathSelector xpathSelectorDatasourceName;
|
43
|
private XPathSelector xpathSelectorDatasourceApi;
|
44
|
private XPathSelector xpathSelectorRDF;
|
45
|
|
46
|
private String connectionString;
|
47
|
private String username;
|
48
|
private String password;
|
49
|
private String defaultBaseURI;
|
50
|
|
51
|
protected VirtuosoClient(final String connectionString,
|
52
|
final String username,
|
53
|
final String password,
|
54
|
final SaxonHelper saxonHelper,
|
55
|
final String defaultBaseURI)
|
56
|
throws ParthenosPublisherException {
|
57
|
this.connectionString = connectionString;
|
58
|
this.username = username;
|
59
|
this.password = password;
|
60
|
this.saxonHelper = saxonHelper;
|
61
|
this.defaultBaseURI = defaultBaseURI;
|
62
|
try {
|
63
|
prepareXpathSelectors();
|
64
|
}catch(SaxonApiException e){
|
65
|
log.error(e.getMessage());
|
66
|
throw new ParthenosPublisherException(e);
|
67
|
}
|
68
|
}
|
69
|
|
70
|
//TODO: exploit new method eu.dnetlib.parthenos.publisher.SaxonHelper.Helper.parseXML() to avoid re-parsing the full record.
|
71
|
|
72
|
public long feed(final String record) throws ParthenosPublisherException{
|
73
|
Model md = null ;
|
74
|
try {
|
75
|
if (StringUtils.isBlank(record)) {
|
76
|
log.warn("Got empty record");
|
77
|
return 0;
|
78
|
}
|
79
|
String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier);
|
80
|
if (StringUtils.isBlank(objIdentifier)) {
|
81
|
log.warn("Got record with no objIdentifier -- skipping");
|
82
|
return 0;
|
83
|
}
|
84
|
String rdfBlock = extractFromRecord(record, xpathSelectorRDF);
|
85
|
if (StringUtils.isBlank(rdfBlock)) {
|
86
|
log.warn("Missing rdf:RDF in record with objIdentifier " + objIdentifier + " all triples in that named graph will be deleted");
|
87
|
}
|
88
|
String collectionDate = extractFromRecord(record, xpathSelectorCollectionDate);
|
89
|
String transformationDate = extractFromRecord(record, xpathSelectorTransformationDate);
|
90
|
String datasource = extractFromRecord(record, xpathSelectorDatasourceName);
|
91
|
String dsInterface = extractFromRecord(record, xpathSelectorDatasourceApi);
|
92
|
|
93
|
String namedGraph = getRecordDefaultURI(objIdentifier, dsInterface);
|
94
|
log.debug("Trying to open the database model " + namedGraph+", connection string "+getConnectionString());
|
95
|
|
96
|
md = VirtModel.openDatabaseModel(namedGraph, getConnectionString(), getUsername(), getPassword());
|
97
|
log.debug("Opened virtuoso model for graph " + namedGraph);
|
98
|
md.removeAll();
|
99
|
log.debug("Removed all triples from graph " + namedGraph);
|
100
|
md.read(IOUtils.toInputStream(rdfBlock, "UTF-8"), getDefaultBaseURI());
|
101
|
long size = md.size();
|
102
|
log.debug("Graph " + namedGraph + " now has " + size + " triples");
|
103
|
|
104
|
long ntriples = feedProvenance(namedGraph, collectionDate, transformationDate, datasource, dsInterface);
|
105
|
log.debug("provenance graph for " + namedGraph + " updated with " + ntriples + " triples");
|
106
|
md.close();
|
107
|
return size;
|
108
|
}catch(Throwable e){
|
109
|
if (md != null && !md.isClosed()) md.close();
|
110
|
log.error(e);
|
111
|
throw new ParthenosPublisherException(e);
|
112
|
}
|
113
|
}
|
114
|
|
115
|
long feedProvenance(final String namedGraphURI, final String collectionDate, final String transformationDate, final String datasource, final String api) {
|
116
|
Model md = VirtModel.openDatabaseModel(PROVENANCE_GRAPH, getConnectionString(), getUsername(), getPassword());
|
117
|
|
118
|
//TODO: use prov-o instead: https://www.w3.org/TR/prov-o/#description
|
119
|
Resource rApi = ResourceFactory.createResource(defaultBaseURI + api);
|
120
|
Resource r = ResourceFactory.createResource(namedGraphURI);
|
121
|
Statement stmApi =
|
122
|
ResourceFactory.createStatement(rApi, IS_API_OF, ResourceFactory.createPlainLiteral(datasource));
|
123
|
Statement stmCollFrom =
|
124
|
ResourceFactory.createStatement(r, COLL_FROM, rApi);
|
125
|
Statement stmCollDate = ResourceFactory
|
126
|
.createStatement(r, COLL_IN_DATE, ResourceFactory.createTypedLiteral(collectionDate, XSDDatatype.XSDdateTime));
|
127
|
Statement stmTransDate = ResourceFactory
|
128
|
.createStatement(r, TRANS_IN_DATE, ResourceFactory.createTypedLiteral(transformationDate, XSDDatatype.XSDdateTime));
|
129
|
|
130
|
//let's remove previous provenance statements for this resource:
|
131
|
md.removeAll(r, null, null);
|
132
|
//and add the new ones
|
133
|
md.add(stmApi).add(stmCollFrom).add(stmCollDate).add(stmTransDate);
|
134
|
md.close();
|
135
|
return 3;
|
136
|
}
|
137
|
|
138
|
public long feed(final Iterable<String> records) throws ParthenosPublisherException {
|
139
|
//TODO: can we do it in parallel? if all records have different objIdentifier it is safe, and this must be the case anyway, because the source of records is a D-Net mdstore.
|
140
|
long count = 0;
|
141
|
for (String r : records) count += this.feed(r);
|
142
|
return count;
|
143
|
}
|
144
|
|
145
|
/**
|
146
|
* Delete all triples in named graphs collected from the given api
|
147
|
* @param api the id of the API
|
148
|
* @return the number of triples deleted from the named graphs associated to the given api
|
149
|
*/
|
150
|
public long drop(final String api){
|
151
|
Model prov = VirtModel.openDatabaseModel(PROVENANCE_GRAPH, getConnectionString(), getUsername(), getPassword());
|
152
|
//look for all named graphs associated to the api
|
153
|
Resource rApi = ResourceFactory.createResource(defaultBaseURI + api);
|
154
|
long deletedTriples = 0;
|
155
|
final ResIterator resIterator = prov.listSubjectsWithProperty(COLL_FROM, rApi);
|
156
|
while (resIterator.hasNext()) {
|
157
|
Resource namedGraphURI = resIterator.nextResource();
|
158
|
//delete all triples belonging to the r named graph
|
159
|
deletedTriples += dropNamedGraph(namedGraphURI.getURI());
|
160
|
//delete the named graph from the provenance graph
|
161
|
prov.removeAll(namedGraphURI, null, null);
|
162
|
}
|
163
|
//delete the api from the provenance graph
|
164
|
prov.removeAll(null, null, rApi);
|
165
|
prov.removeAll(rApi, null, null);
|
166
|
prov.close();
|
167
|
return deletedTriples;
|
168
|
}
|
169
|
|
170
|
private long dropNamedGraph(String namedGraphURI){
|
171
|
Model namedGraph = VirtModel.openDatabaseModel(namedGraphURI, getConnectionString(), getUsername(), getPassword());
|
172
|
long deletedTriples = namedGraph.size();
|
173
|
namedGraph.removeAll();
|
174
|
namedGraph.close();
|
175
|
return deletedTriples;
|
176
|
}
|
177
|
|
178
|
private String getRecordDefaultURI(final String objIdentifier, final String datasourceApi) {
|
179
|
return defaultBaseURI + datasourceApi + "/" + objIdentifier;
|
180
|
}
|
181
|
|
182
|
private void prepareXpathSelectors() throws SaxonApiException {
|
183
|
Map<String, String> namespaces = Maps.newHashMap();
|
184
|
namespaces.put("oai", OAI_NAMESPACE_URI);
|
185
|
namespaces.put("dri", DRI_NAMESPACE_URI);
|
186
|
namespaces.put("rdf", RDF_NAMESPACE_URI);
|
187
|
xpathSelectorObjIdentifier = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:objIdentifier/text()", namespaces);
|
188
|
xpathSelectorCollectionDate = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:dateOfCollection/text()", namespaces);
|
189
|
xpathSelectorTransformationDate = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:dateOfTransformation/text()", namespaces);
|
190
|
xpathSelectorDatasourceName = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:datasourcename/text()", namespaces);
|
191
|
xpathSelectorDatasourceApi = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:datasourceapi/text()", namespaces);
|
192
|
xpathSelectorRDF = this.saxonHelper.help().prepareXPathSelector("//oai:metadata/rdf:RDF", namespaces);
|
193
|
}
|
194
|
|
195
|
private String extractFromRecord(final String record, final XPathSelector xPathSelector) {
|
196
|
try {
|
197
|
return this.saxonHelper.help().setSerializerProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector);
|
198
|
} catch (SaxonApiException e) {
|
199
|
log.error(e);
|
200
|
throw new RuntimeException("Cannot extract content ", e);
|
201
|
}
|
202
|
}
|
203
|
|
204
|
public String getConnectionString() {
|
205
|
return connectionString;
|
206
|
}
|
207
|
|
208
|
public String getUsername() {
|
209
|
return username;
|
210
|
}
|
211
|
|
212
|
public String getPassword() {
|
213
|
return password;
|
214
|
}
|
215
|
|
216
|
public SaxonHelper getSaxonHelper() {
|
217
|
return saxonHelper;
|
218
|
}
|
219
|
|
220
|
public String getDefaultBaseURI() {
|
221
|
return defaultBaseURI;
|
222
|
}
|
223
|
}
|