Project

General

Profile

1
package eu.dnetlib.parthenos.virtuoso;
2

    
3
import java.util.Map;
4

    
5
import eu.dnetlib.parthenos.publisher.ParthenosPublisherException;
6
import eu.dnetlib.parthenos.publisher.SaxonHelper;
7
import net.sf.saxon.s9api.SaxonApiException;
8
import net.sf.saxon.s9api.Serializer;
9
import net.sf.saxon.s9api.XPathSelector;
10
import org.apache.commons.io.IOUtils;
11
import org.apache.commons.lang3.StringUtils;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14
import org.apache.jena.datatypes.xsd.XSDDatatype;
15
import org.apache.jena.ext.com.google.common.collect.Maps;
16
import org.apache.jena.rdf.model.*;
17
import virtuoso.jena.driver.VirtModel;
18

    
19
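/**
 * Client that stores the RDF content of D-Net records in Virtuoso named graphs
 * and keeps track of their provenance.
 *
 * Created by Alessia Bardi on 12/07/2017.
 *
 * @author Alessia Bardi
 */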
24
public class VirtuosoClient {
25

    
26
	private static final Log log = LogFactory.getLog(VirtuosoClient.class);
27

    
28
	private static final String OAI_NAMESPACE_URI = "http://www.openarchives.org/OAI/2.0/";
29
	private static final String DRI_NAMESPACE_URI = "http://www.driver-repository.eu/namespace/dri";
30
	private static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
31

    
32
	private static final String PROVENANCE_GRAPH = "provenance";
33
	public static Property IS_API_OF = ResourceFactory.createProperty("dnet", "isApiOf");
34
	public static Property COLL_FROM = ResourceFactory.createProperty("dnet", "collectedFrom");
35
	public static Property COLL_IN_DATE = ResourceFactory.createProperty("dnet", "collectedInDate");
36
	public static Property TRANS_IN_DATE = ResourceFactory.createProperty("dnet", "transformedInDate");
37

    
38
	private SaxonHelper saxonHelper;
39
	private XPathSelector xpathSelectorObjIdentifier;
40
	private XPathSelector xpathSelectorCollectionDate;
41
	private XPathSelector xpathSelectorTransformationDate;
42
	private XPathSelector xpathSelectorDatasourceName;
43
	private XPathSelector xpathSelectorDatasourceApi;
44
	private XPathSelector xpathSelectorRDF;
45

    
46
	private String connectionString;
47
	private String username;
48
	private String password;
49
	private String defaultBaseURI;
50

    
51
	/**
	 * Creates a client for one Virtuoso instance and prepares the XPath selectors used to read the records.
	 *
	 * @param connectionString connection string handed to {@code VirtModel.openDatabaseModel}
	 * @param username         user name handed to {@code VirtModel.openDatabaseModel}
	 * @param password         password handed to {@code VirtModel.openDatabaseModel}
	 * @param saxonHelper      helper used to prepare and evaluate the XPath expressions
	 * @param defaultBaseURI   prefix of the named graph and API URIs, also used as base URI when reading RDF
	 * @throws ParthenosPublisherException if one of the XPath selectors cannot be prepared
	 */
	protected VirtuosoClient(final String connectionString,
			final String username,
			final String password,
			final SaxonHelper saxonHelper,
			final String defaultBaseURI)
			throws ParthenosPublisherException {
		this.connectionString = connectionString;
		this.username = username;
		this.password = password;
		this.saxonHelper = saxonHelper;
		this.defaultBaseURI = defaultBaseURI;
		try {
			prepareXpathSelectors();
		} catch (SaxonApiException e) {
			// Pass the throwable to the logger: logging only getMessage() loses the stack trace.
			log.error("Cannot prepare the XPath selectors", e);
			throw new ParthenosPublisherException(e);
		}
	}
69

    
70
	//TODO: exploit new method eu.dnetlib.parthenos.publisher.SaxonHelper.Helper.parseXML() to avoid re-parsing the full record.
71

    
72
	public long feed(final String record) throws ParthenosPublisherException{
73
		Model md = null ;
74
		try {
75
			if (StringUtils.isBlank(record)) {
76
				log.warn("Got empty record");
77
				return 0;
78
			}
79
			String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier);
80
			if (StringUtils.isBlank(objIdentifier)) {
81
				log.warn("Got record with no objIdentifier -- skipping");
82
				return 0;
83
			}
84
			String rdfBlock = extractFromRecord(record, xpathSelectorRDF);
85
			if (StringUtils.isBlank(rdfBlock)) {
86
				log.warn("Missing rdf:RDF in record with objIdentifier " + objIdentifier + " all triples in that named graph will be deleted");
87
			}
88
			String collectionDate = extractFromRecord(record, xpathSelectorCollectionDate);
89
			String transformationDate = extractFromRecord(record, xpathSelectorTransformationDate);
90
			String datasource = extractFromRecord(record, xpathSelectorDatasourceName);
91
			String dsInterface = extractFromRecord(record, xpathSelectorDatasourceApi);
92

    
93
			String namedGraph = getRecordDefaultURI(objIdentifier, dsInterface);
94
			log.debug("Trying to open the database model " + namedGraph+", connection string "+getConnectionString());
95

    
96
			md = VirtModel.openDatabaseModel(namedGraph, getConnectionString(), getUsername(), getPassword());
97
			log.debug("Opened virtuoso model for graph " + namedGraph);
98
			md.removeAll();
99
			log.debug("Removed all triples from graph " + namedGraph);
100
			md.read(IOUtils.toInputStream(rdfBlock, "UTF-8"), getDefaultBaseURI());
101
			long size = md.size();
102
			log.debug("Graph " + namedGraph + " now has " + size + " triples");
103

    
104
			long ntriples = feedProvenance(namedGraph, collectionDate, transformationDate, datasource, dsInterface);
105
			log.debug("provenance graph for " + namedGraph + " updated with " + ntriples + " triples");
106
			md.close();
107
			return size;
108
		}catch(Throwable e){
109
			if (md != null && !md.isClosed()) md.close();
110
			log.error(e);
111
			throw new ParthenosPublisherException(e);
112
		}
113
	}
114

    
115
	long feedProvenance(final String namedGraphURI, final String collectionDate, final String transformationDate, final String datasource, final String api) {
116
		Model md = VirtModel.openDatabaseModel(PROVENANCE_GRAPH, getConnectionString(), getUsername(), getPassword());
117

    
118
		//TODO: use prov-o instead: https://www.w3.org/TR/prov-o/#description
119
		Resource rApi = ResourceFactory.createResource(defaultBaseURI + api);
120
		Resource r = ResourceFactory.createResource(namedGraphURI);
121
		Statement stmApi =
122
				ResourceFactory.createStatement(rApi, IS_API_OF, ResourceFactory.createPlainLiteral(datasource));
123
		Statement stmCollFrom =
124
				ResourceFactory.createStatement(r, COLL_FROM, rApi);
125
		Statement stmCollDate = ResourceFactory
126
				.createStatement(r, COLL_IN_DATE, ResourceFactory.createTypedLiteral(collectionDate, XSDDatatype.XSDdateTime));
127
		Statement stmTransDate = ResourceFactory
128
				.createStatement(r, TRANS_IN_DATE, ResourceFactory.createTypedLiteral(transformationDate, XSDDatatype.XSDdateTime));
129

    
130
		//let's remove previous provenance statements for this resource:
131
		md.removeAll(r, null, null);
132
		//and add the new ones
133
		md.add(stmApi).add(stmCollFrom).add(stmCollDate).add(stmTransDate);
134
		md.close();
135
		return 3;
136
	}
137

    
138
	public long feed(final Iterable<String> records) throws ParthenosPublisherException {
139
		//TODO: can we do it in parallel? if all records have different objIdentifier it is safe, and this must be the case anyway, because the source of records is a D-Net mdstore.
140
		long count = 0;
141
		for (String r : records) count += this.feed(r);
142
		return count;
143
	}
144

    
145
	/**
146
	 * Delete all triples in named graphs collected from the given api
147
	 * @param api the id of the API
148
	 * @return the number of triples deleted from the named graphs associated to the given api
149
	 */
150
	public long drop(final String api){
151
		Model prov = VirtModel.openDatabaseModel(PROVENANCE_GRAPH, getConnectionString(), getUsername(), getPassword());
152
		//look for all named graphs associated to the api
153
		Resource rApi = ResourceFactory.createResource(defaultBaseURI + api);
154
		long deletedTriples = 0;
155
		final ResIterator resIterator = prov.listSubjectsWithProperty(COLL_FROM, rApi);
156
		while (resIterator.hasNext()) {
157
			Resource namedGraphURI = resIterator.nextResource();
158
			//delete all triples belonging to the r named graph
159
			deletedTriples += dropNamedGraph(namedGraphURI.getURI());
160
			//delete the named graph from the provenance graph
161
			prov.removeAll(namedGraphURI, null, null);
162
		}
163
		//delete the api from the provenance graph
164
		prov.removeAll(null, null, rApi);
165
		prov.removeAll(rApi, null, null);
166
		prov.close();
167
		return deletedTriples;
168
	}
169

    
170
	private long dropNamedGraph(String namedGraphURI){
171
		Model namedGraph = VirtModel.openDatabaseModel(namedGraphURI, getConnectionString(), getUsername(), getPassword());
172
		long deletedTriples = namedGraph.size();
173
		namedGraph.removeAll();
174
		namedGraph.close();
175
		return deletedTriples;
176
	}
177

    
178
	private String getRecordDefaultURI(final String objIdentifier, final String datasourceApi) {
179
		return defaultBaseURI + datasourceApi + "/" + objIdentifier;
180
	}
181

    
182
	private void prepareXpathSelectors() throws SaxonApiException {
183
		Map<String, String> namespaces = Maps.newHashMap();
184
		namespaces.put("oai", OAI_NAMESPACE_URI);
185
		namespaces.put("dri", DRI_NAMESPACE_URI);
186
		namespaces.put("rdf", RDF_NAMESPACE_URI);
187
		xpathSelectorObjIdentifier = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:objIdentifier/text()", namespaces);
188
		xpathSelectorCollectionDate = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:dateOfCollection/text()", namespaces);
189
		xpathSelectorTransformationDate = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:dateOfTransformation/text()", namespaces);
190
		xpathSelectorDatasourceName = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:datasourcename/text()", namespaces);
191
		xpathSelectorDatasourceApi = this.saxonHelper.help().prepareXPathSelector("//oai:header/dri:datasourceapi/text()", namespaces);
192
		xpathSelectorRDF = this.saxonHelper.help().prepareXPathSelector("//oai:metadata/rdf:RDF", namespaces);
193
	}
194

    
195
	private String  extractFromRecord(final String record, final XPathSelector xPathSelector) {
196
		try {
197
			return this.saxonHelper.help().setSerializerProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector);
198
		} catch (SaxonApiException e) {
199
			log.error(e);
200
			throw new RuntimeException("Cannot extract content ", e);
201
		}
202
	}
203

    
204
	public String getConnectionString() {
205
		return connectionString;
206
	}
207

    
208
	public String getUsername() {
209
		return username;
210
	}
211

    
212
	public String getPassword() {
213
		return password;
214
	}
215

    
216
	public SaxonHelper getSaxonHelper() {
217
		return saxonHelper;
218
	}
219

    
220
	public String getDefaultBaseURI() {
221
		return defaultBaseURI;
222
	}
223
}
(1-1/3)