Project

General

Profile

1
package eu.dnetlib.parthenos.virtuoso;
2

    
3
import java.io.OutputStream;
4
import java.util.Iterator;
5
import java.util.List;
6

    
7
import com.google.common.collect.Iterators;
8
import com.google.common.collect.Lists;
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11
import org.apache.jena.query.Query;
12
import org.apache.jena.query.QueryExecutionFactory;
13
import org.apache.jena.query.QueryFactory;
14
import org.apache.jena.query.ResultSet;
15
import org.apache.jena.rdf.model.Model;
16
import org.apache.jena.riot.RDFDataMgr;
17
import org.apache.jena.riot.RDFFormat;
18
import org.apache.jena.sparql.engine.http.QueryEngineHTTP;
19
import org.springframework.beans.factory.annotation.Value;
20
import org.springframework.web.bind.annotation.RequestMapping;
21
import org.springframework.web.bind.annotation.RequestMethod;
22
import org.springframework.web.bind.annotation.RequestParam;
23
import org.springframework.web.bind.annotation.RestController;
24
import virtuoso.jena.driver.VirtGraph;
25
import virtuoso.jena.driver.VirtuosoQueryExecution;
26
import virtuoso.jena.driver.VirtuosoQueryExecutionFactory;
27

    
28
/**
29
 * Created by Alessia Bardi on 31/01/2018.
30
 * Read-only API for virtuoso.
31
 *
32
 * //TODO: pagination
33
 * //TODO swagger documentation?
34
 *
35
 * @author Alessia Bardi
36
 */
37
@RestController
38
public class VirtuosoReadAPI {
39

    
40
	private static final Log log = LogFactory.getLog(VirtuosoReadAPI.class);
41

    
42
	@Value("${virtuoso.sparqlurl}")
43
	private String sparqlUrl;
44
	@Value("${virtuoso.pwd}")
45
	private String username;
46
	@Value("${virtuoso.pwd}")
47
	private String password;
48
	@Value("${virtuoso.uri.base.default}")
49
	private String defaultBaseURI;
50

    
51
	@RequestMapping(value = "/virtuoso/graphs", produces = { "application/json" }, method = RequestMethod.GET)
52
	public List<String> getGraphURLs(@RequestParam final String api) {
53
		String queryForGraphs =
54
				"SELECT DISTINCT ?g WHERE { { ?g <dnetcollectedFrom> <"+defaultBaseURI+api+"> }}";
55

    
56
		log.debug(queryForGraphs);
57
		Query query = QueryFactory.create(queryForGraphs);
58
		final QueryEngineHTTP serviceRequest = QueryExecutionFactory.createServiceRequest(sparqlUrl, query);
59
		ResultSet graphs = serviceRequest.execSelect();
60
		Iterator<String> s = Iterators.transform(graphs, qs -> qs.getResource("g").getURI());
61
		List<String> res = Lists.newArrayList(s);
62
		serviceRequest.close();
63
		return res;
64
	}
65

    
66
	@RequestMapping(value = "/virtuoso/subjects", produces = { "application/json" }, method = RequestMethod.GET)
67
	public List<String> getSubjectsForGraph(@RequestParam final String graph) {
68
		// To filter out all subjects that do not start with 'http' use the following query
69
		// String queryForSubjectsTemplate = "SELECT DISTINCT ?s WHERE {GRAPH <%s> {?s ?p ?o . FILTER regex(str(?s),'^http')}}";
70
		// Based on FORTH observation, those without a URL should not be interesting by themselves, but looking at Nakala data it seems this is not the case:
71
		String queryForSubjectsTemplate = "SELECT DISTINCT ?s WHERE {GRAPH <%s> {?s ?p ?o }}";
72
		String q = String.format(queryForSubjectsTemplate, graph);
73
		log.debug(q);
74
		final QueryEngineHTTP serviceRequest =
75
				QueryExecutionFactory.createServiceRequest(sparqlUrl, QueryFactory.create(q));
76
		ResultSet subjects = serviceRequest.execSelect();
77
		Iterator<String> s = Iterators.transform(subjects, qs -> qs.getResource("s").getURI());
78
		List<String> res = Lists.newArrayList(s);
79
		serviceRequest.close();
80
		return res;
81
	}
82

    
83
	@RequestMapping(value = "/virtuoso/subject", produces = { "application/rdf+xml", "application/xml" }, method = RequestMethod.GET)
84
	public void getSubject(@RequestParam final String subjectURL, final OutputStream responseStream) {
85
		//String describeQuery = "DESCRIBE <" + subjectURL + ">";
86
		//log.debug(describeQuery);
87
		//Can't use DESCRIBE query: server decides what to return (i.e. not all triples) and cannot find out if and how Virtuoso can be configured
88

    
89
		String q = "DEFINE input:inference <parthenos_rules> CONSTRUCT {<%s> ?p ?o . } WHERE { <%s> ?p ?o .}";
90
		//String q = "CONSTRUCT {<%s> ?p ?o . } WHERE { <%s> ?p ?o .}";
91
		String query = String.format(q, subjectURL, subjectURL, subjectURL, subjectURL);
92
		log.debug(query);
93
		VirtGraph g = new VirtGraph(sparqlUrl, username, password);
94
		g.setReadFromAllGraphs(true);
95
		VirtuosoQueryExecution vqe = VirtuosoQueryExecutionFactory.create(query, g);
96
		Model resultModel = vqe.execConstruct();
97
		RDFDataMgr.write(responseStream, resultModel, RDFFormat.RDFXML_PLAIN);
98
		vqe.close();
99
		g.close();
100
	}
101

    
102

    
103
}
(3-3/3)