Revision 49001
Added by Alessia Bardi over 6 years ago
VirtuosoClient.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.parthenos.virtuoso; |
2 | 2 |
|
3 |
import java.io.IOException; |
|
3 | 4 |
import java.util.Map; |
4 | 5 |
|
5 |
import com.google.common.collect.Maps;
|
|
6 |
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
|
|
6 |
import eu.dnetlib.parthenos.publisher.ParthenosPublisherException;
|
|
7 |
import eu.dnetlib.parthenos.publisher.SaxonHelper;
|
|
7 | 8 |
import net.sf.saxon.s9api.SaxonApiException; |
8 | 9 |
import net.sf.saxon.s9api.Serializer; |
9 | 10 |
import net.sf.saxon.s9api.XPathSelector; |
... | ... | |
12 | 13 |
import org.apache.commons.logging.Log; |
13 | 14 |
import org.apache.commons.logging.LogFactory; |
14 | 15 |
import org.apache.jena.datatypes.xsd.XSDDatatype; |
16 |
import org.apache.jena.ext.com.google.common.collect.Maps; |
|
15 | 17 |
import org.apache.jena.rdf.model.*; |
16 | 18 |
import virtuoso.jena.driver.VirtModel; |
17 | 19 |
|
... | ... | |
52 | 54 |
final String password, |
53 | 55 |
final SaxonHelper saxonHelper, |
54 | 56 |
final String defaultBaseURI) |
55 |
throws SaxonApiException {
|
|
57 |
throws ParthenosPublisherException {
|
|
56 | 58 |
this.connectionString = connectionString; |
57 | 59 |
this.username = username; |
58 | 60 |
this.password = password; |
59 | 61 |
this.saxonHelper = saxonHelper; |
60 | 62 |
this.defaultBaseURI = defaultBaseURI; |
61 |
prepareXpathSelectors(); |
|
63 |
try { |
|
64 |
prepareXpathSelectors(); |
|
65 |
}catch(SaxonApiException e){ |
|
66 |
throw new ParthenosPublisherException(e); |
|
67 |
} |
|
62 | 68 |
} |
63 | 69 |
|
64 |
public long feed(final String record) { |
|
65 |
if (StringUtils.isBlank(record)) { |
|
66 |
log.warn("Got empty record"); |
|
67 |
return 0; |
|
68 |
} |
|
69 |
String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier); |
|
70 |
if (StringUtils.isBlank(objIdentifier)) { |
|
71 |
log.warn("Got record with no objIdentifier -- skipping"); |
|
72 |
return 0; |
|
73 |
} |
|
74 |
String rdfBlock = extractFromRecord(record, xpathSelectorRDF); |
|
75 |
if (StringUtils.isBlank(rdfBlock)) { |
|
76 |
log.warn("Missing rdf:RDF in record with objIdentifier " + objIdentifier + " all triples in that named graph will be deleted"); |
|
77 |
} |
|
78 |
String collectionDate = extractFromRecord(record, xpathSelectorCollectionDate); |
|
79 |
String transformationDate = extractFromRecord(record, xpathSelectorTransformationDate); |
|
80 |
String datasource = extractFromRecord(record, xpathSelectorDatasourceName); |
|
81 |
String dsInterface = extractFromRecord(record, xpathSelectorDatasourceApi); |
|
70 |
public long feed(final String record) throws ParthenosPublisherException{ |
|
71 |
try { |
|
72 |
if (StringUtils.isBlank(record)) { |
|
73 |
log.warn("Got empty record"); |
|
74 |
return 0; |
|
75 |
} |
|
76 |
String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier); |
|
77 |
if (StringUtils.isBlank(objIdentifier)) { |
|
78 |
log.warn("Got record with no objIdentifier -- skipping"); |
|
79 |
return 0; |
|
80 |
} |
|
81 |
String rdfBlock = extractFromRecord(record, xpathSelectorRDF); |
|
82 |
if (StringUtils.isBlank(rdfBlock)) { |
|
83 |
log.warn("Missing rdf:RDF in record with objIdentifier " + objIdentifier + " all triples in that named graph will be deleted"); |
|
84 |
} |
|
85 |
String collectionDate = extractFromRecord(record, xpathSelectorCollectionDate); |
|
86 |
String transformationDate = extractFromRecord(record, xpathSelectorTransformationDate); |
|
87 |
String datasource = extractFromRecord(record, xpathSelectorDatasourceName); |
|
88 |
String dsInterface = extractFromRecord(record, xpathSelectorDatasourceApi); |
|
82 | 89 |
|
83 |
String namedGraph= getRecordDefaultURI(objIdentifier, dsInterface);
|
|
84 |
Model md = VirtModel.openDatabaseModel(namedGraph, getConnectionString(), getUsername(), getPassword()); |
|
85 |
log.debug("Opened virtuoso model for graph " + namedGraph); |
|
86 |
md.removeAll(); |
|
87 |
log.debug("Removed all triples from graph " + namedGraph); |
|
88 |
md.read(IOUtils.toInputStream(rdfBlock), getDefaultBaseURI());
|
|
89 |
long size = md.size(); |
|
90 |
log.info("Graph " + namedGraph + " now has " + size + " triples"); |
|
90 |
String namedGraph = getRecordDefaultURI(objIdentifier, dsInterface);
|
|
91 |
Model md = VirtModel.openDatabaseModel(namedGraph, getConnectionString(), getUsername(), getPassword());
|
|
92 |
log.debug("Opened virtuoso model for graph " + namedGraph);
|
|
93 |
md.removeAll();
|
|
94 |
log.debug("Removed all triples from graph " + namedGraph);
|
|
95 |
md.read(IOUtils.toInputStream(rdfBlock, "UTF-8"), getDefaultBaseURI());
|
|
96 |
long size = md.size();
|
|
97 |
log.info("Graph " + namedGraph + " now has " + size + " triples");
|
|
91 | 98 |
|
92 |
long ntriples = feedProvenance(namedGraph, collectionDate, transformationDate, datasource, dsInterface); |
|
93 |
log.debug("provenance graph for "+namedGraph+" updated with " + ntriples + " triples");
|
|
99 |
long ntriples = feedProvenance(namedGraph, collectionDate, transformationDate, datasource, dsInterface);
|
|
100 |
log.debug("provenance graph for " + namedGraph + " updated with " + ntriples + " triples");
|
|
94 | 101 |
|
95 |
return size; |
|
102 |
return size; |
|
103 |
}catch(IOException e){ |
|
104 |
throw new ParthenosPublisherException(e); |
|
105 |
} |
|
96 | 106 |
} |
97 | 107 |
|
98 | 108 |
long feedProvenance(final String namedGraphURI, final String collectionDate, final String transformationDate, final String datasource, final String api) { |
... | ... | |
117 | 127 |
return 3; |
118 | 128 |
} |
119 | 129 |
|
120 |
public long feed(final Iterable<String> records) { |
|
130 |
public long feed(final Iterable<String> records) throws ParthenosPublisherException {
|
|
121 | 131 |
//TODO: can we do this in parallel? It is safe if all records have different objIdentifiers, which must be the case anyway because the source of records is a D-Net mdstore. |
122 | 132 |
long count = 0; |
123 | 133 |
for (String r : records) count += this.feed(r); |
Also available in: Unified diff
API for storing records on Virtuoso