Project

General

Profile

« Previous | Next » 

Revision 48586

JobNodes to work with Virtuoso

View differences:

modules/dnet-parthenos-publisher/trunk/test/main/java/eu/dnetlib/parthenos/virtuoso/VirtuosoClientTest.java
30 30

  
31 31
	@Test
32 32
	public void testFeedProvenance(){
33
		long res = client.feedProvenance("ariadne_____::0002b8b77eb2d6c62f61c6e1a1ce6891", "2017-05-14T12:06:38.152+02:00" , "2017-05-14T12:11:55.573", "ARIADNE");
33
		long res = client.feedProvenance("ariadne_____::0002b8b77eb2d6c62f61c6e1a1ce6891", "2017-05-14T12:06:38.152+02:00" , "2017-05-14T12:11:55.573", "ARIADNE", "ariadne_api");
34 34
		Assert.assertEquals(3, res);
35 35
	}
36 36
}
modules/dnet-parthenos-publisher/trunk/test/main/java/eu/dnetlib/parthenos/virtuoso/VirtuosoTest.java
7 7
import org.apache.jena.graph.Triple;
8 8
import org.apache.jena.rdf.model.Model;
9 9
import org.apache.jena.rdf.model.ResourceFactory;
10
import org.apache.jena.util.iterator.ExtendedIterator;
10 11
import org.junit.Assert;
11 12
import org.junit.Before;
12 13
import org.junit.Ignore;
......
128 129
		assertEquals(2, md.size());
129 130
	}
130 131

  
132
	@Test
133
	public void testSearch(){
134
		VirtGraph graph = new VirtGraph(testGraph, connectionString, testUser, testPwd);
135
		assertNotNull(graph);
136
		graph.clear();
137
		Assert.assertTrue(graph.isEmpty());
138
		Triple t1 = new Triple(NodeFactory.createURI("http://test/1"), NodeFactory.createURI("http://test/is_friend_of"),
139
				NodeFactory.createURI("http://test/KevinBacon"));
140
		Triple t2 = new Triple(NodeFactory.createURI("http://test/1"), NodeFactory.createURI("http://test/is_friend_of"),
141
				NodeFactory.createURI("http://test/BonoVox"));
142
		Triple t3 = new Triple(NodeFactory.createURI("http://test/1"), NodeFactory.createURI("http://test/is_enemy_of"),
143
				NodeFactory.createURI("http://test/Spiderman"));
144
		graph.performAdd(t1);
145
		graph.performAdd(t2);
146
		graph.performAdd(t3);
147
		assertEquals(3, graph.getCount());
131 148

  
149
		final ExtendedIterator<Triple> friends = graph.find(null, NodeFactory.createURI("http://test/is_friend_of"), null);
150
		while(friends.hasNext()){
151
			System.out.println(friends.next());
152
		}
153

  
154
	}
155

  
156

  
132 157
	private InputStream getResourceAsStream(final String classpath) {
133 158
		try {
134 159
			final ClassPathResource resource = new ClassPathResource(classpath);
modules/dnet-parthenos-publisher/trunk/dnet-parthenos-publisher.iml
2 2
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
3 3
  <component name="FacetManager">
4 4
    <facet type="Spring" name="Spring">
5
      <configuration />
5
      <configuration>
6
        <fileset id="fileset" name="Spring Application Context" removed="false">
7
          <file>file://$MODULE_DIR$/src/main/resources/eu/dnetlib/parthenos/applicationContext-msro-parthenos-nodes.xml</file>
8
        </fileset>
9
      </configuration>
6 10
    </facet>
7 11
  </component>
8 12
  <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
......
69 73
    <orderEntry type="library" name="Maven: org.mongodb:mongo-java-driver:3.4.2" level="project" />
70 74
    <orderEntry type="library" name="Maven: joda-time:joda-time:2.9.9" level="project" />
71 75
    <orderEntry type="library" name="Maven: org.springframework:spring-test:4.2.5.RELEASE" level="project" />
76
    <orderEntry type="module" module-name="dnet-msro-service" />
77
    <orderEntry type="module" module-name="dnet-core-services" />
78
    <orderEntry type="library" name="Maven: com.typesafe:config:1.3.0" level="project" />
79
    <orderEntry type="library" name="Maven: javax.mail:mail:1.4.7" level="project" />
80
    <orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" />
81
    <orderEntry type="library" name="Maven: org.codehaus.groovy:groovy-all:2.4.6" level="project" />
82
    <orderEntry type="library" name="Maven: gr.forth.ics.isl:x3ml-engine:1.8.0-SNAPSHOT" level="project" />
83
    <orderEntry type="library" name="Maven: com.thoughtworks.xstream:xstream:1.4.2" level="project" />
84
    <orderEntry type="library" name="Maven: xmlpull:xmlpull:1.1.3.1" level="project" />
85
    <orderEntry type="library" name="Maven: xpp3:xpp3_min:1.1.4c" level="project" />
86
    <orderEntry type="library" name="Maven: com.damnhandy:handy-uri-templates:2.1.6" level="project" />
87
    <orderEntry type="library" name="Maven: org.jooq:joox:1.2.0" level="project" />
88
    <orderEntry type="library" name="Maven: gr.forth.ics.isl:X3ML_reverse_utils:1.5" level="project" />
89
    <orderEntry type="library" name="Maven: org.jsoup:jsoup:1.7.3" level="project" />
72 90
    <orderEntry type="library" name="Maven: virtuoso:jena-driver:3.0" level="project" />
73 91
    <orderEntry type="library" name="Maven: openlink:virtuoso-jdbc:4.0" level="project" />
74 92
    <orderEntry type="library" name="Maven: org.apache.jena:jena-arq:3.3.0" level="project" />
modules/dnet-parthenos-publisher/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/parthenos/virtuoso/DropFromVirtuosoJobNode.java
1
package eu.dnetlib.msro.workflows.nodes.parthenos.virtuoso;
2

  
3
import eu.dnetlib.msro.workflows.graph.Arc;
4
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
5
import eu.dnetlib.msro.workflows.procs.Env;
6
import eu.dnetlib.parthenos.virtuoso.VirtuosoClient;
7
import eu.dnetlib.parthenos.virtuoso.VirtuosoClientFactory;
8
import org.apache.commons.logging.Log;
9
import org.apache.commons.logging.LogFactory;
10
import org.springframework.beans.factory.annotation.Autowired;
11

  
12
public class DropFromVirtuosoJobNode extends SimpleJobNode {
13

  
14
	private static final Log log = LogFactory.getLog(DropFromVirtuosoJobNode.class);
15

  
16
	private String datasourceInterface;
17

  
18
	@Autowired
19
	private VirtuosoClientFactory virtuosoClientFactory;
20

  
21
	@Override
22
	protected String execute(final Env env) throws Exception {
23

  
24
		VirtuosoClient virtuosoClient = this.virtuosoClientFactory.getVirtuosoClient();
25
		long nTriples = virtuosoClient.drop(getDatasourceInterface());
26
		log.info("Deleted " + nTriples + " triples in Virtuoso server");
27

  
28
		env.setAttribute("triples", nTriples);
29
		return Arc.DEFAULT_ARC;
30

  
31
	}
32

  
33
	public VirtuosoClientFactory getVirtuosoClientFactory() {
34
		return virtuosoClientFactory;
35
	}
36

  
37
	public void setVirtuosoClientFactory(final VirtuosoClientFactory virtuosoClientFactory) {
38
		this.virtuosoClientFactory = virtuosoClientFactory;
39
	}
40

  
41
	public String getDatasourceInterface() {
42
		return datasourceInterface;
43
	}
44

  
45
	public void setDatasourceInterface(final String datasourceInterface) {
46
		this.datasourceInterface = datasourceInterface;
47
	}
48
}
modules/dnet-parthenos-publisher/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/parthenos/virtuoso/StoreVirtuosoJobNode.java
1
package eu.dnetlib.msro.workflows.nodes.parthenos.virtuoso;
2

  
3
import eu.dnetlib.enabling.resultset.client.ResultSetClient;
4
import eu.dnetlib.msro.workflows.graph.Arc;
5
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
6
import eu.dnetlib.msro.workflows.procs.Env;
7
import eu.dnetlib.parthenos.virtuoso.VirtuosoClient;
8
import eu.dnetlib.parthenos.virtuoso.VirtuosoClientFactory;
9
import eu.dnetlib.rmi.common.ResultSet;
10
import eu.dnetlib.rmi.manager.MSROException;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13
import org.springframework.beans.factory.annotation.Autowired;
14

  
15
public class StoreVirtuosoJobNode extends SimpleJobNode {
16

  
17
	private static final Log log = LogFactory.getLog(StoreVirtuosoJobNode.class);
18

  
19
	private String inputEprParam;
20
	private String datasourceInterface;
21

  
22
	@Autowired
23
	private ResultSetClient resultSetClient;
24

  
25
	@Autowired
26
	private VirtuosoClientFactory virtuosoClientFactory;
27

  
28
	@Override
29
	protected String execute(final Env env) throws Exception {
30

  
31
		final ResultSet<?> rsIn = env.getAttribute(this.inputEprParam, ResultSet.class);
32
		if ((rsIn == null)) { throw new MSROException("InputEprParam (" + this.inputEprParam + ") not found in ENV"); }
33
		VirtuosoClient virtuosoClient = this.virtuosoClientFactory.getVirtuosoClient();
34
		long nTriples = virtuosoClient.feed(getResultSetClient().iter(rsIn, String.class), getDatasourceInterface());
35
		log.info("Stored " + nTriples + " triples in Virtuoso server");
36

  
37
		env.setAttribute("triples", nTriples);
38
		return Arc.DEFAULT_ARC;
39

  
40
	}
41

  
42
	public String getInputEprParam() {
43
		return this.inputEprParam;
44
	}
45

  
46
	public void setInputEprParam(final String inputEprParam) {
47
		this.inputEprParam = inputEprParam;
48
	}
49

  
50
	public VirtuosoClientFactory getVirtuosoClientFactory() {
51
		return virtuosoClientFactory;
52
	}
53

  
54
	public void setVirtuosoClientFactory(final VirtuosoClientFactory virtuosoClientFactory) {
55
		this.virtuosoClientFactory = virtuosoClientFactory;
56
	}
57

  
58
	public ResultSetClient getResultSetClient() {
59
		return resultSetClient;
60
	}
61

  
62
	public void setResultSetClient(final ResultSetClient resultSetClient) {
63
		this.resultSetClient = resultSetClient;
64
	}
65

  
66
	public String getDatasourceInterface() {
67
		return datasourceInterface;
68
	}
69

  
70
	public void setDatasourceInterface(final String datasourceInterface) {
71
		this.datasourceInterface = datasourceInterface;
72
	}
73
}
modules/dnet-parthenos-publisher/trunk/src/main/java/eu/dnetlib/parthenos/virtuoso/VirtuosoClient.java
5 5
import com.google.common.collect.Maps;
6 6
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
7 7
import net.sf.saxon.s9api.SaxonApiException;
8
import net.sf.saxon.s9api.Serializer.Property;
8
import net.sf.saxon.s9api.Serializer;
9 9
import net.sf.saxon.s9api.XPathSelector;
10 10
import org.apache.commons.io.IOUtils;
11 11
import org.apache.commons.lang3.StringUtils;
12 12
import org.apache.commons.logging.Log;
13 13
import org.apache.commons.logging.LogFactory;
14
import org.apache.jena.rdf.model.Model;
15
import org.apache.jena.rdf.model.Resource;
16
import org.apache.jena.rdf.model.ResourceFactory;
17
import org.apache.jena.rdf.model.Statement;
14
import org.apache.jena.datatypes.xsd.XSDDatatype;
15
import org.apache.jena.rdf.model.*;
18 16
import virtuoso.jena.driver.VirtModel;
19 17

  
20 18
/**
......
30 28
	private static final String DRI_NAMESPACE_URI = "http://www.driver-repository.eu/namespace/dri";
31 29
	private static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
32 30

  
31
	private static final String PROVENANCE_GRAPH = "provenance";
32
	public static Property IS_API_OF = ResourceFactory.createProperty("dnet", "isApiOf");
33
	public static Property COLL_FROM = ResourceFactory.createProperty("dnet", "collectedFrom");
34
	public static Property COLL_IN_DATE = ResourceFactory.createProperty("dnet", "collectedInDate");
35
	public static Property TRANS_IN_DATE = ResourceFactory.createProperty("dnet", "transformedInDate");
36

  
33 37
	private SaxonHelper saxonHelper;
34 38
	private XPathSelector xpathSelectorObjIdentifier;
35 39
	private XPathSelector xpathSelectorCollectionDate;
......
56 60
		prepareXpathSelectors();
57 61
	}
58 62

  
59
	public long feed(final String record) {
63
	public long feed(final String record, final String dsInterface) {
60 64
		if (StringUtils.isBlank(record)) {
61 65
			log.warn("Got empty record");
62 66
			return 0;
......
74 78
		String transformationDate = extractFromRecord(record, xpathSelectorTransformationDate);
75 79
		String datasource = extractFromRecord(record, xpathSelectorDatasourceName);
76 80

  
77
		Model md = VirtModel.openDatabaseModel(objIdentifier, getConnectionString(), getUsername(), getPassword());
78
		log.debug("Opened virtuoso model for graph " + objIdentifier);
81
		String namedGraph= getRecordDefaultURI(objIdentifier, dsInterface);
82
		Model md = VirtModel.openDatabaseModel(namedGraph, getConnectionString(), getUsername(), getPassword());
83
		log.debug("Opened virtuoso model for graph " + namedGraph);
79 84
		md.removeAll();
80
		log.debug("Removed all triples from graph " + objIdentifier);
85
		log.debug("Removed all triples from graph " + namedGraph);
81 86
		md.read(IOUtils.toInputStream(rdfBlock), getDefaultBaseURI());
82 87
		long size = md.size();
83
		log.info("Graph " + objIdentifier + " now has " + size + " triples");
88
		log.info("Graph " + namedGraph + " now has " + size + " triples");
84 89

  
85
		long ntriples = feedProvenance(objIdentifier, collectionDate, transformationDate, datasource);
86
		log.debug("provenance graph updated with " + ntriples + " triples");
90
		long ntriples = feedProvenance(namedGraph, collectionDate, transformationDate, datasource, dsInterface);
91
		log.debug("provenance graph for "+namedGraph+" updated with " + ntriples + " triples");
87 92

  
88 93
		return size;
89 94
	}
90 95

  
91
	long feedProvenance(final String objIdentifier, final String collectionDate, final String transformationDate, final String datasource) {
92
		Model md = VirtModel.openDatabaseModel("provenance", getConnectionString(), getUsername(), getPassword());
96
	long feedProvenance(final String namedGraphURI, final String collectionDate, final String transformationDate, final String datasource, final String api) {
97
		Model md = VirtModel.openDatabaseModel(PROVENANCE_GRAPH, getConnectionString(), getUsername(), getPassword());
93 98

  
94 99
		//TODO: use prov-o instead: https://www.w3.org/TR/prov-o/#description
95
		Resource r = ResourceFactory.createResource(getRecordDefaultURI(objIdentifier, datasource));
100
		Resource rApi = ResourceFactory.createResource(defaultBaseURI + api);
101
		Resource r = ResourceFactory.createResource(namedGraphURI);
102
		Statement stmApi =
103
				ResourceFactory.createStatement(rApi, IS_API_OF, ResourceFactory.createPlainLiteral(datasource));
96 104
		Statement stmCollFrom =
97
				ResourceFactory.createStatement(r, ResourceFactory.createProperty("dnet", "collectedFrom"), ResourceFactory.createPlainLiteral(datasource));
98
		//TODO: how to set the type of dates correctly?
105
				ResourceFactory.createStatement(r, COLL_FROM, rApi);
99 106
		Statement stmCollDate = ResourceFactory
100
				.createStatement(r, ResourceFactory.createProperty("dnet", "collectedInDate"), ResourceFactory.createPlainLiteral(collectionDate));
107
				.createStatement(r, COLL_IN_DATE, ResourceFactory.createTypedLiteral(collectionDate, XSDDatatype.XSDdateTime));
101 108
		Statement stmTransDate = ResourceFactory
102
				.createStatement(r, ResourceFactory.createProperty("dnet", "transformedInDate"), ResourceFactory.createPlainLiteral(transformationDate));
109
				.createStatement(r, TRANS_IN_DATE, ResourceFactory.createTypedLiteral(transformationDate, XSDDatatype.XSDdateTime));
103 110

  
104 111
		//let's remove previous provenance statements for this resource:
105 112
		md.removeAll(r, null, null);
106 113
		//and add the new ones
107
		md.add(stmCollDate).add(stmCollFrom).add(stmTransDate);
114
		md.add(stmApi).add(stmCollFrom).add(stmCollDate).add(stmTransDate);
108 115
		return 3;
109 116
	}
110 117

  
111
	public long feed(final Iterable<String> records) {
118
	public long feed(final Iterable<String> records, final String api) {
112 119
		//TODO: can we do it in parallel? if all records have different objIdentifier it is safe, and this must be the case anyway, because the source of records is a D-Net mdstore.
113 120
		long count = 0;
114
		for (String r : records) count += this.feed(r);
121
		for (String r : records) count += this.feed(r, api);
115 122
		return count;
116 123
	}
117 124

  
118
	private String getRecordDefaultURI(final String objIdentifier, final String datasource) {
119
		return defaultBaseURI + datasource + "/" + objIdentifier;
125
	/**
126
	 * Delete all triples in named graphs collected from the given api
127
	 * @param api the id of the API
128
	 * @return the number of triples deleted from the named graphs associated to the given api
129
	 */
130
	public long drop(final String api){
131
		Model prov = VirtModel.openDatabaseModel(PROVENANCE_GRAPH, getConnectionString(), getUsername(), getPassword());
132
		//look for all named graphs associated to the api
133
		Resource rApi = ResourceFactory.createResource(defaultBaseURI + api);
134
		long deletedTriples = 0;
135
		final ResIterator resIterator = prov.listSubjectsWithProperty(COLL_FROM, rApi);
136
		while (resIterator.hasNext()) {
137
			Resource namedGraphURI = resIterator.nextResource();
138
			//delete all triples belonging to the r named graph
139
			deletedTriples += dropNamedGraph(namedGraphURI.getURI());
140
			//delete the named graph from the provenance graph
141
			prov.remove(namedGraphURI, null, null);
142
		}
143
		//delete the api from the provenance graph
144
		prov.remove(null, null, rApi);
145
		prov.remove(rApi, null, null);
146

  
147
		return deletedTriples;
120 148
	}
121 149

  
150
	private long dropNamedGraph(String namedGraphURI){
151
		Model namedGraph = VirtModel.openDatabaseModel(namedGraphURI, getConnectionString(), getUsername(), getPassword());
152
		long deletedTriples = namedGraph.size();
153
		namedGraph.removeAll();
154
		return deletedTriples;
155
	}
156

  
157
	private String getRecordDefaultURI(final String objIdentifier, final String datasourceApi) {
158
		return defaultBaseURI + datasourceApi + "/" + objIdentifier;
159
	}
160

  
122 161
	private void prepareXpathSelectors() throws SaxonApiException {
123 162
		Map<String, String> namespaces = Maps.newHashMap();
124 163
		namespaces.put("oai", OAI_NAMESPACE_URI);
......
133 172

  
134 173
	private String extractFromRecord(final String record, final XPathSelector xPathSelector) {
135 174
		try {
136
			return this.saxonHelper.help().setSerializerProperty(Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector);
175
			return this.saxonHelper.help().setSerializerProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector);
137 176
		} catch (SaxonApiException e) {
138 177
			throw new RuntimeException("Cannot extract content ", e);
139 178
		}
modules/dnet-parthenos-publisher/trunk/src/main/resources/eu/dnetlib/parthenos/applicationContext-msro-parthenos-nodes.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<beans xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
       xmlns:p="http://www.springframework.org/schema/p"
4
       xmlns="http://www.springframework.org/schema/beans"
5
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
6

  
7
	<bean id="wfNodeStoreVirtuoso"
8
	      class="eu.dnetlib.msro.workflows.nodes.parthenos.virtuoso.StoreVirtuosoJobNode"
9
	      scope="prototype"/>
10

  
11
	<bean id="wfNodeDropFromVirtuoso"
12
	      class="eu.dnetlib.msro.workflows.nodes.parthenos.virtuoso.DropFromVirtuosoJobNode"
13
	      scope="prototype"/>
14
</beans>
modules/dnet-parthenos-publisher/trunk/pom.xml
21 21
			<artifactId>dnet-core-components</artifactId>
22 22
			<version>[2.0.0-SAXONHE-SNAPSHOT]</version>
23 23
		</dependency>
24

  
24 25
		<dependency>
26
			<groupId>eu.dnetlib</groupId>
27
			<artifactId>dnet-msro-service</artifactId>
28
			<version>[6.0.0-SAXONHE-SNAPSHOT]</version>
29
		</dependency>
30
		<dependency>
25 31
			<groupId>virtuoso</groupId>
26 32
			<artifactId>jena-driver</artifactId>
27 33
			<version>3.0</version>

Also available in: Unified diff