Project

General

Profile

« Previous | Next » 

Revision 50754

Now we are able to index data collected from Virtuoso via CONSTRUCT queries, yeah!

View differences:

modules/dnet-parthenos/trunk/src/main/java/eu/dnetlib/parthenos/workflows/nodes/ReadVirtuosoJobNode.java
18 18

  
19 19
	private static final Log log = LogFactory.getLog(ReadVirtuosoJobNode.class);
20 20

  
21
	private String datasourceId;
21
	private String datasourceName;
22 22
	private String datasourceInterface;
23 23
	private String virtuosoReaderAPIUrl;
24 24

  
......
29 29
	protected String execute(final Env env){
30 30
		log.info("Using virtuoso reader at : " + getVirtuosoReaderAPIUrl());
31 31

  
32
		VirtuosoParthenosIterator iterator = new VirtuosoParthenosIterator().datasourceInterface(datasourceInterface).virtuosoReaderAPIUrl(getVirtuosoReaderAPIUrl());
32
		VirtuosoParthenosIterator iterator = new VirtuosoParthenosIterator().datasourceInterface(getDatasourceInterface()).virtuosoReaderAPIUrl(getVirtuosoReaderAPIUrl()).datasourceName(getDatasourceName());
33 33
		ResultSet<String> rs = rsFactory.createResultSet(new VirtuosoParthenosIterable(iterator));
34 34

  
35 35
		env.setAttribute("virtuoso_rs", rs);
......
37 37
		return Arc.DEFAULT_ARC;
38 38
	}
39 39

  
40
	public String getDatasourceId() {
41
		return datasourceId;
40
	public String getDatasourceName() {
41
		return datasourceName;
42 42
	}
43 43

  
44
	public void setDatasourceId(final String datasourceId) {
45
		this.datasourceId = datasourceId;
44
	public void setDatasourceName(final String datasourceName) {
45
		this.datasourceName = datasourceName;
46 46
	}
47 47

  
48 48
	public String getDatasourceInterface() {
modules/dnet-parthenos/trunk/src/main/java/eu/dnetlib/parthenos/workflows/nodes/VirtuosoParthenosIterator.java
12 12
import com.google.common.collect.Queues;
13 13
import eu.dnetlib.data.collector.ThreadSafeIterator;
14 14
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
15
import org.apache.commons.lang3.StringEscapeUtils;
15 16
import org.apache.commons.logging.Log;
16 17
import org.apache.commons.logging.LogFactory;
17 18
import org.apache.http.client.utils.URIBuilder;
......
29 30
	protected static final int QUEUE_TIMEOUT_SECONDS = 5;
30 31
	public final static String TERMINATOR = "ARNOLD";
31 32

  
33
	private String datasourceName;
32 34
	private String datasourceInterface;
33 35
	private String virtuosoReaderAPIUrl;
34 36
	private boolean started = false;
......
39 41
	private BlockingQueue<String> elements = Queues.newArrayBlockingQueue(10);
40 42

  
41 43
	private String currentElement = null;
42
	RestTemplate restTemplate = new RestTemplate();
44
	private RestTemplate restTemplate = new RestTemplate();
43 45

  
44 46

  
45 47
	private synchronized void verifyStarted() throws InterruptedException {
......
124 126
					ResponseEntity<String> response = restTemplate.exchange(uri, HttpMethod.GET, entity, String.class);
125 127
					log.debug("Result - status ("+ response.getStatusCode() + ") has body: " + response.hasBody());
126 128
					String rdfFile = response.getBody();
127
					log.debug(rdfFile);
128
					elements.offer(rdfFile, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS);
129
					final String xmlFile = completeXML(rdfFile, subject);
130
					elements.offer(xmlFile, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS);
131
					log.debug(xmlFile);
129 132
					subject = subjects.take();
130 133
				}
131 134
			} catch (Throwable t) {
......
142 145
		}).start();
143 146
	}
144 147

  
148

  
149
	public String completeXML(final String rdfFile, final String url){
150
		String xmlEscapedURL = StringEscapeUtils.escapeXml11(url);
151
		return "<?xml version=\"1.0\" encoding=\"UTF-8\"?><record><header xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\"><dri:objIdentifier>"+xmlEscapedURL+"</dri:objIdentifier><dri:datasourceapi>"+datasourceInterface+"</dri:datasourceapi><dri:datasourcename>"+datasourceName+"</dri:datasourcename></header><metadata>"+rdfFile+"</metadata></record>";
152
	}
153

  
154

  
145 155
	@Override
146 156
	public boolean doHasNext() {
147 157
		try {
......
159 169
		} catch (InterruptedException e) {
160 170
			throw new CollectorServiceRuntimeException(e);
161 171
		}
162
		if(currentElement.equals(TERMINATOR)) throw new NoSuchElementException();
172
		if(currentElement.equals(TERMINATOR))
173
			throw new NoSuchElementException();
163 174
		else{
164 175
			String res = currentElement;
165 176
			try {
......
188 199
		return this;
189 200
	}
190 201

  
202
	public VirtuosoParthenosIterator datasourceName(final String datasourceName) {
203
		this.datasourceName = datasourceName;
204
		return this;
205
	}
206

  
191 207
	public VirtuosoParthenosIterator errors(final Map<String, Integer> errors) {
192 208
		this.errors = errors;
193 209
		return this;
modules/dnet-parthenos/trunk/src/main/resources/eu/dnetlib/parthenos/workflows/repo-hi/NEW_publish_wf.xml.st
52 52
                        <PARAM name="wfTemplateId" value="9f1b9aab-d1d5-4ece-a04e-e1157eeb4a11_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==" />
53 53
                        <PARAM name="wfTemplateParams">
54 54
                            <MAP>
55
                                <ENTRY key="dsId"               value="$dsId$" />
55
                                <ENTRY key="dsName"               value="$dsName$" />
56 56
                                <ENTRY key="interface"          value="$interface$" />
57 57
                                <ENTRY key="virtuosoReaderAPIUrl"     ref="virtuosoReaderAPIUrl" />
58 58
                                <ENTRY key="indexId"            ref="indexId" />
modules/dnet-parthenos/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/MDFormatDSResourceType/public-CIDOC.xml
22 22
						<FIELD indexable="false" name="entity" result="true" stat="false" tokenizable="true" xpath="//rdf:RDF"/>
23 23

  
24 24
                        <FIELD indexable="true" name="objidentifier" result="false" stat="false" tokenizable="false" xpath="//dri:objIdentifier"/>
25
                        <FIELD header="true" indexable="true" name="originalid" result="false" stat="false" tokenizable="false" xpath="//dri:recordIdentifier"/>
26 25
                        <FIELD indexable="true" name="datasourcename" result="false" stat="false" xpath="//dri:datasourcename"/>
27
                        <FIELD indexable="true" name="datasourceprefix" result="false"  stat="false" xpath="//dri:datasourceprefix"/>
28 26
                        <FIELD header="true" indexable="true" name="datasourceapi" result="false" stat="false" xpath="//dri:datasourceapi"/>
29
	                    <FIELD indexable="true" name="url" result="false" stat="false" tokenizable="false" xpath="//rdf:Description/@rdf:about"/>
27

  
28
	                    <FIELD indexable="true" name="url" result="false" stat="false" tokenizable="false" xpath="//dri:objIdentifier"/>
30 29
	                    <FIELD indexable="true" name="language" result="false" stat="false" tokenizable="false" xpath="//crm:P72_has_language"/>
31 30

  
32 31
                        <!-- Fields requested by T6.5-->
modules/dnet-parthenos/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/publish/solr_publish_template.xml
10 10
	<BODY>
11 11
		<CONFIGURATION>
12 12
			<PARAMETERS>
13
				<PARAM name="dsId" description="Datasource Id" required="true" type="string"/>
13
				<PARAM name="dsName" description="Datasource Name" required="true" type="string"/>
14 14
				<PARAM name="interface" description="Datasource Interface" required="true" type="string"/>
15 15
				<PARAM name="virtuosoReaderAPIUrl" description="Virtuoso reader endpoint" required="true" type="string"/>
16 16
				<PARAM name="indexId" description="Index Identifier" required="true" type="string"/>
......
21 21
				<NODE name="readFromVirtuoso" type="ReadVirtuoso" isStart="true">
22 22
					<DESCRIPTION>Fetch records from Virtuoso</DESCRIPTION>
23 23
					<PARAMETERS>
24
						<PARAM name="datasourceId" ref="dsId"/>
24
						<PARAM name="datasourceName" ref="dsName"/>
25 25
						<PARAM name="datasourceInterface" ref="interface"/>
26 26
						<PARAM name="virtuosoReaderAPIUrl" ref="virtuosoReaderAPIUrl"/>
27 27
					</PARAMETERS>

Also available in: Unified diff