Project

General

Profile

« Previous | Next » 

Revision 32940

Implemented the Pangaea datasets-by-journal workflow

View differences:

modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/doaj_journals/sql/doajJournals_prepareTables.sql
1
-- Recreates doaj_temp_journal from scratch: any existing table is dropped first, so the
-- CREATE below always starts from an empty table.
-- Columns:
--   _dnet_resource_identifier_  defaults to 'temp_' || md5(current clock timestamp) || '_' || md5(random value)
--   id                          primary key
--   journalname                 optional
--   issn                        mandatory (NOT NULL)
--   oa_source_id                foreign key to datasources(id)
-- NOTE(review): presumably a staging table for the DOAJ journal list used by the hosted-by workflow; confirm against the workflow that loads it.
DROP TABLE IF EXISTS doaj_temp_journal;
2
CREATE TABLE IF NOT EXISTS doaj_temp_journal (
3
	_dnet_resource_identifier_ character varying(2048) DEFAULT 'temp_'||md5(clock_timestamp()::text)||'_'||md5(random()::text),
4
	id                         character varying(255) PRIMARY KEY,
5
	journalname                character varying(255),
6
	issn			           character varying(255) NOT NULL,	
7
	oa_source_id              character varying(255) references datasources(id)
8
);
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/datasets/DatasetsIterator.java
7 7
import org.apache.commons.httpclient.methods.PostMethod;
8 8
import org.apache.commons.httpclient.methods.StringRequestEntity;
9 9
import org.apache.commons.io.IOUtils;
10
import org.apache.commons.lang.StringEscapeUtils;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
10 13

  
11 14
import com.google.gson.Gson;
12 15
import com.google.gson.GsonBuilder;
......
16 19
 */
17 20
public class DatasetsIterator implements Iterable<String>, Iterator<String> {
18 21

  
22
	/** The logger. */
23
	private static final Log log = LogFactory.getLog(DatasetsIterator.class);
24

  
19 25
	/** The base url template. */
20 26
	private static String BASE_URL_TEMPLATE = "http://ws.pangaea.de/es/pangaea/panmd/_search?_source=xml&size=%d&from=%d";
21 27

  
22 28
	/** The journal id. */
23
	private String journalId;
29
	private String journalId = "";
24 30

  
31
	/** The journal name. */
32
	private String journalName = "";
33

  
34
	/** The journal issn. */
35
	private String journalISSN = "";
36

  
37
	/** The openaire datasource. */
38
	private String openaireDatasource = "";
39

  
25 40
	/** The total. */
26 41
	private long total;
27 42

  
......
43 58
	private String projectCordaId;
44 59

  
45 60
	private static String RECORD_TEMPLATE = "<datasetsRecord><oaf:projectid xmlns:oaf=\"http://namespace.openaire.eu/oaf\">%s</oaf:projectid>"
46
			+ "<metadata>%s</metadata></datasetsRecord>";
61
			+ "<journal name='%s' issn='%s' datasourceid = '%s'/><metadata>%s</metadata></datasetsRecord>";
47 62

  
48 63
	/**
49 64
	 * Instantiates a new journal iterator.
......
51 66
	 * @param request
52 67
	 *            the request
53 68
	 */
54
	public DatasetsIterator(final RequestField request, final String projectCordaId) {
69
	public DatasetsIterator(final RequestField request, final String projectCordaId, final PangaeaJorunalInfo info) {
55 70
		this.request = request;
56 71
		this.setProjectCordaId(projectCordaId);
72

  
73
		if (info != null) {
74
			this.setJournalId(info.getJournalId());
75
			this.setJournalName(StringEscapeUtils.escapeXml(info.getJournalName()));
76
			this.setJournalISSN(info.getJournalISSN());
77
			this.setOpenaireDatasource(info.getDatasourceId());
78
		}
79
		log.debug("Start Iterator");
57 80
	}
58 81

  
59 82
	/**
......
67 90
	 */
68 91
	private String executeQuery(final int from, final int size) {
69 92
		try {
93
			log.debug("executing query " + this.request.getQuery().getTerm());
94
			log.debug(String.format("from:%d size:%d", from, size));
95

  
70 96
			HttpClient client = new HttpClient();
71 97

  
72 98
			PostMethod method = new PostMethod(String.format(BASE_URL_TEMPLATE, size, from));
......
114 140

  
115 141
	/*
116 142
	 * (non-Javadoc)
117
	 *
143
	 * 
118 144
	 * @see java.util.Iterator#hasNext()
119 145
	 */
120 146
	@Override
121 147
	public boolean hasNext() {
122
		return (from + currentIterator) < total;
148
		return from + currentIterator < total;
123 149
	}
124 150

  
125 151
	/*
126 152
	 * (non-Javadoc)
127
	 *
153
	 * 
128 154
	 * @see java.util.Iterator#next()
129 155
	 */
130 156
	@Override
131 157
	public String next() {
132
		String xml = String.format(RECORD_TEMPLATE, this.projectCordaId, currentResponse.getXmlRecords().get(currentIterator));
158
		String xml = String.format(RECORD_TEMPLATE, this.projectCordaId, this.journalName, this.journalISSN, this.openaireDatasource, currentResponse
159
				.getXmlRecords().get(currentIterator));
133 160
		currentIterator++;
134 161
		if (currentIterator == DEFAULT_SIZE) {
135 162
			getNextItem();
......
139 166

  
140 167
	/*
141 168
	 * (non-Javadoc)
142
	 *
169
	 * 
143 170
	 * @see java.util.Iterator#remove()
144 171
	 */
145 172
	@Override
......
170 197
		from += currentIterator;
171 198
		currentResponse = ElasticSearchResponse.createNewResponse(executeQuery(from, DEFAULT_SIZE));
172 199
		total = currentResponse.getTotal();
200
		log.debug("from : " + from + " total of the request is " + total);
173 201
		currentIterator = 0;
174 202
	}
175 203

  
......
188 216
		this.projectCordaId = projectCordaId;
189 217
	}
190 218

  
219
	/**
220
	 * @return the journalName
221
	 */
222
	public String getJournalName() {
223
		return journalName;
224
	}
225

  
226
	/**
227
	 * @param journalName
228
	 *            the journalName to set
229
	 */
230
	public void setJournalName(final String journalName) {
231
		this.journalName = journalName;
232
	}
233

  
234
	/**
235
	 * @return the journalISSN
236
	 */
237
	public String getJournalISSN() {
238
		return journalISSN;
239
	}
240

  
241
	/**
242
	 * @param journalISSN
243
	 *            the journalISSN to set
244
	 */
245
	public void setJournalISSN(final String journalISSN) {
246
		this.journalISSN = journalISSN;
247
	}
248

  
249
	/**
250
	 * @return the openaireDatasource
251
	 */
252
	public String getOpenaireDatasource() {
253
		return openaireDatasource;
254
	}
255

  
256
	/**
257
	 * @param openaireDatasource
258
	 *            the openaireDatasource to set
259
	 */
260
	public void setOpenaireDatasource(final String openaireDatasource) {
261
		this.openaireDatasource = openaireDatasource;
262
	}
263

  
191 264
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/datasets/DatasetsByProjectIterator.java
54 54

  
55 55
	/*
56 56
	 * (non-Javadoc)
57
	 *
57
	 * 
58 58
	 * @see java.util.Iterator#hasNext()
59 59
	 */
60 60
	@Override
61 61
	public boolean hasNext() {
62 62
		// CASE WHEN WE REACH THE LAST ITEM ON CSV
63 63
		// OR WE HAD SOME PROBLEM ON GET NEXT CSV ITEM
64
		if (this.currentProject == null) return false;
64
		if (this.currentProject == null) { return false; }
65 65
		// IN THIS CASE WE HAVE ANOTHER DATASETS
66 66
		// FOR THE CURRENT PROJECT AND RETURN TRUE
67
		if ((currentIterator != null) && currentIterator.hasNext()) return true;
67
		if (currentIterator != null && currentIterator.hasNext()) { return true; }
68 68
		// OTHERWISE WE FINISHED TO ITERATE THE CURRENT
69 69
		// SETS OF DATASETS FOR A PARTICULAR PROJECT
70 70
		// SO WE HAVE TO RETRIEVE THE NEXT ITERATOR WITH
......
75 75
			currentIterator = getNextIterator();
76 76
			// IF THE NEXT ITERATOR HAS ITEMS RETURN YES
77 77
			// OTHERWISE THE CICLE CONTINUE
78
			if (currentIterator.hasNext()) return true;
78
			if (currentIterator.hasNext()) { return true; }
79 79
			this.currentProject = extractNextLine();
80 80
		}
81 81
		return false;
......
84 84

  
85 85
	/*
86 86
	 * (non-Javadoc)
87
	 *
87
	 * 
88 88
	 * @see java.util.Iterator#next()
89 89
	 */
90 90
	@Override
......
94 94

  
95 95
	/*
96 96
	 * (non-Javadoc)
97
	 *
97
	 * 
98 98
	 * @see java.util.Iterator#remove()
99 99
	 */
100 100
	@Override
......
102 102

  
103 103
	/*
104 104
	 * (non-Javadoc)
105
	 *
105
	 * 
106 106
	 * @see java.lang.Iterable#iterator()
107 107
	 */
108 108
	@Override
......
120 120
		RequestField r = new RequestField();
121 121
		r.setQuery(q);
122 122
		q.getTerm().put("ft-techkeyword", this.currentProject.get(PROJECT_ID_KEY));
123
		return new DatasetsIterator(r, this.currentProject.get(PROJECT_CORDA_ID_KEY)).iterator();
123
		return new DatasetsIterator(r, this.currentProject.get(PROJECT_CORDA_ID_KEY), null).iterator();
124 124
	}
125 125

  
126 126
	/**
......
138 138
			return null;
139 139
		}
140 140
		// WE REACH THE END OF THE CSV
141
		if (line == null) return null;
141
		if (line == null) { return null; }
142 142
		log.debug("splitting line: " + line);
143 143
		String[] values = line.split(SPLIT_REGEX);
144
		if ((values == null) || (values.length != 4)) {
144
		if (values == null || values.length != 4) {
145 145
			log.error("Error on splitting line, the length must be 4");
146 146
			return null;
147 147
		}
......
152 152
		splittedMap.put(PROJECT_CORDA_ID_KEY, cordaId);
153 153
		splittedMap.put(PROJECT_ID_KEY, "project" + id);
154 154
		splittedMap.put(PROJECT_NAME_KEY, project_name);
155
		log.debug(String.format("found project %s with id Corda: %s and id for API: %s", project_name, cordaId, ("project" + id)));
155
		log.debug(String.format("found project %s with id Corda: %s and id for API: %s", project_name, cordaId, "project" + id));
156 156
		return splittedMap;
157 157
	}
158 158
}
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/hostedby/FindHostedByJonbNode.java
1
package eu.dnetlib.msro.openaireplus.workflows.nodes.hostedby;
2

  
3
import java.io.StringReader;
4

  
5
import javax.xml.ws.wsaddressing.W3CEndpointReference;
6

  
7
import org.dom4j.Document;
8
import org.dom4j.Element;
9
import org.dom4j.io.SAXReader;
10

  
11
import com.googlecode.sarasvati.Arc;
12
import com.googlecode.sarasvati.NodeToken;
13

  
14
import eu.dnetlib.enabling.resultset.MappedResultSetFactory;
15
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
16
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils;
17
import eu.dnetlib.miscutils.functional.UnaryFunction;
18
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
19

  
20
// TODO: Auto-generated Javadoc
21
/**
22
 * The Class FindHostedByJonbNode.
23
 */
24
public class FindHostedByJonbNode extends SimpleJobNode {
25

  
26
	/** The input epr param. */
27
	private String inputEprParam;
28

  
29
	/** The output epr param. */
30
	private String outputEprParam;
31

  
32
	/** The counters param. */
33
	private String countersParam;
34

  
35
	/** The result set client factory. */
36
	private ResultSetClientFactory resultSetClientFactory;
37

  
38
	/** The mapped result set factory. */
39
	private MappedResultSetFactory mappedResultSetFactory;
40

  
41
	private final String unknown_repo_id = "openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18";
42

  
43
	/*
44
	 * (non-Javadoc)
45
	 *
46
	 * @see eu.dnetlib.msro.workflows.nodes.SimpleJobNode#execute(com.googlecode.sarasvati.NodeToken)
47
	 */
48
	@Override
49
	protected String execute(final NodeToken token) throws Exception {
50
		final W3CEndpointReference inputEpr = new EPRUtils().getEpr(token.getEnv().getAttribute(inputEprParam));
51
		final HostedByCounters counters = new HostedByCounters();
52

  
53
		final SAXReader reader = new SAXReader();
54

  
55
		final UnaryFunction<String, String> hostedByMapFunction = new UnaryFunction<String, String>() {
56

  
57
			@Override
58
			public String evaluate(final String input) {
59
				try {
60
					final Document doc = reader.read(new StringReader(input));
61
					final Element node = (Element) doc.selectSingleNode("//*[local-name()='hostedBy']");
62
					if (node != null) {
63
						String hostedById = node.attributeValue("id");
64
						if (!hostedById.equals(unknown_repo_id)) {
65
							counters.increaseCounter(hostedById);
66
						}
67
					}
68
				} catch (Exception e) {
69

  
70
				}
71

  
72
				return input;
73
			}
74
		};
75

  
76
		final W3CEndpointReference epr = mappedResultSetFactory.createMappedResultSet(inputEpr, hostedByMapFunction);
77
		token.getEnv().setAttribute(outputEprParam, epr.toString());
78
		token.getEnv().setTransientAttribute(getCountersParam(), counters);
79

  
80
		return Arc.DEFAULT_ARC;
81
	}
82

  
83
	/**
84
	 * @return the inputEprParam
85
	 */
86
	public String getInputEprParam() {
87
		return inputEprParam;
88
	}
89

  
90
	/**
91
	 * @param inputEprParam
92
	 *            the inputEprParam to set
93
	 */
94
	public void setInputEprParam(final String inputEprParam) {
95
		this.inputEprParam = inputEprParam;
96
	}
97

  
98
	/**
99
	 * @return the outputEprParam
100
	 */
101
	public String getOutputEprParam() {
102
		return outputEprParam;
103
	}
104

  
105
	/**
106
	 * @param outputEprParam
107
	 *            the outputEprParam to set
108
	 */
109
	public void setOutputEprParam(final String outputEprParam) {
110
		this.outputEprParam = outputEprParam;
111
	}
112

  
113
	/**
114
	 * @return the resultSetClientFactory
115
	 */
116
	public ResultSetClientFactory getResultSetClientFactory() {
117
		return resultSetClientFactory;
118
	}
119

  
120
	/**
121
	 * @param resultSetClientFactory
122
	 *            the resultSetClientFactory to set
123
	 */
124
	public void setResultSetClientFactory(final ResultSetClientFactory resultSetClientFactory) {
125
		this.resultSetClientFactory = resultSetClientFactory;
126
	}
127

  
128
	/**
129
	 * @return the mappedResultSetFactory
130
	 */
131
	public MappedResultSetFactory getMappedResultSetFactory() {
132
		return mappedResultSetFactory;
133
	}
134

  
135
	/**
136
	 * @param mappedResultSetFactory
137
	 *            the mappedResultSetFactory to set
138
	 */
139
	public void setMappedResultSetFactory(final MappedResultSetFactory mappedResultSetFactory) {
140
		this.mappedResultSetFactory = mappedResultSetFactory;
141
	}
142

  
143
	/**
144
	 * @return the countersParam
145
	 */
146
	public String getCountersParam() {
147
		return countersParam;
148
	}
149

  
150
	/**
151
	 * @param countersParam
152
	 *            the countersParam to set
153
	 */
154
	public void setCountersParam(final String countersParam) {
155
		this.countersParam = countersParam;
156
	}
157

  
158
}
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/datacite/SplitDatasetRecord.java
39 39

  
40 40
	/*
41 41
	 * (non-Javadoc)
42
	 * 
42
	 *
43 43
	 * @see eu.dnetlib.msro.workflows.nodes.SimpleJobNode#execute(com.googlecode.sarasvati.NodeToken)
44 44
	 */
45 45
	@Override
46 46
	protected String execute(final NodeToken token) throws Exception {
47
		final W3CEndpointReference inputEpr = (new EPRUtils()).getEpr(token.getEnv().getAttribute(inputEprParm));
47
		final W3CEndpointReference inputEpr = new EPRUtils().getEpr(token.getEnv().getAttribute(inputEprParm));
48 48
		Iterable<String> input = resultSetClientFactory.getClient(inputEpr);
49 49
		final LinkedBlockingQueue<String> publicationsQueue = new LinkedBlockingQueue<String>();
50 50
		final SplitterDatasetsIterator splitterIterator = new SplitterDatasetsIterator(publicationsQueue, input, "publications");
......
60 60

  
61 61
			@Override
62 62
			public Iterator<String> iterator() {
63
				return new IteratorOnQueue(publicationsQueue);
63
				return new IteratorOnQueue<String>(publicationsQueue, SplitterDatasetsIterator.END_QUEUE);
64 64
			}
65 65
		});
66 66

  
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/datasets/DatasetsByJournalPlugin.java
1
package eu.dnetlib.data.collector.plugins.datasets;
2

  
3
import java.io.IOException;
4
import java.io.InputStreamReader;
5
import java.net.URL;
6
import java.util.ArrayList;
7
import java.util.List;
8

  
9
import eu.dnetlib.data.collector.plugin.CollectorPlugin;
10
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
11
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
12

  
13
public class DatasetsByJournalPlugin implements CollectorPlugin {
14

  
15
	@Override
16
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
17
			throws CollectorServiceException {
18
		try {
19
			URL url = new URL(interfaceDescriptor.getBaseUrl());
20
			url.openConnection();
21
			InputStreamReader reader = new InputStreamReader(url.openStream());
22
			DatasetsByProjectIterator iterator = new DatasetsByProjectIterator(reader);
23
			return iterator;
24
		} catch (IOException e) {
25
			throw new CollectorServiceException("OOOPS something bad happen on creating iterator ", e);
26
		}
27

  
28
	}
29

  
30
	@Override
31
	public String getProtocol() {
32

  
33
		return "datasetsbyjournal";
34
	}
35

  
36
	@Override
37
	public List<String> listNameParameters() {
38

  
39
		return new ArrayList<String>();
40
	}
41

  
42
}
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/datacite/CollectDatasetsByJournalJobNode.java
1
package eu.dnetlib.msro.openaireplus.workflows.nodes.datacite;
2

  
3
import java.io.StringReader;
4
import java.util.concurrent.ArrayBlockingQueue;
5
import java.util.concurrent.BlockingQueue;
6
import java.util.concurrent.Executor;
7
import java.util.concurrent.Executors;
8

  
9
import javax.annotation.Resource;
10
import javax.xml.ws.wsaddressing.W3CEndpointReference;
11

  
12
import org.apache.commons.lang.StringUtils;
13
import org.dom4j.Document;
14
import org.dom4j.io.SAXReader;
15

  
16
import com.googlecode.sarasvati.Arc;
17
import com.googlecode.sarasvati.NodeToken;
18

  
19
import eu.dnetlib.data.collector.plugins.datasets.DatasetsByJournalIterator;
20
import eu.dnetlib.data.collector.plugins.datasets.PangaeaJorunalInfo;
21
import eu.dnetlib.enabling.resultset.IterableResultSetFactory;
22
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
23
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils;
24
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
25

  
26
// TODO: Auto-generated Javadoc
27
/**
28
 * The Class CollectDatasetsByJournalJobNode.
29
 */
30
public class CollectDatasetsByJournalJobNode extends SimpleJobNode {
31

  
32
	public static PangaeaJorunalInfo END_QUEUE = new PangaeaJorunalInfo();
33

  
34
	/** The datasource id. */
35
	private String datasourceId;
36

  
37
	/** The output epr param. */
38
	private String outputEprParam;
39

  
40
	/** The input epr param. */
41
	private String inputEprParam;
42

  
43
	/** The result set client factory. */
44
	private ResultSetClientFactory resultSetClientFactory;
45

  
46
	/** The result set factory. */
47
	@Resource(name = "iterableResultSetFactory")
48
	private IterableResultSetFactory resultSetFactory;
49

  
50
	/** The executor. */
51
	private Executor executor = Executors.newSingleThreadExecutor();
52

  
53
	/*
54
	 * (non-Javadoc)
55
	 * 
56
	 * @see eu.dnetlib.msro.workflows.nodes.SimpleJobNode#execute(com.googlecode.sarasvati.NodeToken)
57
	 */
58
	@Override
59
	protected String execute(final NodeToken token) throws Exception {
60
		final W3CEndpointReference inputEpr = new EPRUtils().getEpr(token.getEnv().getAttribute(inputEprParam));
61
		final Iterable<String> input = resultSetClientFactory.getClient(inputEpr);
62
		final BlockingQueue<PangaeaJorunalInfo> publicationsQueue = new ArrayBlockingQueue<PangaeaJorunalInfo>(500);
63

  
64
		executor.execute(new Runnable() {
65

  
66
			@Override
67
			public void run() {
68
				final SAXReader reader = new SAXReader();
69

  
70
				for (String inputString : input) {
71
					try {
72
						Document doc = reader.read(new StringReader(inputString));
73
						final String entry = doc.valueOf("//FIELD[@name='id']");
74
						final String dsId = doc.valueOf("//FIELD[@name='datasource']");
75
						final String dsName = doc.valueOf("//FIELD[@name='name']");
76
						String jISSN = StringUtils.substringBefore(entry, "__");
77
						String jId = StringUtils.substringAfter(entry, "__");
78
						PangaeaJorunalInfo info = new PangaeaJorunalInfo();
79
						info.setDatasourceId(dsId);
80
						info.setJournalId(jId);
81
						info.setJournalName(dsName);
82
						info.setJournalISSN(jISSN);
83
						publicationsQueue.put(info);
84
					} catch (Exception e) {
85

  
86
					}
87
				}
88
				try {
89
					publicationsQueue.put(END_QUEUE);
90
				} catch (InterruptedException e) {
91

  
92
				}
93

  
94
			}
95
		});
96

  
97
		IteratorOnQueue<PangaeaJorunalInfo> itOnQueue = new IteratorOnQueue<PangaeaJorunalInfo>(publicationsQueue, CollectDatasetsByJournalJobNode.END_QUEUE);
98

  
99
		W3CEndpointReference eprOutput = resultSetFactory.createIterableResultSet(new DatasetsByJournalIterator(itOnQueue));
100
		token.getEnv().setAttribute(getOutputEprParam(), eprOutput.toString());
101
		return Arc.DEFAULT_ARC;
102
	}
103

  
104
	/**
105
	 * Gets the datasource id.
106
	 *
107
	 * @return the datasourceId
108
	 */
109
	public String getDatasourceId() {
110
		return datasourceId;
111
	}
112

  
113
	/**
114
	 * Sets the datasource id.
115
	 *
116
	 * @param datasourceId
117
	 *            the datasourceId to set
118
	 */
119
	public void setDatasourceId(final String datasourceId) {
120
		this.datasourceId = datasourceId;
121
	}
122

  
123
	/**
124
	 * @return the outputEprParam
125
	 */
126
	public String getOutputEprParam() {
127
		return outputEprParam;
128
	}
129

  
130
	/**
131
	 * @param outputEprParam
132
	 *            the outputEprParam to set
133
	 */
134
	public void setOutputEprParam(final String outputEprParam) {
135
		this.outputEprParam = outputEprParam;
136
	}
137

  
138
	/**
139
	 * @return the inputEprParam
140
	 */
141
	public String getInputEprParam() {
142
		return inputEprParam;
143
	}
144

  
145
	/**
146
	 * @param inputEprParam
147
	 *            the inputEprParam to set
148
	 */
149
	public void setInputEprParam(final String inputEprParam) {
150
		this.inputEprParam = inputEprParam;
151
	}
152

  
153
	/**
154
	 * Gets the result set client factory.
155
	 *
156
	 * @return the resultSetClientFactory
157
	 */
158
	public ResultSetClientFactory getResultSetClientFactory() {
159
		return resultSetClientFactory;
160
	}
161

  
162
	/**
163
	 * Sets the result set client factory.
164
	 *
165
	 * @param resultSetClientFactory
166
	 *            the resultSetClientFactory to set
167
	 */
168
	public void setResultSetClientFactory(final ResultSetClientFactory resultSetClientFactory) {
169
		this.resultSetClientFactory = resultSetClientFactory;
170
	}
171

  
172
}
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/datacite/IteratorOnQueue.java
1 1
package eu.dnetlib.msro.openaireplus.workflows.nodes.datacite;
2 2

  
3 3
import java.util.Iterator;
4
import java.util.concurrent.LinkedBlockingQueue;
4
import java.util.concurrent.BlockingQueue;
5 5

  
6 6
import org.apache.commons.logging.Log;
7 7
import org.apache.commons.logging.LogFactory;
8 8

  
9
public class IteratorOnQueue implements Iterator<String> {
9
/**
10
 * The Class IteratorOnQueue.
11
 *
12
 * @param <T>
13
 *            the generic type
14
 */
15
public class IteratorOnQueue<T> implements Iterator<T> {
10 16

  
11 17
	/** The Constant log. */
12 18
	private static final Log log = LogFactory.getLog(IteratorOnQueue.class);
13
	private final LinkedBlockingQueue<String> inputQueue;
14
	private String currentItem;
15 19

  
16
	public IteratorOnQueue(final LinkedBlockingQueue<String> inputQueue) {
20
	/** The input queue. */
21
	private final BlockingQueue<T> inputQueue;
22

  
23
	/** The current item. */
24
	private T currentItem;
25

  
26
	/** The end queue. */
27
	private T endQueue;
28

  
29
	/**
30
	 * Instantiates a new iterator on queue.
31
	 *
32
	 * @param inputQueue
33
	 *            the input queue
34
	 * @param endQueue
35
	 *            the end queue
36
	 */
37
	public IteratorOnQueue(final BlockingQueue<T> inputQueue, final T endQueue) {
17 38
		this.inputQueue = inputQueue;
39
		this.endQueue = endQueue;
40

  
18 41
		try {
19 42
			currentItem = this.inputQueue.take();
20 43
		} catch (InterruptedException e) {
......
22 45
		}
23 46
	}
24 47

  
48
	/**
49
	 * Checks for next.
50
	 *
51
	 * @return true, if successful
52
	 */
25 53
	@Override
26 54
	public boolean hasNext() {
27 55

  
28
		return (currentItem != SplitterDatasetsIterator.END_QUEUE);
56
		return currentItem != this.endQueue;
29 57
	}
30 58

  
59
	/**
60
	 * Next.
61
	 *
62
	 * @return the t
63
	 */
31 64
	@Override
32
	public String next() {
65
	public T next() {
33 66

  
34
		String previous = currentItem;
67
		T previous = currentItem;
35 68
		try {
36 69
			currentItem = this.inputQueue.take();
37 70
		} catch (Exception e) {
......
40 73
		return previous;
41 74
	}
42 75

  
76
	/**
77
	 * Removes the.
78
	 */
43 79
	@Override
44 80
	public void remove() {
45 81
		// TODO Auto-generated method stub
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/repo-hi/datasets_by_journal.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER
5
			value="884e520e-f141-4a17-a0d4-98f8222ef2b0_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
6
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
7
		<RESOURCE_KIND value="WorkflowDSResources" />
8
		<RESOURCE_URI value="value3" />
9
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" />
10
	</HEADER>
11
	<BODY>
12
		<WORKFLOW_NAME>Datasets by journal</WORKFLOW_NAME>
13
		<WORKFLOW_TYPE>REPO_HI</WORKFLOW_TYPE>
14
		<WORKFLOW_PRIORITY>20</WORKFLOW_PRIORITY>
15
		<CONFIGURATION start="manual">
16
			<NODE name="VerifyDatasource" type="VerifyDatasource" isStart="true">
17
				<DESCRIPTION>Verify if DS is pending</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM name="expectedInterfaceTypologyPrefixes" managedBy="system" required="false" type="string">datarepository::unknown</PARAM>
20
					<PARAM name="expectedCompliancePrefixes" managedBy="system" required="false" type="string"></PARAM>
21
				</PARAMETERS>
22
				<ARCS>
23
					<ARC to="createMetaWf"/>
24
					<ARC to="validateDs" name="validateDs"/>
25
				</ARCS>
26
			</NODE>
27
		
28
			<NODE name="validateDs" type="ValidateDatasource">
29
				<DESCRIPTION>Validate DS</DESCRIPTION>
30
				<PARAMETERS/>
31
				<ARCS>
32
					<ARC to="createMetaWf"/>
33
				</ARCS>
34
			</NODE>
35
			
36
			<NODE name="createMetaWf" type="RegisterMetaWf">
37
				<DESCRIPTION>Create MetaWorkflow</DESCRIPTION>
38
				<PARAMETERS>
39
					<PARAM name="wfName" managedBy="system" required="true" type="string">Collect and transform metadata records from data repository</PARAM>
40
				</PARAMETERS>
41
				<ARCS>
42
					<ARC to="createDatacite"/>
43
					<ARC to="createODF"/>
44
					<ARC to="createOAF"/>
45
				</ARCS>
46
			</NODE>
47
			
48
			<NODE name="createDatacite" type="CreateMDStore">
49
				<DESCRIPTION>Create dataset native store</DESCRIPTION>
50
				<PARAMETERS>
51
					<PARAM name="format" managedBy="system" required="true" type="string">dataset</PARAM>
52
					<PARAM name="interpretation" managedBy="system" required="true" type="string">native</PARAM>
53
					<PARAM name="layout" managedBy="system" required="true" type="string">store</PARAM>
54
					<PARAM name="outputPrefix" managedBy="system" required="true" type="string">harv_</PARAM>
55
				</PARAMETERS>
56
				<ARCS>
57
					<ARC to="updateMetaWf" />
58
				</ARCS>
59
			</NODE>
60
			<NODE name="createODF" type="CreateMDStore">
61
				<DESCRIPTION>Create ODF_dataset cleaned store</DESCRIPTION>
62
				<PARAMETERS>
63
					<PARAM name="format" managedBy="system" required="true" type="string">ODF</PARAM>
64
					<PARAM name="interpretation" managedBy="system" required="true" type="string">cleaned</PARAM>
65
					<PARAM name="layout" managedBy="system" required="true" type="string">store</PARAM>
66
					<PARAM name="outputPrefix" managedBy="system" required="true" type="string">tranODF_</PARAM>
67
				</PARAMETERS>
68
				<ARCS>
69
					<ARC to="updateMetaWf" />
70
				</ARCS>
71
			</NODE>
72
			<NODE name="createOAF" type="CreateMDStore">
73
				<DESCRIPTION>Create OAF_publication cleaned store</DESCRIPTION>
74
				<PARAMETERS>
75
					<PARAM name="format" managedBy="system" required="true" type="string">OAF</PARAM>
76
					<PARAM name="interpretation" managedBy="system" required="true" type="string">cleaned</PARAM>
77
					<PARAM name="layout" managedBy="system" required="true" type="string">store</PARAM>
78
					<PARAM name="outputPrefix" managedBy="system" required="true" type="string">tranOAF_</PARAM>
79
				</PARAMETERS>
80
				<ARCS>
81
					<ARC to="updateMetaWf" />
82
				</ARCS>
83
			</NODE>
84
						
85
			<NODE name="updateMetaWf" type="UpdateMetaWf" isJoin="true">
86
				<DESCRIPTION>Update MetaWorkflow</DESCRIPTION>
87
				<PARAMETERS>
88
					<PARAM name="beanName" managedBy="system" required="true" type="string">metaWfDatasetbyJournalsOpenaireMdRecords</PARAM>
89
				</PARAMETERS>
90
				<ARCS>
91
					<ARC to="updateMetaWfStatus" />
92
				</ARCS>
93
			</NODE>
94
			
95
			<NODE name="updateMetaWfStatus" type="UpdateOpenaireMetaWfStatus">
96
				<DESCRIPTION>Update MetaWorkflow Status</DESCRIPTION>
97
				<PARAMETERS />
98
				<ARCS>
99
					<ARC to="success" />
100
				</ARCS>
101
			</NODE>
102
		</CONFIGURATION>
103
		<STATUS />
104
	</BODY>
105
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/repo-hi/aggregatordataRepository_default_ingestion.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="95b4317d0-a9d4-4ced-862b-df1182f35a8e_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
5
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
6
		<RESOURCE_KIND value="WorkflowDSResources" />
7
		<RESOURCE_URI value="value3" />
8
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" />
9
	</HEADER>
10
	<BODY>
11
		<WORKFLOW_NAME>aggregator datarepository metadata records ingestion</WORKFLOW_NAME>
12
		<WORKFLOW_TYPE>REPO_HI</WORKFLOW_TYPE>
13
		<WORKFLOW_PRIORITY>20</WORKFLOW_PRIORITY>
14
		<CONFIGURATION start="manual">
15
			<NODE name="VerifyDatasource" type="VerifyDatasource" isStart="true">
16
				<DESCRIPTION>Verify if DS is pending</DESCRIPTION>
17
				<PARAMETERS>
18
					<PARAM name="expectedInterfaceTypologyPrefixes" managedBy="system" required="false" type="string">aggregator::datarepository</PARAM>
19
					<PARAM name="expectedCompliancePrefixes" managedBy="system" required="false" type="string"></PARAM>
20
				</PARAMETERS>
21
				<ARCS>
22
					<ARC to="createMetaWf"/>
23
					<ARC to="validateDs" name="validateDs"/>
24
				</ARCS>
25
			</NODE>
26
		
27
			<NODE name="validateDs" type="ValidateDatasource">
28
				<DESCRIPTION>Validate DS</DESCRIPTION>
29
				<PARAMETERS/>
30
				<ARCS>
31
					<ARC to="createMetaWf"/>
32
				</ARCS>
33
			</NODE>
34
			
35
			<NODE name="createMetaWf" type="RegisterMetaWf">
36
				<DESCRIPTION>Create MetaWorkflow</DESCRIPTION>
37
				<PARAMETERS>
38
					<PARAM name="wfName" managedBy="system" required="true" type="string">Collect and transform metadata records from data repository</PARAM>
39
				</PARAMETERS>
40
				<ARCS>
41
					<ARC to="createDatacite"/>
42
					<ARC to="createODF"/>
43
				</ARCS>
44
			</NODE>
45
			
46
			<NODE name="createDatacite" type="CreateMDStore">
47
				<DESCRIPTION>Create oai_datacite native store</DESCRIPTION>
48
				<PARAMETERS>
49
					<PARAM name="format" managedBy="system" required="true" type="string">oai_datacite</PARAM>
50
					<PARAM name="interpretation" managedBy="system" required="true" type="string">native</PARAM>
51
					<PARAM name="layout" managedBy="system" required="true" type="string">store</PARAM>
52
					<PARAM name="outputPrefix" managedBy="system" required="true" type="string">harv_</PARAM>
53
				</PARAMETERS>
54
				<ARCS>
55
					<ARC to="updateMetaWf" />
56
				</ARCS>
57
			</NODE>
58
			<NODE name="createODF" type="CreateMDStore">
59
				<DESCRIPTION>Create ODF_datacite cleaned store</DESCRIPTION>
60
				<PARAMETERS>
61
					<PARAM name="format" managedBy="system" required="true" type="string">ODF</PARAM>
62
					<PARAM name="interpretation" managedBy="system" required="true" type="string">cleaned</PARAM>
63
					<PARAM name="layout" managedBy="system" required="true" type="string">store</PARAM>
64
					<PARAM name="outputPrefix" managedBy="system" required="true" type="string">tran_</PARAM>
65
				</PARAMETERS>
66
				<ARCS>
67
					<ARC to="updateMetaWf" />
68
				</ARCS>
69
			</NODE>
70
						
71
			<NODE name="updateMetaWf" type="UpdateMetaWf" isJoin="true">
72
				<DESCRIPTION>Update MetaWorkflow</DESCRIPTION>
73
				<PARAMETERS>
74
					<PARAM name="beanName" managedBy="system" required="true" type="string">metaWfaggregatorDataciteOpenaireMdRecords</PARAM>
75
				</PARAMETERS>
76
				<ARCS>
77
					<ARC to="updateMetaWfStatus" />
78
				</ARCS>
79
			</NODE>
80
			
81
			<NODE name="updateMetaWfStatus" type="UpdateOpenaireMetaWfStatus">
82
				<DESCRIPTION>Update MetaWorkflow Status</DESCRIPTION>
83
				<PARAMETERS />
84
				<ARCS>
85
					<ARC to="success" />
86
				</ARCS>
87
			</NODE>
88
		</CONFIGURATION>
89
		<STATUS />
90
	</BODY>
91
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/repo-hi/pangaeadatasets_by_journal.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER
5
			value="61995a63-5922-4fac-be67-5970bab0095d_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
6
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
7
		<RESOURCE_KIND value="WorkflowDSResources" />
8
		<RESOURCE_URI value="value3" />
9
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" />
10
	</HEADER>
11
	<BODY>
12
		<WORKFLOW_NAME>HostedBy Map Pangaea Journal</WORKFLOW_NAME>
13
		<WORKFLOW_TYPE>REPO_HI</WORKFLOW_TYPE>
14
		<WORKFLOW_PRIORITY>20</WORKFLOW_PRIORITY>
15
		<CONFIGURATION start="manual">
16
			<NODE name="VerifyDatasource" type="VerifyDatasource" isStart="true">
17
				<DESCRIPTION>Verify if DS is pending</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM name="expectedInterfaceTypologyPrefixes" managedBy="system" required="false" type="string">datarepository::unknown</PARAM>
20
					<PARAM name="expectedCompliancePrefixes" managedBy="system" required="false" type="string"></PARAM>
21
				</PARAMETERS>
22
				<ARCS>
23
					<ARC to="createMetaWf"/>
24
					<ARC to="validateDs" name="validateDs"/>
25
				</ARCS>
26
			</NODE>
27
		
28
			<NODE name="validateDs" type="ValidateDatasource">
29
				<DESCRIPTION>Validate DS</DESCRIPTION>
30
				<PARAMETERS/>
31
				<ARCS>
32
					<ARC to="createMetaWf"/>
33
				</ARCS>
34
			</NODE>
35
			
36
			<NODE name="createMetaWf" type="RegisterMetaWf">
37
				<DESCRIPTION>Create MetaWorkflow</DESCRIPTION>
38
				<PARAMETERS>
39
					<PARAM name="wfName" managedBy="system" required="true" type="string">Collect and transform metadata records from data repository</PARAM>
40
				</PARAMETERS>
41
				<ARCS>
42
					<ARC to="updateMetaWf"/>					
43
				</ARCS>
44
			</NODE>				
45
						
46
			<NODE name="updateMetaWf" type="UpdateMetaWf" >
47
				<DESCRIPTION>Update MetaWorkflow</DESCRIPTION>
48
				<PARAMETERS>
49
					<PARAM name="beanName" managedBy="system" required="true" type="string">metaWfHostedByMapJournalPangaea</PARAM>
50
				</PARAMETERS>
51
				<ARCS>
52
					<ARC to="updateMetaWfStatus" />
53
				</ARCS>
54
			</NODE>
55
			
56
			<NODE name="updateMetaWfStatus" type="UpdateOpenaireMetaWfStatus">
57
				<DESCRIPTION>Update MetaWorkflow Status</DESCRIPTION>
58
				<PARAMETERS />
59
				<ARCS>
60
					<ARC to="success" />
61
				</ARCS>
62
			</NODE>
63
		</CONFIGURATION>
64
		<STATUS />
65
	</BODY>
66
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/repo-hi/datasets_by_projects.xml
85 85
			<NODE name="updateMetaWf" type="UpdateMetaWf" isJoin="true">
86 86
				<DESCRIPTION>Update MetaWorkflow</DESCRIPTION>
87 87
				<PARAMETERS>
88
					<PARAM name="beanName" managedBy="system" required="true" type="string">metaWfDatasetOpenaireMdRecords</PARAM>
88
					<PARAM name="beanName" managedBy="system" required="true" type="string">metaWfDatasetbyProjectsOpenaireMdRecords</PARAM>
89 89
				</PARAMETERS>
90 90
				<ARCS>
91 91
					<ARC to="updateMetaWfStatus" />
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/xslt-datacite/DatasetfromPangaeaTransform.xsl
121 121
                        </contributor>
122 122
                    </contributors>
123 123
                </resource>
124
                 <xsl:variable name="journalName"><xsl:value-of select="//*[local-name() ='journal']/@name"/></xsl:variable>
125
                    <xsl:variable name="journalISSN"><xsl:value-of select="//*[local-name() ='journal']/@issn"/></xsl:variable>
126
                    <xsl:variable name="journalDSId"><xsl:value-of select="//*[local-name() ='journal']/@datasourceid"/></xsl:variable>
127
                    
128
                    <xsl:choose>
129
                        <xsl:when test="string-length($journalISSN) &gt; 0">
130
                            <oaf:journal issn="{$journalISSN}" eissn="" >                                                              
131
                               <xsl:value-of select="$journalName"/>
132
                            </oaf:journal>                          
133
                            <oaf:hostedBy>
134
                                <xsl:attribute name="id"><xsl:value-of select="$journalDSId"/></xsl:attribute>
135
                                <xsl:attribute name="name"><xsl:value-of select="$journalName"/></xsl:attribute>
136
                            </oaf:hostedBy>                                        
137
                        </xsl:when>
138
                        <xsl:otherwise>                            
139
                            <oaf:hostedBy
140
                                id="openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18"
141
                                name="Unknown Repository"/>      
142
                        </xsl:otherwise>                        
143
                    </xsl:choose>
124 144
                <xsl:for-each select="//md:citation/md:supplementTo/@id">
125 145
                    <xsl:variable name="publicationID">
126 146
                        <xsl:value-of select="."/>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/xslt-datacite/PublicationFromPangaeatransform.xsl
1 1
<?xml version="1.0" encoding="UTF-8"?>
2 2
<xsl:stylesheet version="1.0"
3
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
                xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:oai="http://www.openarchives.org/OAI/2.0/"
5
                xmlns:datetime="http://exslt.org/dates-and-times"
6
                xmlns:dri="http://www.driver-repository.eu/namespace/dri"
7
                xmlns:md="http://www.pangaea.de/MetaData" xmlns:oaa="http://namespace.openaire.eu/oaa"
8
                xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:fn="http://www.w3.org/2005/xpath-functions"
9
                xmlns:dnet="eu.dnetlib.data.transform.xml.DNetMdStoreToHbaseXsltFunctions"
10
                xmlns:stringUtils="org.apache.commons.lang.StringUtils"
11
                exclude-result-prefixes="xsl dnet oaa fn stringUtils datetime">
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:oai="http://www.openarchives.org/OAI/2.0/"
5
	xmlns:datetime="http://exslt.org/dates-and-times" xmlns:dri="http://www.driver-repository.eu/namespace/dri"
6
	xmlns:md="http://www.pangaea.de/MetaData" xmlns:oaa="http://namespace.openaire.eu/oaa"
7
	xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:fn="http://www.w3.org/2005/xpath-functions"
8
	xmlns:dnet="eu.dnetlib.data.transform.xml.DNetMdStoreToHbaseXsltFunctions"
9
	xmlns:stringUtils="org.apache.commons.lang.StringUtils"
10
	exclude-result-prefixes="xsl dnet oaa fn stringUtils datetime">
12 11

  
13
    <xsl:param name="namespacePrefix"/>
14
    <xsl:param name="dataprovider_id"/>
15
    <xsl:param name="parentDatasourceId"/>
16
    <xsl:param name="dataprovider_name"/>
12
	<xsl:param name="namespacePrefix" />
13
	<xsl:param name="dataprovider_id" />
14
	<xsl:param name="parentDatasourceId" />
15
	<xsl:param name="dataprovider_name" />
17 16

  
18
    <xsl:template match="/">
19
        <xsl:variable name="identifier">
20
            <xsl:value-of select="stringUtils:substringAfter(//md:citation/md:URI,'doi:')"/>
21
        </xsl:variable>
22
        <xsl:variable name="identifier_datacite">
23
            <xsl:value-of select="oai:record/oai:header/dri:objIdentifier"/>
24
        </xsl:variable>
25
        <publications>
26
            <xsl:for-each select="//md:citation/md:supplementTo">
27
                <xsl:variable name="pub_identifier">
28
                    <xsl:value-of select="./@id"/>
29
                </xsl:variable>
30
                <xsl:choose>
31
                    <xsl:when test="string-length($pub_identifier)">
32
                        <xsl:call-template name="GeneratePublication">
33
                            <xsl:with-param name="pub_identifier" select="$pub_identifier"/>
34
                        </xsl:call-template>
35
                    </xsl:when>
36
                </xsl:choose>
37
            </xsl:for-each>
38
        </publications>
39
    </xsl:template>
40
    <xsl:template name="GeneratePublication">
41
        <xsl:param name="pub_identifier"/>
42
        <publication>
43
            <oai:record>
44
                <oai:header>
45
                    <dri:objIdentifier>
46
                        <xsl:value-of
47
                                select="concat($namespacePrefix,'::', dnet:md5($pub_identifier))"/>
48
                    </dri:objIdentifier>
49
                    <dri:recordIdentifier>
50
                        <xsl:value-of select="$pub_identifier"/>
51
                    </dri:recordIdentifier>
52
                    <dri:dateOfCollection>
53
                        <xsl:value-of select="datetime:dateTime()"/>
54
                    </dri:dateOfCollection>
55
                    <dri:repositoryId>
56
                        <xsl:value-of select="$dataprovider_id"/>
57
                    </dri:repositoryId>
58
                    <oaf:datasourceprefix>
59
                        <xsl:value-of select="$namespacePrefix"/>
60
                    </oaf:datasourceprefix>
61
                </oai:header>
62
                <oai:metadata>
63
                    <dc:identifier>
64
                        <xsl:value-of select="$pub_identifier"/>
65
                    </dc:identifier>
66
                    <xsl:variable name="publicationDOI">
67
                        <xsl:value-of select="./md:URI" />
68
                    </xsl:variable>
69
                    <xsl:choose>
70
                        <xsl:when test="string-length($publicationDOI) &gt; 0">
71
                        <dc:identifier>
72
                            <xsl:value-of select="$publicationDOI" />
73
                        </dc:identifier>
74
                        </xsl:when>
75
                    </xsl:choose>
76
                    
77
                    <dc:title>
78
                        <xsl:value-of select=".//md:title"/>
79
                    </dc:title>
80
                    <xsl:for-each select=".//md:author">
81
                        <dc:creator>
82
                            <xsl:value-of select="concat(./md:lastName,' ', md:firstName)"/>
83
                        </dc:creator>
84
                    </xsl:for-each>
85
                    <dc:source>
86
                        <xsl:value-of select="./md:source"/>
87
                    </dc:source>
88
                    <dr:CobjCategory>0000</dr:CobjCategory>
89
                    <dc:language>und</dc:language>
90
                    <oaf:journal>
91
                        <xsl:value-of select="./md:source"/>
92
                    </oaf:journal>
93
                    <oaf:accessrights>UNKNOWN</oaf:accessrights>
94
                    <xsl:variable name="projectId">
95
                        <xsl:value-of select="normalize-space(//*[local-name() ='projectid'])"/>
96
                    </xsl:variable>
97
                    <xsl:choose>
98
                        <xsl:when test="string-length($projectId) &gt; 0">
99
                            <oaf:projectid>
100
                                <xsl:value-of
101
                                        select="concat('corda_______::', stringUtils:substringAfterLast($projectId, '/'))"/>
102
                            </oaf:projectid>
103
                        </xsl:when>
104
                    </xsl:choose>
105
                    <oaf:hostedBy
106
                            id="openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18"
107
                            name="Unknown Repository"/>
108
                    <oaf:collectedFrom>
109
                        <xsl:attribute name="id">
110
                            <xsl:value-of select="$parentDatasourceId"/>
17
	<xsl:template match="/">
18
		<xsl:variable name="identifier">
19
			<xsl:value-of
20
				select="stringUtils:substringAfter(//md:citation/md:URI,'doi:')" />
21
		</xsl:variable>
22
		<xsl:variable name="identifier_datacite">
23
			<xsl:value-of select="oai:record/oai:header/dri:objIdentifier" />
24
		</xsl:variable>
25
		<publications>
26
			<xsl:for-each select="//md:citation/md:supplementTo">
27
				<xsl:variable name="pub_identifier">
28
					<xsl:value-of select="./@id" />
29
				</xsl:variable>
30
				<xsl:choose>
31
					<xsl:when test="string-length($pub_identifier)">
32
						<xsl:call-template name="GeneratePublication">
33
							<xsl:with-param name="pub_identifier" select="$pub_identifier" />
34
						</xsl:call-template>
35
					</xsl:when>
36
				</xsl:choose>
37
			</xsl:for-each>
38
		</publications>
39
	</xsl:template>
40
	<xsl:template name="GeneratePublication">
41
		<xsl:param name="pub_identifier" />
42
		<publication>
43
			<oai:record>
44
				<oai:header>
45
					<dri:objIdentifier>
46
						<xsl:value-of
47
							select="concat($namespacePrefix,'::', dnet:md5($pub_identifier))" />
48
					</dri:objIdentifier>
49
					<dri:recordIdentifier>
50
						<xsl:value-of select="$pub_identifier" />
51
					</dri:recordIdentifier>
52
					<dri:dateOfCollection>
53
						<xsl:value-of select="datetime:dateTime()" />
54
					</dri:dateOfCollection>
55
					<dri:repositoryId>
56
						<xsl:value-of select="$dataprovider_id" />
57
					</dri:repositoryId>
58
					<oaf:datasourceprefix>
59
						<xsl:value-of select="$namespacePrefix" />
60
					</oaf:datasourceprefix>
61
				</oai:header>
62
				<oai:metadata>
63
					<dc:identifier>
64
						<xsl:value-of select="$pub_identifier" />
65
					</dc:identifier>
66
					<xsl:variable name="publicationDOI">
67
						<xsl:value-of select="./md:URI" />
68
					</xsl:variable>
69
					<xsl:choose>
70
						<xsl:when test="string-length($publicationDOI) &gt; 0">
71
							<dc:identifier>
72
								<xsl:value-of select="$publicationDOI" />
73
							</dc:identifier>
74
						</xsl:when>
75
					</xsl:choose>
76

  
77
					<dc:title>
78
						<xsl:value-of select=".//md:title" />
79
					</dc:title>
80
					<xsl:for-each select=".//md:author">
81
						<dc:creator>
82
							<xsl:value-of select="concat(./md:lastName,' ', md:firstName)" />
83
						</dc:creator>
84
					</xsl:for-each>
85
					<dc:source>
86
						<xsl:value-of select="./md:source" />
87
					</dc:source>
88
					<dr:CobjCategory>0000</dr:CobjCategory>
89
					<dc:language>und</dc:language>
90
					<oaf:journal>
91
						<xsl:value-of select="./md:source" />
92
					</oaf:journal>
93
					<oaf:accessrights>UNKNOWN</oaf:accessrights>
94
					<xsl:variable name="projectId">
95
						<xsl:value-of select="normalize-space(//*[local-name() ='projectid'])" />
96
					</xsl:variable>
97
					<xsl:choose>
98
						<xsl:when test="string-length($projectId) &gt; 0">
99
							<oaf:projectid>
100
								<xsl:value-of
101
									select="concat('corda_______::', stringUtils:substringAfterLast($projectId, '/'))" />
102
							</oaf:projectid>
103
						</xsl:when>
104
					</xsl:choose>
105
					<xsl:variable name="journalName">
106
						<xsl:value-of select="//*[local-name() ='journal']/@name" />
107
					</xsl:variable>
108
					<xsl:variable name="journalISSN">
109
						<xsl:value-of select="//*[local-name() ='journal']/@issn" />
110
					</xsl:variable>
111
					<xsl:variable name="journalDSId">
112
						<xsl:value-of select="//*[local-name() ='journal']/@datasourceid" />
113
					</xsl:variable>
114

  
115
					<xsl:choose>
116
						<xsl:when test="string-length($journalISSN) &gt; 0">
117
							<oaf:journal issn="{$journalISSN}" eissn="">
118
								<xsl:value-of select="$journalName" />
119
							</oaf:journal>
120
							<oaf:hostedBy>
121
								<xsl:attribute name="id"><xsl:value-of
122
									select="$journalDSId" /></xsl:attribute>
123
								<xsl:attribute name="name"><xsl:value-of
124
									select="$journalName" /></xsl:attribute>
125
							</oaf:hostedBy>
126
						</xsl:when>
127
						<xsl:otherwise>
128
							<oaf:hostedBy
129
								id="openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18" name="Unknown Repository" />
130
						</xsl:otherwise>
131
					</xsl:choose>
132

  
133

  
134

  
135

  
136
					<oaf:collectedFrom>
137
						<xsl:attribute name="id">
138
                            <xsl:value-of select="$parentDatasourceId" />
111 139
                        </xsl:attribute>
112
                        <xsl:attribute name="name">
113
                            <xsl:value-of select="$dataprovider_name"/>
140
						<xsl:attribute name="name">
141
                            <xsl:value-of select="$dataprovider_name" />
114 142
                        </xsl:attribute>
115
                    </oaf:collectedFrom>
116
                    <oaf:about>
117
                        <oaf:datainfo>
118
                            <oaf:inferred>false</oaf:inferred>
119
                            <oaf:deletedbyinference>false</oaf:deletedbyinference>
120
                            <oaf:trust>0.9</oaf:trust>
121
                            <oaf:inferenceprovenance/>
122
                            <oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive"
123
                                                  classname="sysimport:crosswalk:datasetarchive"
124
                                                  schemeid="dnet:provenanceActions"
125
                                                  schemename="dnet:provenanceActions"/>
126
                        </oaf:datainfo>
127
                    </oaf:about>
128
                </oai:metadata>
129
            </oai:record>
130
        </publication>
131
    </xsl:template>
143
					</oaf:collectedFrom>
144
					<oaf:about>
145
						<oaf:datainfo>
146
							<oaf:inferred>false</oaf:inferred>
147
							<oaf:deletedbyinference>false</oaf:deletedbyinference>
148
							<oaf:trust>0.9</oaf:trust>
149
							<oaf:inferenceprovenance />
150
							<oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive"
151
								classname="sysimport:crosswalk:datasetarchive" schemeid="dnet:provenanceActions"
152
								schemename="dnet:provenanceActions" />
153
						</oaf:datainfo>
154
					</oaf:about>
155
				</oai:metadata>
156
			</oai:record>
157
		</publication>
158
	</xsl:template>
132 159
</xsl:stylesheet>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/doaj_journals/sql/doajJournals_findMatches.sql
1 1
INSERT INTO hostedby_map(_dnet_resource_identifier_, oa_source_id, entry, datasourceid)
2 2
SELECT   d.issn||'@@'||d.collectedfrom, d.collectedfrom, d.issn, d.id 
3 3
  FROM  datasources d  where 
4
	d.collectedfrom ='openaire____::doaj' and d.issn is not null
4
	d.collectedfrom ='driver______::1790119e-d281-4b7a-aedf-866d1d853a07' and d.issn is not null and d.id like 'doajarticles::%'
5 5
	AND 
6 6
	(d.collectedfrom, d.issn) NOT IN
7
		(SELECT oa_source_id, entry from hostedby_map)
7
		(SELECT oa_source_id, entry from hostedby_map)
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/xslt/doaj2db.xsl
62 62
                        <FIELD name="namespaceprefix">
63 63
                            <xsl:value-of select="concat('doaj', $ISSNNS)"/>
64 64
                        </FIELD>
65
                        <FIELD name="datasourceclass">pubsrepository::journal</FIELD>
66
                        <FIELD name="datasourcescheme">dnet:datasource_typologies</FIELD>
65
                        <FIELD name="datasourceclass">pubsrepository::journal</FIELD>                        
67 66
                        <FIELD name="collectedfrom">
68 67
                            <xsl:value-of select="$parentDatasourceId"/>
69 68
                        </FIELD>
70
                    </ROW>
71

  
72
                    <xsl:variable name="apiId"
73
                        select="concat('api_________::', $datasourceId, '::0')"/>
74
                    <ROW table="api">
75
                        <FIELD name="id">
76
                            <xsl:value-of select="$apiId"/>
77
                        </FIELD>
78
                        <FIELD name="_dnet_resource_identifier_">
79
                            <xsl:value-of select="$apiId"/>
80
                        </FIELD>
81
                        <FIELD name="protocolclass">filesystem</FIELD>
82
                        <FIELD name="datasource">
83
                            <xsl:value-of select="$datasourceId"/>
84
                        </FIELD>
85
                        <FIELD name="contentdescriptionclass">metadata</FIELD>
86
                        <FIELD name="typologyclass">pubsrepository::journal</FIELD>
87
                        <FIELD name="compatibilityclass">UNKNOWN</FIELD>
88
                        
89
                    </ROW>
90

  
91
                    <ROW table="apicollections">
92
                        <FIELD name="api">
93
                            <xsl:value-of select="$apiId"/>
94
                        </FIELD>
95
                        <FIELD name="param">baseUrl</FIELD>
96
                        <FIELD name="_dnet_resource_identifier_">
97
                            <xsl:value-of select="concat($apiId, '@@baseUrl')"/>
98
                        </FIELD>
99
                        <xsl:if test="string-length(normalize-space('/dev/null')) &gt; 0">
100
                            <FIELD name="original">
101
                                <xsl:value-of select="normalize-space('/dev/null')"/>
102
                            </FIELD>
103
                        </xsl:if>
104
                    </ROW>
105

  
106

  
107
                    <ROW table="apicollections">
108
                        <FIELD name="api">
109
                            <xsl:value-of select="$apiId"/>
110
                        </FIELD>
111
                        <FIELD name="param">metadata_identifier_path</FIELD>
112
                        <FIELD name="_dnet_resource_identifier_">
113
                            <xsl:value-of select="concat($apiId, '@@metadata_identifier_path')"/>
114
                        </FIELD>
115
                        <FIELD name="original"
116
                            >//*[local-name()='header']/*[local-name()='identifier']</FIELD>
117
                        <FIELD name="accessparam" type="boolean">false</FIELD>
118
                    </ROW>
119

  
120

  
121

  
69
                    </ROW>       
122 70
                </ROWS>
123 71
            </metadata>
124 72
        </record>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/applicationContext-repohi.xml
1 1
<?xml version="1.0" encoding="UTF-8"?>
2 2

  
3 3
<beans xmlns="http://www.springframework.org/schema/beans"
4
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:p="http://www.springframework.org/schema/p"
5
	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
6
	
7
	
8
	
9
	<bean id="metaWfOpenaireAggregatorPubsMdRecords" 
10
		class="eu.dnetlib.msro.workflows.metawf.DatasourceMetaWorkflow"
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff