Revision 32940
Added by Sandro La Bruzzo about 10 years ago
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/doaj_journals/sql/doajJournals_prepareTables.sql | ||
---|---|---|
1 |
-- Staging table for DOAJ journals: any previous copy is dropped and the table
-- is created again each time this script runs.
DROP TABLE IF EXISTS doaj_temp_journal;

CREATE TABLE IF NOT EXISTS doaj_temp_journal (
    -- Generated identifier: 'temp_' + md5 of the current timestamp + '_' + md5 of a random number.
    _dnet_resource_identifier_ varchar(2048) DEFAULT 'temp_'||md5(clock_timestamp()::text)||'_'||md5(random()::text),
    id                         varchar(255) PRIMARY KEY,
    journalname                varchar(255),
    issn                       varchar(255) NOT NULL,
    -- Must point to an existing row of the datasources table.
    oa_source_id               varchar(255) REFERENCES datasources(id)
);
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/datasets/DatasetsIterator.java | ||
---|---|---|
7 | 7 |
import org.apache.commons.httpclient.methods.PostMethod; |
8 | 8 |
import org.apache.commons.httpclient.methods.StringRequestEntity; |
9 | 9 |
import org.apache.commons.io.IOUtils; |
10 |
import org.apache.commons.lang.StringEscapeUtils; |
|
11 |
import org.apache.commons.logging.Log; |
|
12 |
import org.apache.commons.logging.LogFactory; |
|
10 | 13 |
|
11 | 14 |
import com.google.gson.Gson; |
12 | 15 |
import com.google.gson.GsonBuilder; |
... | ... | |
16 | 19 |
*/ |
17 | 20 |
public class DatasetsIterator implements Iterable<String>, Iterator<String> { |
18 | 21 |
|
22 |
/** The logger. */ |
|
23 |
private static final Log log = LogFactory.getLog(DatasetsIterator.class); |
|
24 |
|
|
19 | 25 |
/** The base url template. */ |
20 | 26 |
private static String BASE_URL_TEMPLATE = "http://ws.pangaea.de/es/pangaea/panmd/_search?_source=xml&size=%d&from=%d"; |
21 | 27 |
|
22 | 28 |
/** The journal id. */ |
23 |
private String journalId; |
|
29 |
private String journalId = "";
|
|
24 | 30 |
|
31 |
/** The journal name. */ |
|
32 |
private String journalName = ""; |
|
33 |
|
|
34 |
/** The journal issn. */ |
|
35 |
private String journalISSN = ""; |
|
36 |
|
|
37 |
/** The openaire datasource. */ |
|
38 |
private String openaireDatasource = ""; |
|
39 |
|
|
25 | 40 |
/** The total. */ |
26 | 41 |
private long total; |
27 | 42 |
|
... | ... | |
43 | 58 |
private String projectCordaId; |
44 | 59 |
|
45 | 60 |
private static String RECORD_TEMPLATE = "<datasetsRecord><oaf:projectid xmlns:oaf=\"http://namespace.openaire.eu/oaf\">%s</oaf:projectid>" |
46 |
+ "<metadata>%s</metadata></datasetsRecord>"; |
|
61 |
+ "<journal name='%s' issn='%s' datasourceid = '%s'/><metadata>%s</metadata></datasetsRecord>";
|
|
47 | 62 |
|
48 | 63 |
/** |
49 | 64 |
* Instantiates a new journal iterator. |
... | ... | |
51 | 66 |
* @param request |
52 | 67 |
* the request |
53 | 68 |
*/ |
54 |
public DatasetsIterator(final RequestField request, final String projectCordaId) { |
|
69 |
public DatasetsIterator(final RequestField request, final String projectCordaId, final PangaeaJorunalInfo info) {
    this.request = request;
    this.setProjectCordaId(projectCordaId);

    // The journal info is optional: callers that iterate by project pass null,
    // in which case the journal fields keep their empty-string defaults.
    if (info != null) {
        this.setJournalId(info.getJournalId());
        // Name, ISSN and datasource id are all substituted into XML attributes
        // of RECORD_TEMPLATE, so each of them is escaped, not only the name.
        this.setJournalName(StringEscapeUtils.escapeXml(info.getJournalName()));
        this.setJournalISSN(StringEscapeUtils.escapeXml(info.getJournalISSN()));
        this.setOpenaireDatasource(StringEscapeUtils.escapeXml(info.getDatasourceId()));
    }
    log.debug("Start Iterator");
}
58 | 81 |
|
59 | 82 |
/** |
... | ... | |
67 | 90 |
*/ |
68 | 91 |
private String executeQuery(final int from, final int size) { |
69 | 92 |
try { |
93 |
log.debug("executing query " + this.request.getQuery().getTerm()); |
|
94 |
log.debug(String.format("from:%d size:%d", from, size)); |
|
95 |
|
|
70 | 96 |
HttpClient client = new HttpClient(); |
71 | 97 |
|
72 | 98 |
PostMethod method = new PostMethod(String.format(BASE_URL_TEMPLATE, size, from)); |
... | ... | |
114 | 140 |
|
115 | 141 |
/* |
116 | 142 |
* (non-Javadoc) |
117 |
* |
|
143 |
*
|
|
118 | 144 |
* @see java.util.Iterator#hasNext() |
119 | 145 |
*/ |
120 | 146 |
@Override |
121 | 147 |
public boolean hasNext() { |
122 |
return (from + currentIterator) < total;
|
|
148 |
return from + currentIterator < total;
|
|
123 | 149 |
} |
124 | 150 |
|
125 | 151 |
/* |
126 | 152 |
* (non-Javadoc) |
127 |
* |
|
153 |
*
|
|
128 | 154 |
* @see java.util.Iterator#next() |
129 | 155 |
*/ |
130 | 156 |
@Override |
131 | 157 |
public String next() { |
132 |
String xml = String.format(RECORD_TEMPLATE, this.projectCordaId, currentResponse.getXmlRecords().get(currentIterator)); |
|
158 |
String xml = String.format(RECORD_TEMPLATE, this.projectCordaId, this.journalName, this.journalISSN, this.openaireDatasource, currentResponse |
|
159 |
.getXmlRecords().get(currentIterator)); |
|
133 | 160 |
currentIterator++; |
134 | 161 |
if (currentIterator == DEFAULT_SIZE) { |
135 | 162 |
getNextItem(); |
... | ... | |
139 | 166 |
|
140 | 167 |
/* |
141 | 168 |
* (non-Javadoc) |
142 |
* |
|
169 |
*
|
|
143 | 170 |
* @see java.util.Iterator#remove() |
144 | 171 |
*/ |
145 | 172 |
@Override |
... | ... | |
170 | 197 |
from += currentIterator; |
171 | 198 |
currentResponse = ElasticSearchResponse.createNewResponse(executeQuery(from, DEFAULT_SIZE)); |
172 | 199 |
total = currentResponse.getTotal(); |
200 |
log.debug("from : " + from + " total of the request is " + total); |
|
173 | 201 |
currentIterator = 0; |
174 | 202 |
} |
175 | 203 |
|
... | ... | |
188 | 216 |
this.projectCordaId = projectCordaId; |
189 | 217 |
} |
190 | 218 |
|
219 |
/** |
|
220 |
* @return the journalName |
|
221 |
*/ |
|
222 |
public String getJournalName() { |
|
223 |
return journalName; |
|
224 |
} |
|
225 |
|
|
226 |
/** |
|
227 |
* @param journalName |
|
228 |
* the journalName to set |
|
229 |
*/ |
|
230 |
public void setJournalName(final String journalName) { |
|
231 |
this.journalName = journalName; |
|
232 |
} |
|
233 |
|
|
234 |
/** |
|
235 |
* @return the journalISSN |
|
236 |
*/ |
|
237 |
public String getJournalISSN() { |
|
238 |
return journalISSN; |
|
239 |
} |
|
240 |
|
|
241 |
/** |
|
242 |
* @param journalISSN |
|
243 |
* the journalISSN to set |
|
244 |
*/ |
|
245 |
public void setJournalISSN(final String journalISSN) { |
|
246 |
this.journalISSN = journalISSN; |
|
247 |
} |
|
248 |
|
|
249 |
/** |
|
250 |
* @return the openaireDatasource |
|
251 |
*/ |
|
252 |
public String getOpenaireDatasource() { |
|
253 |
return openaireDatasource; |
|
254 |
} |
|
255 |
|
|
256 |
/** |
|
257 |
* @param openaireDatasource |
|
258 |
* the openaireDatasource to set |
|
259 |
*/ |
|
260 |
public void setOpenaireDatasource(final String openaireDatasource) { |
|
261 |
this.openaireDatasource = openaireDatasource; |
|
262 |
} |
|
263 |
|
|
191 | 264 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/datasets/DatasetsByProjectIterator.java | ||
---|---|---|
54 | 54 |
|
55 | 55 |
/* |
56 | 56 |
* (non-Javadoc) |
57 |
* |
|
57 |
*
|
|
58 | 58 |
* @see java.util.Iterator#hasNext() |
59 | 59 |
*/ |
60 | 60 |
@Override |
61 | 61 |
public boolean hasNext() { |
62 | 62 |
// CASE WHEN WE REACH THE LAST ITEM ON CSV |
63 | 63 |
// OR WE HAD SOME PROBLEM ON GET NEXT CSV ITEM |
64 |
if (this.currentProject == null) return false;
|
|
64 |
if (this.currentProject == null) { return false; }
|
|
65 | 65 |
// IN THIS CASE WE HAVE ANOTHER DATASETS |
66 | 66 |
// FOR THE CURRENT PROJECT AND RETURN TRUE |
67 |
if ((currentIterator != null) && currentIterator.hasNext()) return true;
|
|
67 |
if (currentIterator != null && currentIterator.hasNext()) { return true; }
|
|
68 | 68 |
// OTHERWISE WE FINISHED TO ITERATE THE CURRENT |
69 | 69 |
// SETS OF DATASETS FOR A PARTICULAR PROJECT |
70 | 70 |
// SO WE HAVE TO RETRIEVE THE NEXT ITERATOR WITH |
... | ... | |
75 | 75 |
currentIterator = getNextIterator(); |
76 | 76 |
// IF THE NEXT ITERATOR HAS ITEMS RETURN YES |
77 | 77 |
// OTHERWISE THE CYCLE CONTINUES |
78 |
if (currentIterator.hasNext()) return true;
|
|
78 |
if (currentIterator.hasNext()) { return true; }
|
|
79 | 79 |
this.currentProject = extractNextLine(); |
80 | 80 |
} |
81 | 81 |
return false; |
... | ... | |
84 | 84 |
|
85 | 85 |
/* |
86 | 86 |
* (non-Javadoc) |
87 |
* |
|
87 |
*
|
|
88 | 88 |
* @see java.util.Iterator#next() |
89 | 89 |
*/ |
90 | 90 |
@Override |
... | ... | |
94 | 94 |
|
95 | 95 |
/* |
96 | 96 |
* (non-Javadoc) |
97 |
* |
|
97 |
*
|
|
98 | 98 |
* @see java.util.Iterator#remove() |
99 | 99 |
*/ |
100 | 100 |
@Override |
... | ... | |
102 | 102 |
|
103 | 103 |
/* |
104 | 104 |
* (non-Javadoc) |
105 |
* |
|
105 |
*
|
|
106 | 106 |
* @see java.lang.Iterable#iterator() |
107 | 107 |
*/ |
108 | 108 |
@Override |
... | ... | |
120 | 120 |
RequestField r = new RequestField(); |
121 | 121 |
r.setQuery(q); |
122 | 122 |
q.getTerm().put("ft-techkeyword", this.currentProject.get(PROJECT_ID_KEY)); |
123 |
return new DatasetsIterator(r, this.currentProject.get(PROJECT_CORDA_ID_KEY)).iterator(); |
|
123 |
return new DatasetsIterator(r, this.currentProject.get(PROJECT_CORDA_ID_KEY), null).iterator();
|
|
124 | 124 |
} |
125 | 125 |
|
126 | 126 |
/** |
... | ... | |
138 | 138 |
return null; |
139 | 139 |
} |
140 | 140 |
// WE REACH THE END OF THE CSV |
141 |
if (line == null) return null;
|
|
141 |
if (line == null) { return null; }
|
|
142 | 142 |
log.debug("splitting line: " + line); |
143 | 143 |
String[] values = line.split(SPLIT_REGEX); |
144 |
if ((values == null) || (values.length != 4)) {
|
|
144 |
if (values == null || values.length != 4) {
|
|
145 | 145 |
log.error("Error on splitting line, the length must be 4"); |
146 | 146 |
return null; |
147 | 147 |
} |
... | ... | |
152 | 152 |
splittedMap.put(PROJECT_CORDA_ID_KEY, cordaId); |
153 | 153 |
splittedMap.put(PROJECT_ID_KEY, "project" + id); |
154 | 154 |
splittedMap.put(PROJECT_NAME_KEY, project_name); |
155 |
log.debug(String.format("found project %s with id Corda: %s and id for API: %s", project_name, cordaId, ("project" + id)));
|
|
155 |
log.debug(String.format("found project %s with id Corda: %s and id for API: %s", project_name, cordaId, "project" + id));
|
|
156 | 156 |
return splittedMap; |
157 | 157 |
} |
158 | 158 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/hostedby/FindHostedByJonbNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.hostedby; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
|
|
5 |
import javax.xml.ws.wsaddressing.W3CEndpointReference; |
|
6 |
|
|
7 |
import org.dom4j.Document; |
|
8 |
import org.dom4j.Element; |
|
9 |
import org.dom4j.io.SAXReader; |
|
10 |
|
|
11 |
import com.googlecode.sarasvati.Arc; |
|
12 |
import com.googlecode.sarasvati.NodeToken; |
|
13 |
|
|
14 |
import eu.dnetlib.enabling.resultset.MappedResultSetFactory; |
|
15 |
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; |
|
16 |
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils; |
|
17 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
|
18 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
19 |
|
|
20 |
// TODO: Auto-generated Javadoc |
|
21 |
/** |
|
22 |
* The Class FindHostedByJonbNode. |
|
23 |
*/ |
|
24 |
public class FindHostedByJonbNode extends SimpleJobNode { |
|
25 |
|
|
26 |
/** The input epr param. */ |
|
27 |
private String inputEprParam; |
|
28 |
|
|
29 |
/** The output epr param. */ |
|
30 |
private String outputEprParam; |
|
31 |
|
|
32 |
/** The counters param. */ |
|
33 |
private String countersParam; |
|
34 |
|
|
35 |
/** The result set client factory. */ |
|
36 |
private ResultSetClientFactory resultSetClientFactory; |
|
37 |
|
|
38 |
/** The mapped result set factory. */ |
|
39 |
private MappedResultSetFactory mappedResultSetFactory; |
|
40 |
|
|
41 |
private final String unknown_repo_id = "openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18"; |
|
42 |
|
|
43 |
/* |
|
44 |
* (non-Javadoc) |
|
45 |
* |
|
46 |
* @see eu.dnetlib.msro.workflows.nodes.SimpleJobNode#execute(com.googlecode.sarasvati.NodeToken) |
|
47 |
*/ |
|
48 |
@Override |
|
49 |
protected String execute(final NodeToken token) throws Exception { |
|
50 |
final W3CEndpointReference inputEpr = new EPRUtils().getEpr(token.getEnv().getAttribute(inputEprParam)); |
|
51 |
final HostedByCounters counters = new HostedByCounters(); |
|
52 |
|
|
53 |
final SAXReader reader = new SAXReader(); |
|
54 |
|
|
55 |
final UnaryFunction<String, String> hostedByMapFunction = new UnaryFunction<String, String>() { |
|
56 |
|
|
57 |
@Override |
|
58 |
public String evaluate(final String input) { |
|
59 |
try { |
|
60 |
final Document doc = reader.read(new StringReader(input)); |
|
61 |
final Element node = (Element) doc.selectSingleNode("//*[local-name()='hostedBy']"); |
|
62 |
if (node != null) { |
|
63 |
String hostedById = node.attributeValue("id"); |
|
64 |
if (!hostedById.equals(unknown_repo_id)) { |
|
65 |
counters.increaseCounter(hostedById); |
|
66 |
} |
|
67 |
} |
|
68 |
} catch (Exception e) { |
|
69 |
|
|
70 |
} |
|
71 |
|
|
72 |
return input; |
|
73 |
} |
|
74 |
}; |
|
75 |
|
|
76 |
final W3CEndpointReference epr = mappedResultSetFactory.createMappedResultSet(inputEpr, hostedByMapFunction); |
|
77 |
token.getEnv().setAttribute(outputEprParam, epr.toString()); |
|
78 |
token.getEnv().setTransientAttribute(getCountersParam(), counters); |
|
79 |
|
|
80 |
return Arc.DEFAULT_ARC; |
|
81 |
} |
|
82 |
|
|
83 |
/** |
|
84 |
* @return the inputEprParam |
|
85 |
*/ |
|
86 |
public String getInputEprParam() { |
|
87 |
return inputEprParam; |
|
88 |
} |
|
89 |
|
|
90 |
/** |
|
91 |
* @param inputEprParam |
|
92 |
* the inputEprParam to set |
|
93 |
*/ |
|
94 |
public void setInputEprParam(final String inputEprParam) { |
|
95 |
this.inputEprParam = inputEprParam; |
|
96 |
} |
|
97 |
|
|
98 |
/** |
|
99 |
* @return the outputEprParam |
|
100 |
*/ |
|
101 |
public String getOutputEprParam() { |
|
102 |
return outputEprParam; |
|
103 |
} |
|
104 |
|
|
105 |
/** |
|
106 |
* @param outputEprParam |
|
107 |
* the outputEprParam to set |
|
108 |
*/ |
|
109 |
public void setOutputEprParam(final String outputEprParam) { |
|
110 |
this.outputEprParam = outputEprParam; |
|
111 |
} |
|
112 |
|
|
113 |
/** |
|
114 |
* @return the resultSetClientFactory |
|
115 |
*/ |
|
116 |
public ResultSetClientFactory getResultSetClientFactory() { |
|
117 |
return resultSetClientFactory; |
|
118 |
} |
|
119 |
|
|
120 |
/** |
|
121 |
* @param resultSetClientFactory |
|
122 |
* the resultSetClientFactory to set |
|
123 |
*/ |
|
124 |
public void setResultSetClientFactory(final ResultSetClientFactory resultSetClientFactory) { |
|
125 |
this.resultSetClientFactory = resultSetClientFactory; |
|
126 |
} |
|
127 |
|
|
128 |
/** |
|
129 |
* @return the mappedResultSetFactory |
|
130 |
*/ |
|
131 |
public MappedResultSetFactory getMappedResultSetFactory() { |
|
132 |
return mappedResultSetFactory; |
|
133 |
} |
|
134 |
|
|
135 |
/** |
|
136 |
* @param mappedResultSetFactory |
|
137 |
* the mappedResultSetFactory to set |
|
138 |
*/ |
|
139 |
public void setMappedResultSetFactory(final MappedResultSetFactory mappedResultSetFactory) { |
|
140 |
this.mappedResultSetFactory = mappedResultSetFactory; |
|
141 |
} |
|
142 |
|
|
143 |
/** |
|
144 |
* @return the countersParam |
|
145 |
*/ |
|
146 |
public String getCountersParam() { |
|
147 |
return countersParam; |
|
148 |
} |
|
149 |
|
|
150 |
/** |
|
151 |
* @param countersParam |
|
152 |
* the countersParam to set |
|
153 |
*/ |
|
154 |
public void setCountersParam(final String countersParam) { |
|
155 |
this.countersParam = countersParam; |
|
156 |
} |
|
157 |
|
|
158 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/datacite/SplitDatasetRecord.java | ||
---|---|---|
39 | 39 |
|
40 | 40 |
/* |
41 | 41 |
* (non-Javadoc) |
42 |
*
|
|
42 |
* |
|
43 | 43 |
* @see eu.dnetlib.msro.workflows.nodes.SimpleJobNode#execute(com.googlecode.sarasvati.NodeToken) |
44 | 44 |
*/ |
45 | 45 |
@Override |
46 | 46 |
protected String execute(final NodeToken token) throws Exception { |
47 |
final W3CEndpointReference inputEpr = (new EPRUtils()).getEpr(token.getEnv().getAttribute(inputEprParm));
|
|
47 |
final W3CEndpointReference inputEpr = new EPRUtils().getEpr(token.getEnv().getAttribute(inputEprParm));
|
|
48 | 48 |
Iterable<String> input = resultSetClientFactory.getClient(inputEpr); |
49 | 49 |
final LinkedBlockingQueue<String> publicationsQueue = new LinkedBlockingQueue<String>(); |
50 | 50 |
final SplitterDatasetsIterator splitterIterator = new SplitterDatasetsIterator(publicationsQueue, input, "publications"); |
... | ... | |
60 | 60 |
|
61 | 61 |
@Override |
62 | 62 |
public Iterator<String> iterator() { |
63 |
return new IteratorOnQueue(publicationsQueue);
|
|
63 |
return new IteratorOnQueue<String>(publicationsQueue, SplitterDatasetsIterator.END_QUEUE);
|
|
64 | 64 |
} |
65 | 65 |
}); |
66 | 66 |
|
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/datasets/DatasetsByJournalPlugin.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.datasets; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.InputStreamReader; |
|
5 |
import java.net.URL; |
|
6 |
import java.util.ArrayList; |
|
7 |
import java.util.List; |
|
8 |
|
|
9 |
import eu.dnetlib.data.collector.plugin.CollectorPlugin; |
|
10 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
11 |
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor; |
|
12 |
|
|
13 |
public class DatasetsByJournalPlugin implements CollectorPlugin { |
|
14 |
|
|
15 |
@Override |
|
16 |
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) |
|
17 |
throws CollectorServiceException { |
|
18 |
try { |
|
19 |
URL url = new URL(interfaceDescriptor.getBaseUrl()); |
|
20 |
url.openConnection(); |
|
21 |
InputStreamReader reader = new InputStreamReader(url.openStream()); |
|
22 |
DatasetsByProjectIterator iterator = new DatasetsByProjectIterator(reader); |
|
23 |
return iterator; |
|
24 |
} catch (IOException e) { |
|
25 |
throw new CollectorServiceException("OOOPS something bad happen on creating iterator ", e); |
|
26 |
} |
|
27 |
|
|
28 |
} |
|
29 |
|
|
30 |
@Override |
|
31 |
public String getProtocol() { |
|
32 |
|
|
33 |
return "datasetsbyjournal"; |
|
34 |
} |
|
35 |
|
|
36 |
@Override |
|
37 |
public List<String> listNameParameters() { |
|
38 |
|
|
39 |
return new ArrayList<String>(); |
|
40 |
} |
|
41 |
|
|
42 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/datacite/CollectDatasetsByJournalJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.datacite; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
import java.util.concurrent.ArrayBlockingQueue; |
|
5 |
import java.util.concurrent.BlockingQueue; |
|
6 |
import java.util.concurrent.Executor; |
|
7 |
import java.util.concurrent.Executors; |
|
8 |
|
|
9 |
import javax.annotation.Resource; |
|
10 |
import javax.xml.ws.wsaddressing.W3CEndpointReference; |
|
11 |
|
|
12 |
import org.apache.commons.lang.StringUtils; |
|
13 |
import org.dom4j.Document; |
|
14 |
import org.dom4j.io.SAXReader; |
|
15 |
|
|
16 |
import com.googlecode.sarasvati.Arc; |
|
17 |
import com.googlecode.sarasvati.NodeToken; |
|
18 |
|
|
19 |
import eu.dnetlib.data.collector.plugins.datasets.DatasetsByJournalIterator; |
|
20 |
import eu.dnetlib.data.collector.plugins.datasets.PangaeaJorunalInfo; |
|
21 |
import eu.dnetlib.enabling.resultset.IterableResultSetFactory; |
|
22 |
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; |
|
23 |
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils; |
|
24 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
25 |
|
|
26 |
// TODO: Auto-generated Javadoc |
|
27 |
/** |
|
28 |
* The Class CollectDatasetsByJournalJobNode. |
|
29 |
*/ |
|
30 |
public class CollectDatasetsByJournalJobNode extends SimpleJobNode { |
|
31 |
|
|
32 |
public static PangaeaJorunalInfo END_QUEUE = new PangaeaJorunalInfo(); |
|
33 |
|
|
34 |
/** The datasource id. */ |
|
35 |
private String datasourceId; |
|
36 |
|
|
37 |
/** The output epr param. */ |
|
38 |
private String outputEprParam; |
|
39 |
|
|
40 |
/** The input epr param. */ |
|
41 |
private String inputEprParam; |
|
42 |
|
|
43 |
/** The result set client factory. */ |
|
44 |
private ResultSetClientFactory resultSetClientFactory; |
|
45 |
|
|
46 |
/** The result set factory. */ |
|
47 |
@Resource(name = "iterableResultSetFactory") |
|
48 |
private IterableResultSetFactory resultSetFactory; |
|
49 |
|
|
50 |
/** The executor. */ |
|
51 |
private Executor executor = Executors.newSingleThreadExecutor(); |
|
52 |
|
|
53 |
/* |
|
54 |
* (non-Javadoc) |
|
55 |
* |
|
56 |
* @see eu.dnetlib.msro.workflows.nodes.SimpleJobNode#execute(com.googlecode.sarasvati.NodeToken) |
|
57 |
*/ |
|
58 |
@Override |
|
59 |
protected String execute(final NodeToken token) throws Exception { |
|
60 |
final W3CEndpointReference inputEpr = new EPRUtils().getEpr(token.getEnv().getAttribute(inputEprParam)); |
|
61 |
final Iterable<String> input = resultSetClientFactory.getClient(inputEpr); |
|
62 |
final BlockingQueue<PangaeaJorunalInfo> publicationsQueue = new ArrayBlockingQueue<PangaeaJorunalInfo>(500); |
|
63 |
|
|
64 |
executor.execute(new Runnable() { |
|
65 |
|
|
66 |
@Override |
|
67 |
public void run() { |
|
68 |
final SAXReader reader = new SAXReader(); |
|
69 |
|
|
70 |
for (String inputString : input) { |
|
71 |
try { |
|
72 |
Document doc = reader.read(new StringReader(inputString)); |
|
73 |
final String entry = doc.valueOf("//FIELD[@name='id']"); |
|
74 |
final String dsId = doc.valueOf("//FIELD[@name='datasource']"); |
|
75 |
final String dsName = doc.valueOf("//FIELD[@name='name']"); |
|
76 |
String jISSN = StringUtils.substringBefore(entry, "__"); |
|
77 |
String jId = StringUtils.substringAfter(entry, "__"); |
|
78 |
PangaeaJorunalInfo info = new PangaeaJorunalInfo(); |
|
79 |
info.setDatasourceId(dsId); |
|
80 |
info.setJournalId(jId); |
|
81 |
info.setJournalName(dsName); |
|
82 |
info.setJournalISSN(jISSN); |
|
83 |
publicationsQueue.put(info); |
|
84 |
} catch (Exception e) { |
|
85 |
|
|
86 |
} |
|
87 |
} |
|
88 |
try { |
|
89 |
publicationsQueue.put(END_QUEUE); |
|
90 |
} catch (InterruptedException e) { |
|
91 |
|
|
92 |
} |
|
93 |
|
|
94 |
} |
|
95 |
}); |
|
96 |
|
|
97 |
IteratorOnQueue<PangaeaJorunalInfo> itOnQueue = new IteratorOnQueue<PangaeaJorunalInfo>(publicationsQueue, CollectDatasetsByJournalJobNode.END_QUEUE); |
|
98 |
|
|
99 |
W3CEndpointReference eprOutput = resultSetFactory.createIterableResultSet(new DatasetsByJournalIterator(itOnQueue)); |
|
100 |
token.getEnv().setAttribute(getOutputEprParam(), eprOutput.toString()); |
|
101 |
return Arc.DEFAULT_ARC; |
|
102 |
} |
|
103 |
|
|
104 |
/** |
|
105 |
* Gets the datasource id. |
|
106 |
* |
|
107 |
* @return the datasourceId |
|
108 |
*/ |
|
109 |
public String getDatasourceId() { |
|
110 |
return datasourceId; |
|
111 |
} |
|
112 |
|
|
113 |
/** |
|
114 |
* Sets the datasource id. |
|
115 |
* |
|
116 |
* @param datasourceId |
|
117 |
* the datasourceId to set |
|
118 |
*/ |
|
119 |
public void setDatasourceId(final String datasourceId) { |
|
120 |
this.datasourceId = datasourceId; |
|
121 |
} |
|
122 |
|
|
123 |
/** |
|
124 |
* @return the outputEprParam |
|
125 |
*/ |
|
126 |
public String getOutputEprParam() { |
|
127 |
return outputEprParam; |
|
128 |
} |
|
129 |
|
|
130 |
/** |
|
131 |
* @param outputEprParam |
|
132 |
* the outputEprParam to set |
|
133 |
*/ |
|
134 |
public void setOutputEprParam(final String outputEprParam) { |
|
135 |
this.outputEprParam = outputEprParam; |
|
136 |
} |
|
137 |
|
|
138 |
/** |
|
139 |
* @return the inputEprParam |
|
140 |
*/ |
|
141 |
public String getInputEprParam() { |
|
142 |
return inputEprParam; |
|
143 |
} |
|
144 |
|
|
145 |
/** |
|
146 |
* @param inputEprParam |
|
147 |
* the inputEprParam to set |
|
148 |
*/ |
|
149 |
public void setInputEprParam(final String inputEprParam) { |
|
150 |
this.inputEprParam = inputEprParam; |
|
151 |
} |
|
152 |
|
|
153 |
/** |
|
154 |
* Gets the result set client factory. |
|
155 |
* |
|
156 |
* @return the resultSetClientFactory |
|
157 |
*/ |
|
158 |
public ResultSetClientFactory getResultSetClientFactory() { |
|
159 |
return resultSetClientFactory; |
|
160 |
} |
|
161 |
|
|
162 |
/** |
|
163 |
* Sets the result set client factory. |
|
164 |
* |
|
165 |
* @param resultSetClientFactory |
|
166 |
* the resultSetClientFactory to set |
|
167 |
*/ |
|
168 |
public void setResultSetClientFactory(final ResultSetClientFactory resultSetClientFactory) { |
|
169 |
this.resultSetClientFactory = resultSetClientFactory; |
|
170 |
} |
|
171 |
|
|
172 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/datacite/IteratorOnQueue.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.datacite; |
2 | 2 |
|
3 | 3 |
import java.util.Iterator; |
4 |
import java.util.concurrent.LinkedBlockingQueue;
|
|
4 |
import java.util.concurrent.BlockingQueue; |
|
5 | 5 |
|
6 | 6 |
import org.apache.commons.logging.Log; |
7 | 7 |
import org.apache.commons.logging.LogFactory; |
8 | 8 |
|
9 |
public class IteratorOnQueue implements Iterator<String> { |
|
9 |
/** |
|
10 |
* The Class IteratorOnQueue. |
|
11 |
* |
|
12 |
* @param <T> |
|
13 |
* the generic type |
|
14 |
*/ |
|
15 |
public class IteratorOnQueue<T> implements Iterator<T> { |
|
10 | 16 |
|
11 | 17 |
/** The Constant log. */ |
12 | 18 |
private static final Log log = LogFactory.getLog(IteratorOnQueue.class); |
13 |
private final LinkedBlockingQueue<String> inputQueue; |
|
14 |
private String currentItem; |
|
15 | 19 |
|
16 |
public IteratorOnQueue(final LinkedBlockingQueue<String> inputQueue) { |
|
20 |
/** The input queue. */ |
|
21 |
private final BlockingQueue<T> inputQueue; |
|
22 |
|
|
23 |
/** The current item. */ |
|
24 |
private T currentItem; |
|
25 |
|
|
26 |
/** The end queue. */ |
|
27 |
private T endQueue; |
|
28 |
|
|
29 |
/** |
|
30 |
* Instantiates a new iterator on queue. |
|
31 |
* |
|
32 |
* @param inputQueue |
|
33 |
* the input queue |
|
34 |
* @param endQueue |
|
35 |
* the end queue |
|
36 |
*/ |
|
37 |
public IteratorOnQueue(final BlockingQueue<T> inputQueue, final T endQueue) { |
|
17 | 38 |
this.inputQueue = inputQueue; |
39 |
this.endQueue = endQueue; |
|
40 |
|
|
18 | 41 |
try { |
19 | 42 |
currentItem = this.inputQueue.take(); |
20 | 43 |
} catch (InterruptedException e) { |
... | ... | |
22 | 45 |
} |
23 | 46 |
} |
24 | 47 |
|
48 |
/** |
|
49 |
* Checks for next. |
|
50 |
* |
|
51 |
* @return true, if successful |
|
52 |
*/ |
|
25 | 53 |
@Override |
26 | 54 |
public boolean hasNext() { |
27 | 55 |
|
28 |
return (currentItem != SplitterDatasetsIterator.END_QUEUE);
|
|
56 |
return currentItem != this.endQueue;
|
|
29 | 57 |
} |
30 | 58 |
|
59 |
/** |
|
60 |
* Next. |
|
61 |
* |
|
62 |
* @return the t |
|
63 |
*/ |
|
31 | 64 |
@Override |
32 |
public String next() {
|
|
65 |
public T next() {
|
|
33 | 66 |
|
34 |
String previous = currentItem;
|
|
67 |
T previous = currentItem;
|
|
35 | 68 |
try { |
36 | 69 |
currentItem = this.inputQueue.take(); |
37 | 70 |
} catch (Exception e) { |
... | ... | |
40 | 73 |
return previous; |
41 | 74 |
} |
42 | 75 |
|
76 |
/** |
|
77 |
* Removes the. |
|
78 |
*/ |
|
43 | 79 |
@Override |
44 | 80 |
public void remove() { |
45 | 81 |
// TODO Auto-generated method stub |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/repo-hi/datasets_by_journal.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER |
|
5 |
value="884e520e-f141-4a17-a0d4-98f8222ef2b0_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" /> |
|
6 |
<RESOURCE_TYPE value="WorkflowDSResourceType" /> |
|
7 |
<RESOURCE_KIND value="WorkflowDSResources" /> |
|
8 |
<RESOURCE_URI value="value3" /> |
|
9 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" /> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<WORKFLOW_NAME>Datasets by journal</WORKFLOW_NAME> |
|
13 |
<WORKFLOW_TYPE>REPO_HI</WORKFLOW_TYPE> |
|
14 |
<WORKFLOW_PRIORITY>20</WORKFLOW_PRIORITY> |
|
15 |
<CONFIGURATION start="manual"> |
|
16 |
<NODE name="VerifyDatasource" type="VerifyDatasource" isStart="true"> |
|
17 |
<DESCRIPTION>Retrieve datasets by projects id</DESCRIPTION> |
|
18 |
<PARAMETERS> |
|
19 |
<PARAM name="expectedInterfaceTypologyPrefixes" managedBy="system" required="false" type="string">datarepository::unknown</PARAM> |
|
20 |
<PARAM name="expectedCompliancePrefixes" managedBy="system" required="false" type="string"></PARAM> |
|
21 |
</PARAMETERS> |
|
22 |
<ARCS> |
|
23 |
<ARC to="createMetaWf"/> |
|
24 |
<ARC to="validateDs" name="validateDs"/> |
|
25 |
</ARCS> |
|
26 |
</NODE> |
|
27 |
|
|
28 |
<NODE name="validateDs" type="ValidateDatasource"> |
|
29 |
<DESCRIPTION>Validate DS</DESCRIPTION> |
|
30 |
<PARAMETERS/> |
|
31 |
<ARCS> |
|
32 |
<ARC to="createMetaWf"/> |
|
33 |
</ARCS> |
|
34 |
</NODE> |
|
35 |
|
|
36 |
<NODE name="createMetaWf" type="RegisterMetaWf"> |
|
37 |
<DESCRIPTION>Create MetaWorkflow</DESCRIPTION> |
|
38 |
<PARAMETERS> |
|
39 |
<PARAM name="wfName" managedBy="system" required="true" type="string">Collect and transform metadata records from data repository</PARAM> |
|
40 |
</PARAMETERS> |
|
41 |
<ARCS> |
|
42 |
<ARC to="createDatacite"/> |
|
43 |
<ARC to="createODF"/> |
|
44 |
<ARC to="createOAF"/> |
|
45 |
</ARCS> |
|
46 |
</NODE> |
|
47 |
|
|
48 |
<NODE name="createDatacite" type="CreateMDStore"> |
|
49 |
<DESCRIPTION>Create dataset native store</DESCRIPTION> |
|
50 |
<PARAMETERS> |
|
51 |
<PARAM name="format" managedBy="system" required="true" type="string">dataset</PARAM> |
|
52 |
<PARAM name="interpretation" managedBy="system" required="true" type="string">native</PARAM> |
|
53 |
<PARAM name="layout" managedBy="system" required="true" type="string">store</PARAM> |
|
54 |
<PARAM name="outputPrefix" managedBy="system" required="true" type="string">harv_</PARAM> |
|
55 |
</PARAMETERS> |
|
56 |
<ARCS> |
|
57 |
<ARC to="updateMetaWf" /> |
|
58 |
</ARCS> |
|
59 |
</NODE> |
|
60 |
<NODE name="createODF" type="CreateMDStore"> |
|
61 |
<DESCRIPTION>Create ODF_dataset cleaned store</DESCRIPTION> |
|
62 |
<PARAMETERS> |
|
63 |
<PARAM name="format" managedBy="system" required="true" type="string">ODF</PARAM> |
|
64 |
<PARAM name="interpretation" managedBy="system" required="true" type="string">cleaned</PARAM> |
|
65 |
<PARAM name="layout" managedBy="system" required="true" type="string">store</PARAM> |
|
66 |
<PARAM name="outputPrefix" managedBy="system" required="true" type="string">tranODF_</PARAM> |
|
67 |
</PARAMETERS> |
|
68 |
<ARCS> |
|
69 |
<ARC to="updateMetaWf" /> |
|
70 |
</ARCS> |
|
71 |
</NODE> |
|
72 |
<NODE name="createOAF" type="CreateMDStore"> |
|
73 |
<DESCRIPTION>Create OAF_publication cleaned store</DESCRIPTION> |
|
74 |
<PARAMETERS> |
|
75 |
<PARAM name="format" managedBy="system" required="true" type="string">OAF</PARAM> |
|
76 |
<PARAM name="interpretation" managedBy="system" required="true" type="string">cleaned</PARAM> |
|
77 |
<PARAM name="layout" managedBy="system" required="true" type="string">store</PARAM> |
|
78 |
<PARAM name="outputPrefix" managedBy="system" required="true" type="string">tranOAF_</PARAM> |
|
79 |
</PARAMETERS> |
|
80 |
<ARCS> |
|
81 |
<ARC to="updateMetaWf" /> |
|
82 |
</ARCS> |
|
83 |
</NODE> |
|
84 |
|
|
85 |
<NODE name="updateMetaWf" type="UpdateMetaWf" isJoin="true"> |
|
86 |
<DESCRIPTION>Create MetaWorkflow</DESCRIPTION> |
|
87 |
<PARAMETERS> |
|
88 |
<PARAM name="beanName" managedBy="system" required="true" type="string">metaWfDatasetbyJournalsOpenaireMdRecords</PARAM> |
|
89 |
</PARAMETERS> |
|
90 |
<ARCS> |
|
91 |
<ARC to="updateMetaWfStatus" /> |
|
92 |
</ARCS> |
|
93 |
</NODE> |
|
94 |
|
|
95 |
<NODE name="updateMetaWfStatus" type="UpdateOpenaireMetaWfStatus"> |
|
96 |
<DESCRIPTION>Update MetaWorkflow Status</DESCRIPTION> |
|
97 |
<PARAMETERS /> |
|
98 |
<ARCS> |
|
99 |
<ARC to="success" /> |
|
100 |
</ARCS> |
|
101 |
</NODE> |
|
102 |
</CONFIGURATION> |
|
103 |
<STATUS /> |
|
104 |
</BODY> |
|
105 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/repo-hi/aggregatordataRepository_default_ingestion.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value="95b4317d0-a9d4-4ced-862b-df1182f35a8e_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" /> |
|
5 |
<RESOURCE_TYPE value="WorkflowDSResourceType" /> |
|
6 |
<RESOURCE_KIND value="WorkflowDSResources" /> |
|
7 |
<RESOURCE_URI value="value3" /> |
|
8 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" /> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<WORKFLOW_NAME>aggregator datarepository metadata records ingestion</WORKFLOW_NAME> |
|
12 |
<WORKFLOW_TYPE>REPO_HI</WORKFLOW_TYPE> |
|
13 |
<WORKFLOW_PRIORITY>20</WORKFLOW_PRIORITY> |
|
14 |
<CONFIGURATION start="manual"> |
|
15 |
<NODE name="VerifyDatasource" type="VerifyDatasource" isStart="true"> |
|
16 |
<DESCRIPTION>Verify if DS is pending</DESCRIPTION> |
|
17 |
<PARAMETERS> |
|
18 |
<PARAM name="expectedInterfaceTypologyPrefixes" managedBy="system" required="false" type="string">aggregator::datarepository</PARAM> |
|
19 |
<PARAM name="expectedCompliancePrefixes" managedBy="system" required="false" type="string"></PARAM> |
|
20 |
</PARAMETERS> |
|
21 |
<ARCS> |
|
22 |
<ARC to="createMetaWf"/> |
|
23 |
<ARC to="validateDs" name="validateDs"/> |
|
24 |
</ARCS> |
|
25 |
</NODE> |
|
26 |
|
|
27 |
<NODE name="validateDs" type="ValidateDatasource"> |
|
28 |
<DESCRIPTION>Validate DS</DESCRIPTION> |
|
29 |
<PARAMETERS/> |
|
30 |
<ARCS> |
|
31 |
<ARC to="createMetaWf"/> |
|
32 |
</ARCS> |
|
33 |
</NODE> |
|
34 |
|
|
35 |
<NODE name="createMetaWf" type="RegisterMetaWf"> |
|
36 |
<DESCRIPTION>Create MetaWorkflow</DESCRIPTION> |
|
37 |
<PARAMETERS> |
|
38 |
<PARAM name="wfName" managedBy="system" required="true" type="string">Collect and transform metadata records from data repository</PARAM> |
|
39 |
</PARAMETERS> |
|
40 |
<ARCS> |
|
41 |
<ARC to="createDatacite"/> |
|
42 |
<ARC to="createODF"/> |
|
43 |
</ARCS> |
|
44 |
</NODE> |
|
45 |
|
|
46 |
<NODE name="createDatacite" type="CreateMDStore"> |
|
47 |
<DESCRIPTION>Create oai_datacite native store</DESCRIPTION> |
|
48 |
<PARAMETERS> |
|
49 |
<PARAM name="format" managedBy="system" required="true" type="string">oai_datacite</PARAM> |
|
50 |
<PARAM name="interpretation" managedBy="system" required="true" type="string">native</PARAM> |
|
51 |
<PARAM name="layout" managedBy="system" required="true" type="string">store</PARAM> |
|
52 |
<PARAM name="outputPrefix" managedBy="system" required="true" type="string">harv_</PARAM> |
|
53 |
</PARAMETERS> |
|
54 |
<ARCS> |
|
55 |
<ARC to="updateMetaWf" /> |
|
56 |
</ARCS> |
|
57 |
</NODE> |
|
58 |
<NODE name="createODF" type="CreateMDStore"> |
|
59 |
<DESCRIPTION>Create ODF_datacite cleaned store</DESCRIPTION> |
|
60 |
<PARAMETERS> |
|
61 |
<PARAM name="format" managedBy="system" required="true" type="string">ODF</PARAM> |
|
62 |
<PARAM name="interpretation" managedBy="system" required="true" type="string">cleaned</PARAM> |
|
63 |
<PARAM name="layout" managedBy="system" required="true" type="string">store</PARAM> |
|
64 |
<PARAM name="outputPrefix" managedBy="system" required="true" type="string">tran_</PARAM> |
|
65 |
</PARAMETERS> |
|
66 |
<ARCS> |
|
67 |
<ARC to="updateMetaWf" /> |
|
68 |
</ARCS> |
|
69 |
</NODE> |
|
70 |
|
|
71 |
<NODE name="updateMetaWf" type="UpdateMetaWf" isJoin="true"> |
|
72 |
<DESCRIPTION>Create MetaWorkflow</DESCRIPTION> |
|
73 |
<PARAMETERS> |
|
74 |
<PARAM name="beanName" managedBy="system" required="true" type="string">metaWfaggregatorDataciteOpenaireMdRecords</PARAM> |
|
75 |
</PARAMETERS> |
|
76 |
<ARCS> |
|
77 |
<ARC to="updateMetaWfStatus" /> |
|
78 |
</ARCS> |
|
79 |
</NODE> |
|
80 |
|
|
81 |
<NODE name="updateMetaWfStatus" type="UpdateOpenaireMetaWfStatus"> |
|
82 |
<DESCRIPTION>Update MetaWorkflow Status</DESCRIPTION> |
|
83 |
<PARAMETERS /> |
|
84 |
<ARCS> |
|
85 |
<ARC to="success" /> |
|
86 |
</ARCS> |
|
87 |
</NODE> |
|
88 |
</CONFIGURATION> |
|
89 |
<STATUS /> |
|
90 |
</BODY> |
|
91 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/repo-hi/pangaeadatasets_by_journal.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER |
|
5 |
value="61995a63-5922-4fac-be67-5970bab0095d_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" /> |
|
6 |
<RESOURCE_TYPE value="WorkflowDSResourceType" /> |
|
7 |
<RESOURCE_KIND value="WorkflowDSResources" /> |
|
8 |
<RESOURCE_URI value="value3" /> |
|
9 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" /> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<WORKFLOW_NAME>HostedBy Map Pangaea Journal</WORKFLOW_NAME> |
|
13 |
<WORKFLOW_TYPE>REPO_HI</WORKFLOW_TYPE> |
|
14 |
<WORKFLOW_PRIORITY>20</WORKFLOW_PRIORITY> |
|
15 |
<CONFIGURATION start="manual"> |
|
16 |
<NODE name="VerifyDatasource" type="VerifyDatasource" isStart="true"> |
|
17 |
<DESCRIPTION>Retrieve datasets by projects id</DESCRIPTION> |
|
18 |
<PARAMETERS> |
|
19 |
<PARAM name="expectedInterfaceTypologyPrefixes" managedBy="system" required="false" type="string">datarepository::unknown</PARAM> |
|
20 |
<PARAM name="expectedCompliancePrefixes" managedBy="system" required="false" type="string"></PARAM> |
|
21 |
</PARAMETERS> |
|
22 |
<ARCS> |
|
23 |
<ARC to="createMetaWf"/> |
|
24 |
<ARC to="validateDs" name="validateDs"/> |
|
25 |
</ARCS> |
|
26 |
</NODE> |
|
27 |
|
|
28 |
<NODE name="validateDs" type="ValidateDatasource"> |
|
29 |
<DESCRIPTION>Validate DS</DESCRIPTION> |
|
30 |
<PARAMETERS/> |
|
31 |
<ARCS> |
|
32 |
<ARC to="createMetaWf"/> |
|
33 |
</ARCS> |
|
34 |
</NODE> |
|
35 |
|
|
36 |
<NODE name="createMetaWf" type="RegisterMetaWf"> |
|
37 |
<DESCRIPTION>Create MetaWorkflow</DESCRIPTION> |
|
38 |
<PARAMETERS> |
|
39 |
<PARAM name="wfName" managedBy="system" required="true" type="string">Collect and transform metadata records from data repository</PARAM> |
|
40 |
</PARAMETERS> |
|
41 |
<ARCS> |
|
42 |
<ARC to="updateMetaWf"/> |
|
43 |
</ARCS> |
|
44 |
</NODE> |
|
45 |
|
|
46 |
<NODE name="updateMetaWf" type="UpdateMetaWf" > |
|
47 |
<DESCRIPTION>Create MetaWorkflow</DESCRIPTION> |
|
48 |
<PARAMETERS> |
|
49 |
<PARAM name="beanName" managedBy="system" required="true" type="string">metaWfHostedByMapJournalPangaea</PARAM> |
|
50 |
</PARAMETERS> |
|
51 |
<ARCS> |
|
52 |
<ARC to="updateMetaWfStatus" /> |
|
53 |
</ARCS> |
|
54 |
</NODE> |
|
55 |
|
|
56 |
<NODE name="updateMetaWfStatus" type="UpdateOpenaireMetaWfStatus"> |
|
57 |
<DESCRIPTION>Update MetaWorkflow Status</DESCRIPTION> |
|
58 |
<PARAMETERS /> |
|
59 |
<ARCS> |
|
60 |
<ARC to="success" /> |
|
61 |
</ARCS> |
|
62 |
</NODE> |
|
63 |
</CONFIGURATION> |
|
64 |
<STATUS /> |
|
65 |
</BODY> |
|
66 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/repo-hi/datasets_by_projects.xml | ||
---|---|---|
85 | 85 |
<NODE name="updateMetaWf" type="UpdateMetaWf" isJoin="true"> |
86 | 86 |
<DESCRIPTION>Create MetaWorkflow</DESCRIPTION> |
87 | 87 |
<PARAMETERS> |
88 |
<PARAM name="beanName" managedBy="system" required="true" type="string">metaWfDatasetOpenaireMdRecords</PARAM> |
|
88 |
<PARAM name="beanName" managedBy="system" required="true" type="string">metaWfDatasetbyProjectsOpenaireMdRecords</PARAM>
|
|
89 | 89 |
</PARAMETERS> |
90 | 90 |
<ARCS> |
91 | 91 |
<ARC to="updateMetaWfStatus" /> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/xslt-datacite/DatasetfromPangaeaTransform.xsl | ||
---|---|---|
121 | 121 |
</contributor> |
122 | 122 |
</contributors> |
123 | 123 |
</resource> |
124 |
<xsl:variable name="journalName"><xsl:value-of select="//*[local-name() ='journal']/@name"/></xsl:variable> |
|
125 |
<xsl:variable name="journalISSN"><xsl:value-of select="//*[local-name() ='journal']/@issn"/></xsl:variable> |
|
126 |
<xsl:variable name="journalDSId"><xsl:value-of select="//*[local-name() ='journal']/@datasourceid"/></xsl:variable> |
|
127 |
|
|
128 |
<xsl:choose> |
|
129 |
<xsl:when test="string-length($journalISSN) > 0"> |
|
130 |
<oaf:journal issn="{$journalISSN}" eissn="" > |
|
131 |
<xsl:value-of select="$journalName"/> |
|
132 |
</oaf:journal> |
|
133 |
<oaf:hostedBy> |
|
134 |
<xsl:attribute name="id"><xsl:value-of select="$journalDSId"/></xsl:attribute> |
|
135 |
<xsl:attribute name="name"><xsl:value-of select="$journalName"/></xsl:attribute> |
|
136 |
</oaf:hostedBy> |
|
137 |
</xsl:when> |
|
138 |
<xsl:otherwise> |
|
139 |
<oaf:hostedBy |
|
140 |
id="openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18" |
|
141 |
name="Unknown Repository"/> |
|
142 |
</xsl:otherwise> |
|
143 |
</xsl:choose> |
|
124 | 144 |
<xsl:for-each select="//md:citation/md:supplementTo/@id"> |
125 | 145 |
<xsl:variable name="publicationID"> |
126 | 146 |
<xsl:value-of select="."/> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/xslt-datacite/PublicationFromPangaeatransform.xsl | ||
---|---|---|
1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
2 | 2 |
<xsl:stylesheet version="1.0" |
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:oai="http://www.openarchives.org/OAI/2.0/" |
|
5 |
xmlns:datetime="http://exslt.org/dates-and-times" |
|
6 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
7 |
xmlns:md="http://www.pangaea.de/MetaData" xmlns:oaa="http://namespace.openaire.eu/oaa" |
|
8 |
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:fn="http://www.w3.org/2005/xpath-functions" |
|
9 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetMdStoreToHbaseXsltFunctions" |
|
10 |
xmlns:stringUtils="org.apache.commons.lang.StringUtils" |
|
11 |
exclude-result-prefixes="xsl dnet oaa fn stringUtils datetime"> |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:oai="http://www.openarchives.org/OAI/2.0/" |
|
5 |
xmlns:datetime="http://exslt.org/dates-and-times" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
6 |
xmlns:md="http://www.pangaea.de/MetaData" xmlns:oaa="http://namespace.openaire.eu/oaa" |
|
7 |
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:fn="http://www.w3.org/2005/xpath-functions" |
|
8 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetMdStoreToHbaseXsltFunctions" |
|
9 |
xmlns:stringUtils="org.apache.commons.lang.StringUtils" |
|
10 |
exclude-result-prefixes="xsl dnet oaa fn stringUtils datetime"> |
|
12 | 11 |
|
13 |
<xsl:param name="namespacePrefix"/>
|
|
14 |
<xsl:param name="dataprovider_id"/>
|
|
15 |
<xsl:param name="parentDatasourceId"/>
|
|
16 |
<xsl:param name="dataprovider_name"/>
|
|
12 |
<xsl:param name="namespacePrefix" />
|
|
13 |
<xsl:param name="dataprovider_id" />
|
|
14 |
<xsl:param name="parentDatasourceId" />
|
|
15 |
<xsl:param name="dataprovider_name" />
|
|
17 | 16 |
|
18 |
<xsl:template match="/"> |
|
19 |
<xsl:variable name="identifier"> |
|
20 |
<xsl:value-of select="stringUtils:substringAfter(//md:citation/md:URI,'doi:')"/> |
|
21 |
</xsl:variable> |
|
22 |
<xsl:variable name="identifier_datacite"> |
|
23 |
<xsl:value-of select="oai:record/oai:header/dri:objIdentifier"/> |
|
24 |
</xsl:variable> |
|
25 |
<publications> |
|
26 |
<xsl:for-each select="//md:citation/md:supplementTo"> |
|
27 |
<xsl:variable name="pub_identifier"> |
|
28 |
<xsl:value-of select="./@id"/> |
|
29 |
</xsl:variable> |
|
30 |
<xsl:choose> |
|
31 |
<xsl:when test="string-length($pub_identifier)"> |
|
32 |
<xsl:call-template name="GeneratePublication"> |
|
33 |
<xsl:with-param name="pub_identifier" select="$pub_identifier"/> |
|
34 |
</xsl:call-template> |
|
35 |
</xsl:when> |
|
36 |
</xsl:choose> |
|
37 |
</xsl:for-each> |
|
38 |
</publications> |
|
39 |
</xsl:template> |
|
40 |
<xsl:template name="GeneratePublication"> |
|
41 |
<xsl:param name="pub_identifier"/> |
|
42 |
<publication> |
|
43 |
<oai:record> |
|
44 |
<oai:header> |
|
45 |
<dri:objIdentifier> |
|
46 |
<xsl:value-of |
|
47 |
select="concat($namespacePrefix,'::', dnet:md5($pub_identifier))"/> |
|
48 |
</dri:objIdentifier> |
|
49 |
<dri:recordIdentifier> |
|
50 |
<xsl:value-of select="$pub_identifier"/> |
|
51 |
</dri:recordIdentifier> |
|
52 |
<dri:dateOfCollection> |
|
53 |
<xsl:value-of select="datetime:dateTime()"/> |
|
54 |
</dri:dateOfCollection> |
|
55 |
<dri:repositoryId> |
|
56 |
<xsl:value-of select="$dataprovider_id"/> |
|
57 |
</dri:repositoryId> |
|
58 |
<oaf:datasourceprefix> |
|
59 |
<xsl:value-of select="$namespacePrefix"/> |
|
60 |
</oaf:datasourceprefix> |
|
61 |
</oai:header> |
|
62 |
<oai:metadata> |
|
63 |
<dc:identifier> |
|
64 |
<xsl:value-of select="$pub_identifier"/> |
|
65 |
</dc:identifier> |
|
66 |
<xsl:variable name="publicationDOI"> |
|
67 |
<xsl:value-of select="./md:URI" /> |
|
68 |
</xsl:variable> |
|
69 |
<xsl:choose> |
|
70 |
<xsl:when test="string-length($publicationDOI) > 0"> |
|
71 |
<dc:identifier> |
|
72 |
<xsl:value-of select="$publicationDOI" /> |
|
73 |
</dc:identifier> |
|
74 |
</xsl:when> |
|
75 |
</xsl:choose> |
|
76 |
|
|
77 |
<dc:title> |
|
78 |
<xsl:value-of select=".//md:title"/> |
|
79 |
</dc:title> |
|
80 |
<xsl:for-each select=".//md:author"> |
|
81 |
<dc:creator> |
|
82 |
<xsl:value-of select="concat(./md:lastName,' ', md:firstName)"/> |
|
83 |
</dc:creator> |
|
84 |
</xsl:for-each> |
|
85 |
<dc:source> |
|
86 |
<xsl:value-of select="./md:source"/> |
|
87 |
</dc:source> |
|
88 |
<dr:CobjCategory>0000</dr:CobjCategory> |
|
89 |
<dc:language>und</dc:language> |
|
90 |
<oaf:journal> |
|
91 |
<xsl:value-of select="./md:source"/> |
|
92 |
</oaf:journal> |
|
93 |
<oaf:accessrights>UNKNOWN</oaf:accessrights> |
|
94 |
<xsl:variable name="projectId"> |
|
95 |
<xsl:value-of select="normalize-space(//*[local-name() ='projectid'])"/> |
|
96 |
</xsl:variable> |
|
97 |
<xsl:choose> |
|
98 |
<xsl:when test="string-length($projectId) > 0"> |
|
99 |
<oaf:projectid> |
|
100 |
<xsl:value-of |
|
101 |
select="concat('corda_______::', stringUtils:substringAfterLast($projectId, '/'))"/> |
|
102 |
</oaf:projectid> |
|
103 |
</xsl:when> |
|
104 |
</xsl:choose> |
|
105 |
<oaf:hostedBy |
|
106 |
id="openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18" |
|
107 |
name="Unknown Repository"/> |
|
108 |
<oaf:collectedFrom> |
|
109 |
<xsl:attribute name="id"> |
|
110 |
<xsl:value-of select="$parentDatasourceId"/> |
|
17 |
<xsl:template match="/"> |
|
18 |
<xsl:variable name="identifier"> |
|
19 |
<xsl:value-of |
|
20 |
select="stringUtils:substringAfter(//md:citation/md:URI,'doi:')" /> |
|
21 |
</xsl:variable> |
|
22 |
<xsl:variable name="identifier_datacite"> |
|
23 |
<xsl:value-of select="oai:record/oai:header/dri:objIdentifier" /> |
|
24 |
</xsl:variable> |
|
25 |
<publications> |
|
26 |
<xsl:for-each select="//md:citation/md:supplementTo"> |
|
27 |
<xsl:variable name="pub_identifier"> |
|
28 |
<xsl:value-of select="./@id" /> |
|
29 |
</xsl:variable> |
|
30 |
<xsl:choose> |
|
31 |
<xsl:when test="string-length($pub_identifier)"> |
|
32 |
<xsl:call-template name="GeneratePublication"> |
|
33 |
<xsl:with-param name="pub_identifier" select="$pub_identifier" /> |
|
34 |
</xsl:call-template> |
|
35 |
</xsl:when> |
|
36 |
</xsl:choose> |
|
37 |
</xsl:for-each> |
|
38 |
</publications> |
|
39 |
</xsl:template> |
|
40 |
<xsl:template name="GeneratePublication"> |
|
41 |
<xsl:param name="pub_identifier" /> |
|
42 |
<publication> |
|
43 |
<oai:record> |
|
44 |
<oai:header> |
|
45 |
<dri:objIdentifier> |
|
46 |
<xsl:value-of |
|
47 |
select="concat($namespacePrefix,'::', dnet:md5($pub_identifier))" /> |
|
48 |
</dri:objIdentifier> |
|
49 |
<dri:recordIdentifier> |
|
50 |
<xsl:value-of select="$pub_identifier" /> |
|
51 |
</dri:recordIdentifier> |
|
52 |
<dri:dateOfCollection> |
|
53 |
<xsl:value-of select="datetime:dateTime()" /> |
|
54 |
</dri:dateOfCollection> |
|
55 |
<dri:repositoryId> |
|
56 |
<xsl:value-of select="$dataprovider_id" /> |
|
57 |
</dri:repositoryId> |
|
58 |
<oaf:datasourceprefix> |
|
59 |
<xsl:value-of select="$namespacePrefix" /> |
|
60 |
</oaf:datasourceprefix> |
|
61 |
</oai:header> |
|
62 |
<oai:metadata> |
|
63 |
<dc:identifier> |
|
64 |
<xsl:value-of select="$pub_identifier" /> |
|
65 |
</dc:identifier> |
|
66 |
<xsl:variable name="publicationDOI"> |
|
67 |
<xsl:value-of select="./md:URI" /> |
|
68 |
</xsl:variable> |
|
69 |
<xsl:choose> |
|
70 |
<xsl:when test="string-length($publicationDOI) > 0"> |
|
71 |
<dc:identifier> |
|
72 |
<xsl:value-of select="$publicationDOI" /> |
|
73 |
</dc:identifier> |
|
74 |
</xsl:when> |
|
75 |
</xsl:choose> |
|
76 |
|
|
77 |
<dc:title> |
|
78 |
<xsl:value-of select=".//md:title" /> |
|
79 |
</dc:title> |
|
80 |
<xsl:for-each select=".//md:author"> |
|
81 |
<dc:creator> |
|
82 |
<xsl:value-of select="concat(./md:lastName,' ', md:firstName)" /> |
|
83 |
</dc:creator> |
|
84 |
</xsl:for-each> |
|
85 |
<dc:source> |
|
86 |
<xsl:value-of select="./md:source" /> |
|
87 |
</dc:source> |
|
88 |
<dr:CobjCategory>0000</dr:CobjCategory> |
|
89 |
<dc:language>und</dc:language> |
|
90 |
<oaf:journal> |
|
91 |
<xsl:value-of select="./md:source" /> |
|
92 |
</oaf:journal> |
|
93 |
<oaf:accessrights>UNKNOWN</oaf:accessrights> |
|
94 |
<xsl:variable name="projectId"> |
|
95 |
<xsl:value-of select="normalize-space(//*[local-name() ='projectid'])" /> |
|
96 |
</xsl:variable> |
|
97 |
<xsl:choose> |
|
98 |
<xsl:when test="string-length($projectId) > 0"> |
|
99 |
<oaf:projectid> |
|
100 |
<xsl:value-of |
|
101 |
select="concat('corda_______::', stringUtils:substringAfterLast($projectId, '/'))" /> |
|
102 |
</oaf:projectid> |
|
103 |
</xsl:when> |
|
104 |
</xsl:choose> |
|
105 |
<xsl:variable name="journalName"> |
|
106 |
<xsl:value-of select="//*[local-name() ='journal']/@name" /> |
|
107 |
</xsl:variable> |
|
108 |
<xsl:variable name="journalISSN"> |
|
109 |
<xsl:value-of select="//*[local-name() ='journal']/@issn" /> |
|
110 |
</xsl:variable> |
|
111 |
<xsl:variable name="journalDSId"> |
|
112 |
<xsl:value-of select="//*[local-name() ='journal']/@datasourceid" /> |
|
113 |
</xsl:variable> |
|
114 |
|
|
115 |
<xsl:choose> |
|
116 |
<xsl:when test="string-length($journalISSN) > 0"> |
|
117 |
<oaf:journal issn="{$journalISSN}" eissn=""> |
|
118 |
<xsl:value-of select="$journalName" /> |
|
119 |
</oaf:journal> |
|
120 |
<oaf:hostedBy> |
|
121 |
<xsl:attribute name="id"><xsl:value-of |
|
122 |
select="$journalDSId" /></xsl:attribute> |
|
123 |
<xsl:attribute name="name"><xsl:value-of |
|
124 |
select="$journalName" /></xsl:attribute> |
|
125 |
</oaf:hostedBy> |
|
126 |
</xsl:when> |
|
127 |
<xsl:otherwise> |
|
128 |
<oaf:hostedBy |
|
129 |
id="openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18" name="Unknown Repository" /> |
|
130 |
</xsl:otherwise> |
|
131 |
</xsl:choose> |
|
132 |
|
|
133 |
|
|
134 |
|
|
135 |
|
|
136 |
<oaf:collectedFrom> |
|
137 |
<xsl:attribute name="id"> |
|
138 |
<xsl:value-of select="$parentDatasourceId" /> |
|
111 | 139 |
</xsl:attribute> |
112 |
<xsl:attribute name="name">
|
|
113 |
<xsl:value-of select="$dataprovider_name"/> |
|
140 |
<xsl:attribute name="name">
|
|
141 |
<xsl:value-of select="$dataprovider_name" />
|
|
114 | 142 |
</xsl:attribute> |
115 |
</oaf:collectedFrom> |
|
116 |
<oaf:about> |
|
117 |
<oaf:datainfo> |
|
118 |
<oaf:inferred>false</oaf:inferred> |
|
119 |
<oaf:deletedbyinference>false</oaf:deletedbyinference> |
|
120 |
<oaf:trust>0.9</oaf:trust> |
|
121 |
<oaf:inferenceprovenance/> |
|
122 |
<oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive" |
|
123 |
classname="sysimport:crosswalk:datasetarchive" |
|
124 |
schemeid="dnet:provenanceActions" |
|
125 |
schemename="dnet:provenanceActions"/> |
|
126 |
</oaf:datainfo> |
|
127 |
</oaf:about> |
|
128 |
</oai:metadata> |
|
129 |
</oai:record> |
|
130 |
</publication> |
|
131 |
</xsl:template> |
|
143 |
</oaf:collectedFrom> |
|
144 |
<oaf:about> |
|
145 |
<oaf:datainfo> |
|
146 |
<oaf:inferred>false</oaf:inferred> |
|
147 |
<oaf:deletedbyinference>false</oaf:deletedbyinference> |
|
148 |
<oaf:trust>0.9</oaf:trust> |
|
149 |
<oaf:inferenceprovenance /> |
|
150 |
<oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive" |
|
151 |
classname="sysimport:crosswalk:datasetarchive" schemeid="dnet:provenanceActions" |
|
152 |
schemename="dnet:provenanceActions" /> |
|
153 |
</oaf:datainfo> |
|
154 |
</oaf:about> |
|
155 |
</oai:metadata> |
|
156 |
</oai:record> |
|
157 |
</publication> |
|
158 |
</xsl:template> |
|
132 | 159 |
</xsl:stylesheet> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/doaj_journals/sql/doajJournals_findMatches.sql | ||
---|---|---|
1 | 1 |
INSERT INTO hostedby_map(_dnet_resource_identifier_, oa_source_id, entry, datasourceid) |
2 | 2 |
SELECT d.issn||'@@'||d.collectedfrom, d.collectedfrom, d.issn, d.id |
3 | 3 |
FROM datasources d where |
4 |
d.collectedfrom ='openaire____::doaj' and d.issn is not null
|
|
4 |
d.collectedfrom ='driver______::1790119e-d281-4b7a-aedf-866d1d853a07' and d.issn is not null and d.id like 'doajarticles::%'
|
|
5 | 5 |
AND |
6 | 6 |
(d.collectedfrom, d.issn) NOT IN |
7 |
(SELECT oa_source_id, entry from hostedby_map) |
|
7 |
(SELECT oa_source_id, entry from hostedby_map) |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/xslt/doaj2db.xsl | ||
---|---|---|
62 | 62 |
<FIELD name="namespaceprefix"> |
63 | 63 |
<xsl:value-of select="concat('doaj', $ISSNNS)"/> |
64 | 64 |
</FIELD> |
65 |
<FIELD name="datasourceclass">pubsrepository::journal</FIELD> |
|
66 |
<FIELD name="datasourcescheme">dnet:datasource_typologies</FIELD> |
|
65 |
<FIELD name="datasourceclass">pubsrepository::journal</FIELD> |
|
67 | 66 |
<FIELD name="collectedfrom"> |
68 | 67 |
<xsl:value-of select="$parentDatasourceId"/> |
69 | 68 |
</FIELD> |
70 |
</ROW> |
|
71 |
|
|
72 |
<xsl:variable name="apiId" |
|
73 |
select="concat('api_________::', $datasourceId, '::0')"/> |
|
74 |
<ROW table="api"> |
|
75 |
<FIELD name="id"> |
|
76 |
<xsl:value-of select="$apiId"/> |
|
77 |
</FIELD> |
|
78 |
<FIELD name="_dnet_resource_identifier_"> |
|
79 |
<xsl:value-of select="$apiId"/> |
|
80 |
</FIELD> |
|
81 |
<FIELD name="protocolclass">filesystem</FIELD> |
|
82 |
<FIELD name="datasource"> |
|
83 |
<xsl:value-of select="$datasourceId"/> |
|
84 |
</FIELD> |
|
85 |
<FIELD name="contentdescriptionclass">metadata</FIELD> |
|
86 |
<FIELD name="typologyclass">pubsrepository::journal</FIELD> |
|
87 |
<FIELD name="compatibilityclass">UNKNOWN</FIELD> |
|
88 |
|
|
89 |
</ROW> |
|
90 |
|
|
91 |
<ROW table="apicollections"> |
|
92 |
<FIELD name="api"> |
|
93 |
<xsl:value-of select="$apiId"/> |
|
94 |
</FIELD> |
|
95 |
<FIELD name="param">baseUrl</FIELD> |
|
96 |
<FIELD name="_dnet_resource_identifier_"> |
|
97 |
<xsl:value-of select="concat($apiId, '@@baseUrl')"/> |
|
98 |
</FIELD> |
|
99 |
<xsl:if test="string-length(normalize-space('/dev/null')) > 0"> |
|
100 |
<FIELD name="original"> |
|
101 |
<xsl:value-of select="normalize-space('/dev/null')"/> |
|
102 |
</FIELD> |
|
103 |
</xsl:if> |
|
104 |
</ROW> |
|
105 |
|
|
106 |
|
|
107 |
<ROW table="apicollections"> |
|
108 |
<FIELD name="api"> |
|
109 |
<xsl:value-of select="$apiId"/> |
|
110 |
</FIELD> |
|
111 |
<FIELD name="param">metadata_identifier_path</FIELD> |
|
112 |
<FIELD name="_dnet_resource_identifier_"> |
|
113 |
<xsl:value-of select="concat($apiId, '@@metadata_identifier_path')"/> |
|
114 |
</FIELD> |
|
115 |
<FIELD name="original" |
|
116 |
>//*[local-name()='header']/*[local-name()='identifier']</FIELD> |
|
117 |
<FIELD name="accessparam" type="boolean">false</FIELD> |
|
118 |
</ROW> |
|
119 |
|
|
120 |
|
|
121 |
|
|
69 |
</ROW> |
|
122 | 70 |
</ROWS> |
123 | 71 |
</metadata> |
124 | 72 |
</record> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/applicationContext-repohi.xml | ||
---|---|---|
1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
2 | 2 |
|
3 | 3 |
<beans xmlns="http://www.springframework.org/schema/beans" |
4 |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:p="http://www.springframework.org/schema/p" |
|
5 |
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd"> |
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
<bean id="metaWfOpenaireAggregatorPubsMdRecords" |
|
10 |
class="eu.dnetlib.msro.workflows.metawf.DatasourceMetaWorkflow" |
Also available in: Unified diff
Implemented the Pangaea by-journal workflow