Revision 30171
Added by Claudio Atzori about 10 years ago
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/dm/dm.provision.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 | 1 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
3 | 2 |
<HEADER> |
4 | 3 |
<RESOURCE_IDENTIFIER value="a4434d62-d4cd-4c73-a107-bc7c62e6f815_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
... | ... | |
14 | 13 |
<CONFIGURATION start="manual"> |
15 | 14 |
|
16 | 15 |
<!-- PREPARE NODES --> |
17 |
<NODE name="setInfo" type="SetFormatInfo" isStart="true"> |
|
18 |
<DESCRIPTION /> |
|
19 |
<PARAMETERS> |
|
20 |
<PARAM name="format" type="string" managedBy="user" required="true">DMF</PARAM> |
|
21 |
<PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM> |
|
22 |
<PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM> |
|
23 |
</PARAMETERS> |
|
24 |
<ARCS> |
|
25 |
<ARC to="fetchRelClasses" /> |
|
26 |
<ARC to="fetchContexts" /> |
|
27 |
<ARC to="fetchEntityLinks" /> |
|
28 |
</ARCS> |
|
29 |
</NODE> |
|
30 |
<NODE name="fetchRelClasses" type="FetchRelClasses"> |
|
31 |
<DESCRIPTION /> |
|
32 |
<PARAMETERS> |
|
33 |
<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM> |
|
34 |
<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM> |
|
35 |
</PARAMETERS> |
|
36 |
<ARCS> |
|
37 |
<ARC to="groupEntities" /> |
|
38 |
</ARCS> |
|
39 |
</NODE> |
|
40 |
<NODE name="fetchContexts" type="LoadContextsJob"> |
|
41 |
<DESCRIPTION /> |
|
42 |
<PARAMETERS/> |
|
43 |
<ARCS> |
|
44 |
<ARC to="groupEntities" /> |
|
45 |
</ARCS> |
|
46 |
</NODE> |
|
47 |
<NODE name="fetchEntityLinks" type="LoadEntityLinksJob"> |
|
48 |
<DESCRIPTION /> |
|
49 |
<PARAMETERS/> |
|
50 |
<ARCS> |
|
51 |
<ARC to="groupEntities" /> |
|
52 |
<ARC to="prepareStats" /> |
|
53 |
</ARCS> |
|
54 |
</NODE> |
|
16 |
<NODE name="setInfo" type="SetFormatInfo" isStart="true"> |
|
17 |
<DESCRIPTION/> |
|
18 |
<PARAMETERS> |
|
19 |
<PARAM name="format" type="string" managedBy="user" required="true">DMF</PARAM> |
|
20 |
<PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM> |
|
21 |
<PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM> |
|
22 |
</PARAMETERS> |
|
23 |
<ARCS> |
|
24 |
<ARC to="findIndex"/> |
|
25 |
</ARCS> |
|
26 |
</NODE> |
|
27 |
<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true"> |
|
28 |
<DESCRIPTION/> |
|
29 |
<PARAMETERS> |
|
30 |
<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM> |
|
31 |
<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM> |
|
32 |
</PARAMETERS> |
|
33 |
<ARCS> |
|
34 |
<ARC to="groupEntities"/> |
|
35 |
</ARCS> |
|
36 |
</NODE> |
|
37 |
<NODE name="fetchContexts" type="LoadContextsJob" isStart="true"> |
|
38 |
<DESCRIPTION/> |
|
39 |
<PARAMETERS/> |
|
40 |
<ARCS> |
|
41 |
<ARC to="groupEntities"/> |
|
42 |
</ARCS> |
|
43 |
</NODE> |
|
44 |
<NODE name="fetchEntityLinks" type="LoadEntityLinksJob" isStart="true"> |
|
45 |
<DESCRIPTION/> |
|
46 |
<PARAMETERS/> |
|
47 |
<ARCS> |
|
48 |
<ARC to="groupEntities"/> |
|
49 |
<ARC to="prepareStats"/> |
|
50 |
</ARCS> |
|
51 |
</NODE> |
|
55 | 52 |
|
56 | 53 |
<!-- UPDATE INDEX --> |
57 |
<NODE name="findIndex" type="FindIndex" isStart="true">
|
|
58 |
<DESCRIPTION />
|
|
59 |
<PARAMETERS/>
|
|
60 |
<ARCS>
|
|
61 |
<ARC name="found" to="prepare" />
|
|
62 |
<ARC name="notFound" to="createIndex" />
|
|
63 |
</ARCS>
|
|
64 |
</NODE>
|
|
65 |
<NODE name="createIndex" type="CreateIndex">
|
|
66 |
<DESCRIPTION />
|
|
67 |
<PARAMETERS />
|
|
68 |
<ARCS>
|
|
69 |
<ARC to="prepare" />
|
|
70 |
</ARCS>
|
|
71 |
</NODE>
|
|
72 |
<NODE name="prepare" type="PrepareIndexJob">
|
|
73 |
<DESCRIPTION>Prepare indexing</DESCRIPTION>
|
|
74 |
<PARAMETERS>
|
|
75 |
<PARAM name="outputRecordsPathParam" type="string" managedBy="system" required="true">hdfsRecordsPath</PARAM>
|
|
76 |
<PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
|
|
77 |
<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
|
|
78 |
<PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
|
|
79 |
</PARAMETERS>
|
|
80 |
<ARCS>
|
|
81 |
<ARC to="cleanupXml" />
|
|
82 |
<ARC to="cleanupRotten" />
|
|
83 |
</ARCS>
|
|
84 |
</NODE>
|
|
85 |
<NODE name="cleanupXml" type="DeleteHdfsPathJob">
|
|
86 |
<DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
|
|
87 |
<PARAMETERS>
|
|
88 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
|
|
89 |
<PARAM required="true" type="string" name="envParams" managedBy="system">
|
|
54 |
<NODE name="findIndex" type="FindIndex">
|
|
55 |
<DESCRIPTION/>
|
|
56 |
<PARAMETERS/>
|
|
57 |
<ARCS>
|
|
58 |
<ARC name="found" to="prepareIndexing"/>
|
|
59 |
<ARC name="notFound" to="createIndex"/>
|
|
60 |
</ARCS>
|
|
61 |
</NODE>
|
|
62 |
<NODE name="createIndex" type="CreateIndex">
|
|
63 |
<DESCRIPTION/>
|
|
64 |
<PARAMETERS/>
|
|
65 |
<ARCS>
|
|
66 |
<ARC to="prepareIndexing"/>
|
|
67 |
</ARCS>
|
|
68 |
</NODE>
|
|
69 |
<NODE name="prepareIndexing" type="PrepareIndexJob">
|
|
70 |
<DESCRIPTION>Prepare indexing</DESCRIPTION>
|
|
71 |
<PARAMETERS>
|
|
72 |
<PARAM name="outputRecordsPathParam" type="string" managedBy="system" required="true">hdfsRecordsPath</PARAM>
|
|
73 |
<PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
|
|
74 |
<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
|
|
75 |
<PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
|
|
76 |
</PARAMETERS>
|
|
77 |
<ARCS>
|
|
78 |
<ARC to="cleanupXml"/>
|
|
79 |
<ARC to="cleanupRotten"/>
|
|
80 |
</ARCS>
|
|
81 |
</NODE>
|
|
82 |
<NODE name="cleanupXml" type="DeleteHdfsPathJob">
|
|
83 |
<DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
|
|
84 |
<PARAMETERS>
|
|
85 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
|
|
86 |
<PARAM required="true" type="string" name="envParams" managedBy="system">
|
|
90 | 87 |
{ |
91 |
'path' : 'outputRecordsPath'
|
|
88 |
'path' : 'hdfsRecordsPath'
|
|
92 | 89 |
} |
93 |
</PARAM>
|
|
94 |
</PARAMETERS>
|
|
95 |
<ARCS>
|
|
96 |
<ARC to="groupEntities" />
|
|
97 |
</ARCS>
|
|
98 |
</NODE>
|
|
99 |
<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
|
|
100 |
<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
|
|
101 |
<PARAMETERS>
|
|
102 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
|
|
103 |
<PARAM required="true" type="string" name="envParams" managedBy="system">
|
|
90 |
</PARAM> |
|
91 |
</PARAMETERS>
|
|
92 |
<ARCS>
|
|
93 |
<ARC to="groupEntities"/>
|
|
94 |
</ARCS>
|
|
95 |
</NODE>
|
|
96 |
<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
|
|
97 |
<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
|
|
98 |
<PARAMETERS>
|
|
99 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
|
|
100 |
<PARAM required="true" type="string" name="envParams" managedBy="system">
|
|
104 | 101 |
{ |
105 | 102 |
'path' : 'rottenRecordsPath' |
106 | 103 |
} |
107 |
</PARAM>
|
|
108 |
</PARAMETERS>
|
|
109 |
<ARCS>
|
|
110 |
<ARC to="groupEntities" />
|
|
111 |
</ARCS>
|
|
112 |
</NODE>
|
|
113 |
<NODE name="groupEntities" type="SubmitHadoopJob" isJoin="true">
|
|
114 |
<DESCRIPTION>M/R group entities</DESCRIPTION>
|
|
115 |
<PARAMETERS>
|
|
116 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
|
|
117 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">prepareIndexDataJob</PARAM>
|
|
118 |
<PARAM required="true" type="string" name="sysParams" managedBy="system">
|
|
104 |
</PARAM> |
|
105 |
</PARAMETERS>
|
|
106 |
<ARCS>
|
|
107 |
<ARC to="groupEntities"/>
|
|
108 |
</ARCS>
|
|
109 |
</NODE>
|
|
110 |
<NODE name="groupEntities" type="SubmitHadoopJob" isJoin="true">
|
|
111 |
<DESCRIPTION>M/R group entities</DESCRIPTION>
|
|
112 |
<PARAMETERS>
|
|
113 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
|
|
114 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">prepareIndexDataJob</PARAM>
|
|
115 |
<PARAM required="true" type="string" name="sysParams" managedBy="system">
|
|
119 | 116 |
{ |
120 | 117 |
'hbase.mapred.inputtable' : 'hbase.mapred.datatable', |
121 | 118 |
'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable' |
122 | 119 |
} |
123 | 120 |
</PARAM> |
124 |
<PARAM required="true" type="string" name="envParams" managedBy="system">
|
|
121 |
<PARAM required="true" type="string" name="envParams" managedBy="system">
|
|
125 | 122 |
{ |
126 | 123 |
'mapred.output.dir' : 'hdfsRecordsPath', |
127 | 124 |
'index.entity.links' : 'index.entity.links', |
... | ... | |
130 | 127 |
'relClasses' : 'relClasses' |
131 | 128 |
} |
132 | 129 |
</PARAM> |
133 |
</PARAMETERS>
|
|
134 |
<ARCS>
|
|
135 |
<ARC to="updateIndex" />
|
|
136 |
<ARC to="setOAIFormat" />
|
|
137 |
</ARCS>
|
|
138 |
</NODE>
|
|
139 |
<NODE name="updateIndex" type="SubmitHadoopJob">
|
|
140 |
<DESCRIPTION>M/O index records</DESCRIPTION>
|
|
141 |
<PARAMETERS>
|
|
142 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
|
|
143 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>
|
|
144 |
<PARAM required="true" type="string" name="envParams" managedBy="system">
|
|
130 |
</PARAMETERS>
|
|
131 |
<ARCS>
|
|
132 |
<ARC to="updateIndex"/>
|
|
133 |
<ARC to="setOAIFormat"/>
|
|
134 |
</ARCS>
|
|
135 |
</NODE>
|
|
136 |
<NODE name="updateIndex" type="SubmitHadoopJob">
|
|
137 |
<DESCRIPTION>M/O index records</DESCRIPTION>
|
|
138 |
<PARAMETERS>
|
|
139 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
|
|
140 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>
|
|
141 |
<PARAM required="true" type="string" name="envParams" managedBy="system">
|
|
145 | 142 |
{ |
146 | 143 |
'mapred.input.dir' : 'hdfsRecordsPath', |
147 | 144 |
'mapred.output.dir' : 'rottenRecordsPath', |
... | ... | |
157 | 154 |
'index.feed.timestamp' : 'index.feed.timestamp' |
158 | 155 |
} |
159 | 156 |
</PARAM> |
160 |
</PARAMETERS>
|
|
161 |
<ARCS>
|
|
162 |
<ARC to="finalize" />
|
|
163 |
</ARCS>
|
|
164 |
</NODE>
|
|
165 |
<NODE name="finalize" type="FinalizeIndexFeeding">
|
|
166 |
<DESCRIPTION>commit changes</DESCRIPTION>
|
|
167 |
<PARAMETERS />
|
|
168 |
<ARCS>
|
|
169 |
<ARC to="updateDs" />
|
|
170 |
</ARCS>
|
|
171 |
</NODE>
|
|
172 |
<NODE name="updateDs" type="IndexDsUpdateJob">
|
|
173 |
<DESCRIPTION>update DS</DESCRIPTION>
|
|
174 |
<PARAMETERS />
|
|
175 |
<ARCS>
|
|
176 |
<ARC to="sync" />
|
|
177 |
</ARCS>
|
|
178 |
</NODE>
|
|
157 |
</PARAMETERS>
|
|
158 |
<ARCS>
|
|
159 |
<ARC to="finalize"/>
|
|
160 |
</ARCS>
|
|
161 |
</NODE>
|
|
162 |
<NODE name="finalize" type="FinalizeIndexFeeding">
|
|
163 |
<DESCRIPTION>commit changes</DESCRIPTION>
|
|
164 |
<PARAMETERS/>
|
|
165 |
<ARCS>
|
|
166 |
<ARC to="updateDs"/>
|
|
167 |
</ARCS>
|
|
168 |
</NODE>
|
|
169 |
<NODE name="updateDs" type="IndexDsUpdateJob">
|
|
170 |
<DESCRIPTION>update DS</DESCRIPTION>
|
|
171 |
<PARAMETERS/>
|
|
172 |
<ARCS>
|
|
173 |
<ARC to="sync"/>
|
|
174 |
</ARCS>
|
|
175 |
</NODE>
|
|
179 | 176 |
|
180 | 177 |
<!-- UPDATE STATS --> |
181 |
<NODE name="prepareStats" type="PrepareStatsParams">
|
|
182 |
<DESCRIPTION />
|
|
183 |
<PARAMETERS>
|
|
184 |
<PARAM required="true" type="string" name="paramsMapJson" managedBy="user">
|
|
178 |
<NODE name="prepareStats" type="PrepareStatsParams">
|
|
179 |
<DESCRIPTION/>
|
|
180 |
<PARAMETERS>
|
|
181 |
<PARAM required="true" type="string" name="paramsMapJson" managedBy="user">
|
|
185 | 182 |
{ |
186 |
'oozieWfApplicationPath' : '/user/eri.katsari/stats/oozie_app',
|
|
187 |
'statsDbUrl' : 'jdbc:postgresql://node1.t.openaire.research-infrastructures.eu:5432/stats',
|
|
183 |
'oozieWfApplicationPath' : '/user/dnet/lib/stats/oozie_app',
|
|
184 |
'statsDbUrl' : 'jdbc:postgresql://stats.openaire.eu:5432/stats',
|
|
188 | 185 |
'statsDbUser' : 'sqoop', |
189 | 186 |
'statsDbPass' : 'sqoop', |
190 | 187 |
'statsDbDriver' : 'org.postgresql.Driver', |
191 |
'statsDbTableMap' : 'datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultResult=result_results,resultTopic=result_topics,category=category,claim=claim,concept=concept,datasourceLanguage=datasource_languages,resultLanguage=result_languages,resultDatasource=result_datasources',
|
|
192 |
'statsSqoopRecsPerStatement' : '1000',
|
|
193 |
'statsSqoopStatementPerTrans' : '1000', |
|
188 |
'statsDbTableMap' : 'datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultTopic=result_topics,category=category,context=context,claim=claim,concept=concept,datasourceLanguage=datasource_languages,resultLanguage=result_languages,resultDatasource=result_datasources',
|
|
189 |
'statsSqoopRecsPerStatement' : '10000',
|
|
190 |
'statsSqoopStatementPerTrans' : '1000000',
|
|
194 | 191 |
'statsSqoopReducersCount' : '4', |
195 | 192 |
|
196 |
'statsOutputPath' : '/tmp/test_stats/',
|
|
193 |
'statsOutputPath' : '/tmp/stats/', |
|
197 | 194 |
'statsNullStringField' : 'NULL', |
198 | 195 |
'statsNullNumericField' : '-1', |
199 |
'statsEnclosingCharacter' : '*',
|
|
196 |
'statsEnclosingCharacter' : '#',
|
|
200 | 197 |
'statsDelimCharacter' : '!', |
201 |
|
|
202 |
'out1' : 'datasource', |
|
198 |
'out1' : 'datasource', |
|
203 | 199 |
'out2' : 'project', |
204 | 200 |
'out3' : 'organization', |
205 | 201 |
'out4' : 'datasourceOrganization', |
... | ... | |
220 | 216 |
'out19' : 'claim', |
221 | 217 |
'out20' : 'concept' |
222 | 218 |
} |
223 |
</PARAM>
|
|
224 |
</PARAMETERS>
|
|
225 |
<ARCS>
|
|
226 |
<ARC to="updateStats" />
|
|
227 |
</ARCS>
|
|
228 |
</NODE>
|
|
229 |
<NODE name="updateStats" type="SubmitHadoopJob">
|
|
230 |
<DESCRIPTION>Update stats DB</DESCRIPTION>
|
|
231 |
<PARAMETERS>
|
|
232 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
|
|
233 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">StatsExportJob</PARAM>
|
|
234 |
<PARAM required="true" type="string" name="sysParams" managedBy="system">
|
|
219 |
</PARAM> |
|
220 |
</PARAMETERS>
|
|
221 |
<ARCS>
|
|
222 |
<ARC to="updateStats"/>
|
|
223 |
</ARCS>
|
|
224 |
</NODE>
|
|
225 |
<NODE name="updateStats" type="SubmitHadoopJob">
|
|
226 |
<DESCRIPTION>Update stats DB</DESCRIPTION>
|
|
227 |
<PARAMETERS>
|
|
228 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
|
|
229 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">StatsExportJob</PARAM>
|
|
230 |
<PARAM required="true" type="string" name="sysParams" managedBy="system">
|
|
235 | 231 |
{ |
236 | 232 |
'Stats_Hbase_Source_Table' : 'hbase.mapred.datatable' |
237 | 233 |
} |
238 | 234 |
</PARAM> |
239 |
<PARAM required="true" type="string" name="envParams" managedBy="system">
|
|
235 |
<PARAM required="true" type="string" name="envParams" managedBy="system">
|
|
240 | 236 |
{ |
241 | 237 |
'nameNode' : 'nameNode', |
242 | 238 |
'jobTracker' : 'jobTracker', |
... | ... | |
253 | 249 |
'Stats_null_String_Field' : 'statsNullStringField', |
254 | 250 |
'Stats_null_Numeric_Field' : 'statsNullNumericField', |
255 | 251 |
'Stats_enclosing_Character' : 'statsEnclosingCharacter', |
256 |
'Stats_delim_Character' : 'statsDelimCharacter',
|
|
257 |
'out1' : 'out1',
|
|
252 |
'Stats_delim_Character' : 'statsDelimCharacter',
|
|
253 |
'out1' : 'out1',
|
|
258 | 254 |
'out2' : 'out2', |
259 | 255 |
'out3' : 'out3', |
260 | 256 |
'out4' : 'out4', |
... | ... | |
278 | 274 |
'Stats_indexConf' : 'index.entity.links' |
279 | 275 |
} |
280 | 276 |
</PARAM> |
281 |
</PARAMETERS> |
|
282 |
<ARCS> |
|
283 |
<ARC to="sync" /> |
|
284 |
</ARCS> |
|
285 |
</NODE> |
|
286 |
|
|
277 |
</PARAMETERS> |
|
278 |
<ARCS> |
|
279 |
<ARC to="sync"/> |
|
280 |
</ARCS> |
|
281 |
</NODE> |
|
287 | 282 |
<!-- WAIT FOR INDEX AND STATS TO COMPLETE --> |
288 |
<NODE name="sync" isJoin="true"> |
|
289 |
<DESCRIPTION /> |
|
290 |
<PARAMETERS /> |
|
291 |
<ARCS> |
|
292 |
<ARC to="findSearchService" /> |
|
293 |
</ARCS> |
|
294 |
</NODE> |
|
295 |
|
|
296 |
<NODE name="findSearchService" type="FindSearchService"> |
|
297 |
<DESCRIPTION>find search service</DESCRIPTION> |
|
298 |
<PARAMETERS> |
|
299 |
<PARAM name="xquery" type="string" managedBy="user" required="false">/RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='SearchServiceResourceType' and .//RESOURCE_KIND/@value='ServiceResources']/HEADER/RESOURCE_IDENTIFIER/@value/string()</PARAM> |
|
300 |
<PARAM name="xqueryForServiceIdParam" type="string" managedBy="system" required="true">xqueryForSearchService</PARAM> |
|
301 |
</PARAMETERS> |
|
302 |
<ARCS> |
|
303 |
<ARC name="found" to="switch" /> |
|
304 |
<ARC name="notFound" to="success" /> |
|
305 |
</ARCS> |
|
306 |
</NODE> |
|
307 |
<NODE name="switch" type="SwitchIndex"> |
|
308 |
<DESCRIPTION>switch index</DESCRIPTION> |
|
309 |
<PARAMETERS> |
|
310 |
<PARAM name="xqueryForServiceIdParam" type="string" managedBy="system" required="true">xqueryForSearchService</PARAM> |
|
311 |
<PARAM name="inputIndexIdParam" type="string" managedBy="system" required="true">index_id</PARAM> |
|
312 |
<PARAM name="outputIndexIdParam" type="string" managedBy="system" required="true">IndexId</PARAM> |
|
313 |
</PARAMETERS> |
|
314 |
<ARCS> |
|
315 |
<ARC to="success" /> |
|
316 |
</ARCS> |
|
317 |
</NODE> |
|
283 |
<NODE name="sync" isJoin="true"> |
|
284 |
<DESCRIPTION/> |
|
285 |
<PARAMETERS/> |
|
286 |
<ARCS> |
|
287 |
<ARC to="findSearchService"/> |
|
288 |
</ARCS> |
|
289 |
</NODE> |
|
290 |
<NODE name="findSearchService" type="FindSearchService"> |
|
291 |
<DESCRIPTION>find search service</DESCRIPTION> |
|
292 |
<PARAMETERS> |
|
293 |
<PARAM name="xquery" type="string" managedBy="user" required="false">/RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='SearchServiceResourceType' and .//RESOURCE_KIND/@value='ServiceResources']/HEADER/RESOURCE_IDENTIFIER/@value/string()</PARAM> |
|
294 |
<PARAM name="xqueryForServiceIdParam" type="string" managedBy="system" required="true">xqueryForSearchService</PARAM> |
|
295 |
</PARAMETERS> |
|
296 |
<ARCS> |
|
297 |
<ARC name="found" to="switch"/> |
|
298 |
<ARC name="notFound" to="success"/> |
|
299 |
</ARCS> |
|
300 |
</NODE> |
|
301 |
<NODE name="switch" type="SwitchIndex"> |
|
302 |
<DESCRIPTION>switch index</DESCRIPTION> |
|
303 |
<PARAMETERS> |
|
304 |
<PARAM name="xqueryForServiceIdParam" type="string" managedBy="system" required="true">xqueryForSearchService</PARAM> |
|
305 |
<PARAM name="inputIndexIdParam" type="string" managedBy="system" required="true">index_id</PARAM> |
|
306 |
<PARAM name="outputIndexIdParam" type="string" managedBy="system" required="true">IndexId</PARAM> |
|
307 |
</PARAMETERS> |
|
308 |
<ARCS> |
|
309 |
<ARC to="success"/> |
|
310 |
</ARCS> |
|
311 |
</NODE> |
|
318 | 312 |
|
319 | 313 |
<!-- OAI STORE UPDATE --> |
320 |
<NODE name="setOAIFormat" type="SetFormatInfo"> |
|
321 |
<DESCRIPTION /> |
|
322 |
<PARAMETERS> |
|
323 |
<PARAM name="format" type="string" managedBy="user" required="true">oaf</PARAM> |
|
324 |
<PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM> |
|
325 |
<PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM> |
|
326 |
<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM> |
|
327 |
<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM> |
|
328 |
<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM> |
|
329 |
</PARAMETERS> |
|
330 |
<ARCS> |
|
331 |
<ARC to="prepareOAI" /> |
|
332 |
</ARCS> |
|
333 |
</NODE> |
|
334 |
<NODE name="prepareOAI" type="PrepareOaiJob"> |
|
335 |
<DESCRIPTION>Prepare oai feeding</DESCRIPTION> |
|
336 |
<PARAMETERS> |
|
337 |
<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM> |
|
338 |
<PARAM name="oaiDBName" type="string" managedBy="user" required="true">oaistore</PARAM> |
|
339 |
<PARAM name="oaiDBNameParam" type="string" managedBy="system" required="true">oai_dbName</PARAM> |
|
340 |
<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM> |
|
341 |
<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM> |
|
342 |
<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM> |
|
343 |
</PARAMETERS> |
|
344 |
<ARCS> |
|
345 |
<ARC to="OAIDropStore" /> |
|
346 |
</ARCS> |
|
347 |
</NODE> |
|
348 |
<NODE name="OAIDropStore" type="OAIDropStore" > |
|
349 |
<DESCRIPTION>Drop the store: upserts are too expensive!</DESCRIPTION> |
|
350 |
<PARAMETERS> |
|
351 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM> |
|
352 |
<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM> |
|
353 |
<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM> |
|
354 |
<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM> |
|
355 |
</PARAMETERS> |
|
356 |
<ARCS> |
|
357 |
<ARC to="OAICreateStore" /> |
|
358 |
</ARCS> |
|
359 |
</NODE> |
|
360 |
<NODE name="OAICreateStore" type="OAICreateStore" > |
|
361 |
<DESCRIPTION /> |
|
362 |
<PARAMETERS> |
|
363 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM> |
|
364 |
<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM> |
|
365 |
<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM> |
|
366 |
<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM> |
|
367 |
</PARAMETERS> |
|
368 |
<ARCS> |
|
369 |
<ARC to="feedOAI" /> |
|
370 |
</ARCS> |
|
371 |
</NODE> |
|
372 |
|
|
373 |
<NODE name="feedOAI" type="SubmitHadoopJob"> |
|
374 |
<DESCRIPTION>M/O oai feeding</DESCRIPTION> |
|
375 |
<PARAMETERS> |
|
376 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
|
377 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">oaiFeedJob</PARAM> |
|
378 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
314 |
<NODE name="setOAIFormat" type="SetFormatInfo"> |
|
315 |
<DESCRIPTION/> |
|
316 |
<PARAMETERS> |
|
317 |
<PARAM name="format" type="string" managedBy="user" required="true">oaf</PARAM> |
|
318 |
<PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM> |
|
319 |
<PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM> |
|
320 |
<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM> |
|
321 |
<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM> |
|
322 |
<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM> |
|
323 |
</PARAMETERS> |
|
324 |
<ARCS> |
|
325 |
<ARC to="prepareOAI"/> |
|
326 |
</ARCS> |
|
327 |
</NODE> |
|
328 |
<NODE name="prepareOAI" type="PrepareOaiJob"> |
|
329 |
<DESCRIPTION>Prepare oai feeding</DESCRIPTION> |
|
330 |
<PARAMETERS> |
|
331 |
<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM> |
|
332 |
<PARAM name="oaiDBName" type="string" managedBy="user" required="true">oaistore</PARAM> |
|
333 |
<PARAM name="oaiDBNameParam" type="string" managedBy="system" required="true">oai_dbName</PARAM> |
|
334 |
<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM> |
|
335 |
<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM> |
|
336 |
<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM> |
|
337 |
</PARAMETERS> |
|
338 |
<ARCS> |
|
339 |
<ARC to="OAIDropStore"/> |
|
340 |
</ARCS> |
|
341 |
</NODE> |
|
342 |
<NODE name="OAIDropStore" type="OAIDropStore"> |
|
343 |
<DESCRIPTION>Drop the store: upserts are too expensive!</DESCRIPTION> |
|
344 |
<PARAMETERS> |
|
345 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM> |
|
346 |
<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM> |
|
347 |
<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM> |
|
348 |
<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM> |
|
349 |
</PARAMETERS> |
|
350 |
<ARCS> |
|
351 |
<ARC to="OAICreateStore"/> |
|
352 |
</ARCS> |
|
353 |
</NODE> |
|
354 |
<NODE name="OAICreateStore" type="OAICreateStore"> |
|
355 |
<DESCRIPTION/> |
|
356 |
<PARAMETERS> |
|
357 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM> |
|
358 |
<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM> |
|
359 |
<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM> |
|
360 |
<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM> |
|
361 |
</PARAMETERS> |
|
362 |
<ARCS> |
|
363 |
<ARC to="feedOAI"/> |
|
364 |
</ARCS> |
|
365 |
</NODE> |
|
366 |
<NODE name="feedOAI" type="SubmitHadoopJob"> |
|
367 |
<DESCRIPTION>M/O oai feeding</DESCRIPTION> |
|
368 |
<PARAMETERS> |
|
369 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
|
370 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">oaiFeedJob</PARAM> |
|
371 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
379 | 372 |
{ |
380 | 373 |
'mapred.input.dir' : 'hdfsRecordsPath', |
381 | 374 |
'services.publisher.oai.collection' : 'oaiStoreCollection', |
... | ... | |
384 | 377 |
'services.publisher.oai.db' : 'oai_dbName' |
385 | 378 |
} |
386 | 379 |
</PARAM> |
387 |
<PARAM required="true" type="string" name="sysParams" managedBy="system">
|
|
380 |
<PARAM required="true" type="string" name="sysParams" managedBy="system">
|
|
388 | 381 |
{ |
389 | 382 |
'services.publisher.oai.host' : 'services.publisher.oai.host', |
390 | 383 |
'services.publisher.oai.port' : 'services.publisher.oai.port' |
391 | 384 |
} |
392 |
</PARAM>
|
|
393 |
</PARAMETERS>
|
|
394 |
<ARCS>
|
|
395 |
<ARC to="CompoundIndexes" />
|
|
396 |
</ARCS>
|
|
397 |
</NODE>
|
|
398 |
<NODE name="CompoundIndexes" type="OAICreateIndex">
|
|
399 |
<DESCRIPTION>Create composite indexes for the OAI store</DESCRIPTION>
|
|
400 |
<PARAMETERS>
|
|
401 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
|
|
402 |
<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
|
|
403 |
<PARAM required="true" type="string" name="fieldNames" managedBy="user">set,datestamp;license,oaftype;set,oaftype;oaftype,funder;resulttypeid,oaftype</PARAM>
|
|
404 |
</PARAMETERS>
|
|
405 |
<ARCS>
|
|
406 |
<ARC to="ConfigIndexes" />
|
|
407 |
</ARCS>
|
|
408 |
</NODE>
|
|
409 |
<NODE name="ConfigIndexes" type="OAIEnsureIndexes" >
|
|
410 |
<DESCRIPTION>Ensure an index exists on fields as specified in the configuration profile</DESCRIPTION>
|
|
411 |
<PARAMETERS>
|
|
412 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
|
|
413 |
<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
|
|
414 |
</PARAMETERS>
|
|
415 |
<ARCS>
|
|
416 |
<ARC to="SetsCount" />
|
|
417 |
<ARC to="ConfigSets" />
|
|
418 |
</ARCS>
|
|
419 |
</NODE>
|
|
420 |
<NODE name="SetsCount" type="OAISetsCountUpdate">
|
|
421 |
<DESCRIPTION>Count records in each OAI set, for each exported metadata format linked to the given oai collection</DESCRIPTION>
|
|
422 |
<PARAMETERS>
|
|
423 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
|
|
424 |
<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
|
|
425 |
<PARAM name="configuredOnly" type="boolean" managedBy="user" required="true">false</PARAM>
|
|
426 |
</PARAMETERS>
|
|
427 |
<ARCS>
|
|
428 |
<ARC to="SetOAIDB" />
|
|
429 |
</ARCS>
|
|
430 |
</NODE>
|
|
431 |
<NODE name="ConfigSets" type="RefreshSetsFromConfig">
|
|
432 |
<DESCRIPTION>Reads the current OAI configuration and upsert OAI sets accordingly (counts are updated as well)</DESCRIPTION>
|
|
433 |
<PARAMETERS>
|
|
434 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
|
|
435 |
</PARAMETERS>
|
|
436 |
<ARCS>
|
|
437 |
<ARC to="SetOAIDB" />
|
|
438 |
</ARCS>
|
|
439 |
</NODE>
|
|
440 |
<NODE name="SetOAIDB" type="SetCurrentOAIDB" isJoin="true">
|
|
441 |
<DESCRIPTION>Update the currentdb in the OAI configuration profile to the value in oai_dbName</DESCRIPTION>
|
|
442 |
<PARAMETERS>
|
|
443 |
<PARAM name="oaiDBNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
|
|
444 |
</PARAMETERS>
|
|
445 |
<ARCS>
|
|
446 |
<ARC to="success" />
|
|
447 |
</ARCS>
|
|
448 |
</NODE>
|
|
385 |
</PARAM> |
|
386 |
</PARAMETERS>
|
|
387 |
<ARCS>
|
|
388 |
<ARC to="CompoundIndexes"/>
|
|
389 |
</ARCS>
|
|
390 |
</NODE>
|
|
391 |
<NODE name="CompoundIndexes" type="OAICreateIndex">
|
|
392 |
<DESCRIPTION>Create composite indexes for the OAI store</DESCRIPTION>
|
|
393 |
<PARAMETERS>
|
|
394 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
|
|
395 |
<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
|
|
396 |
<PARAM required="true" type="string" name="fieldNames" managedBy="user">set,datestamp;license,oaftype;set,oaftype;oaftype,funder;resulttypeid,oaftype</PARAM>
|
|
397 |
</PARAMETERS>
|
|
398 |
<ARCS>
|
|
399 |
<ARC to="ConfigIndexes"/>
|
|
400 |
</ARCS>
|
|
401 |
</NODE>
|
|
402 |
<NODE name="ConfigIndexes" type="OAIEnsureIndexes">
|
|
403 |
<DESCRIPTION>Ensure an index exists on fields as specified in the configuration profile</DESCRIPTION>
|
|
404 |
<PARAMETERS>
|
|
405 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
|
|
406 |
<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
|
|
407 |
</PARAMETERS>
|
|
408 |
<ARCS>
|
|
409 |
<ARC to="SetsCount"/>
|
|
410 |
<ARC to="ConfigSets"/>
|
|
411 |
</ARCS>
|
|
412 |
</NODE>
|
|
413 |
<NODE name="SetsCount" type="OAISetsCountUpdate">
|
|
414 |
<DESCRIPTION>Count records in each OAI set, for each exported metadata format linked to the given oai collection</DESCRIPTION>
|
|
415 |
<PARAMETERS>
|
|
416 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
|
|
417 |
<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
|
|
418 |
<PARAM name="configuredOnly" type="boolean" managedBy="user" required="true">false</PARAM>
|
|
419 |
</PARAMETERS>
|
|
420 |
<ARCS>
|
|
421 |
<ARC to="SetOAIDB"/>
|
|
422 |
</ARCS>
|
|
423 |
</NODE>
|
|
424 |
<NODE name="ConfigSets" type="RefreshSetsFromConfig">
|
|
425 |
<DESCRIPTION>Reads the current OAI configuration and upsert OAI sets accordingly (counts are updated as well)</DESCRIPTION>
|
|
426 |
<PARAMETERS>
|
|
427 |
<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
|
|
428 |
</PARAMETERS>
|
|
429 |
<ARCS>
|
|
430 |
<ARC to="SetOAIDB"/>
|
|
431 |
</ARCS>
|
|
432 |
</NODE>
|
|
433 |
<NODE name="SetOAIDB" type="SetCurrentOAIDB" isJoin="true">
|
|
434 |
<DESCRIPTION>Update the currentdb in the OAI configuration profile to the value in oai_dbName</DESCRIPTION>
|
|
435 |
<PARAMETERS>
|
|
436 |
<PARAM name="oaiDBNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
|
|
437 |
</PARAMETERS>
|
|
438 |
<ARCS>
|
|
439 |
<ARC to="success"/>
|
|
440 |
</ARCS>
|
|
441 |
</NODE>
|
|
449 | 442 |
</CONFIGURATION> |
450 |
<STATUS /> |
|
443 |
<STATUS> |
|
444 |
<LAST_EXECUTION_ID></LAST_EXECUTION_ID> |
|
445 |
<LAST_EXECUTION_DATE></LAST_EXECUTION_DATE> |
|
446 |
<LAST_EXECUTION_STATUS></LAST_EXECUTION_STATUS> |
|
447 |
<LAST_EXECUTION_ERROR></LAST_EXECUTION_ERROR> |
|
448 |
</STATUS> |
|
451 | 449 |
</BODY> |
452 |
</RESOURCE_PROFILE> |
|
450 |
</RESOURCE_PROFILE> |
Also available in: Unified diff
updated provision workflow