Project

General

Profile

« Previous | Next » 

Revision 30171

updated provision workflow

View differences:

modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/dm/dm.provision.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2 1
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3 2
    <HEADER>
4 3
        <RESOURCE_IDENTIFIER value="a4434d62-d4cd-4c73-a107-bc7c62e6f815_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
......
14 13
        <CONFIGURATION start="manual">
15 14
        
16 15
        	<!-- PREPARE NODES -->
17
			<NODE name="setInfo" type="SetFormatInfo" isStart="true">
18
				<DESCRIPTION />
19
				<PARAMETERS>
20
					<PARAM name="format" type="string" managedBy="user" required="true">DMF</PARAM>
21
					<PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM>
22
					<PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM>
23
				</PARAMETERS>
24
				<ARCS>
25
					<ARC to="fetchRelClasses" />
26
					<ARC to="fetchContexts" />
27
					<ARC to="fetchEntityLinks" />					
28
				</ARCS>
29
			</NODE>        	
30
			<NODE name="fetchRelClasses" type="FetchRelClasses">
31
				<DESCRIPTION />
32
				<PARAMETERS>
33
					<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM>
34
					<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM>
35
				</PARAMETERS>
36
				<ARCS>
37
					<ARC to="groupEntities" />
38
				</ARCS>
39
			</NODE>
40
			<NODE name="fetchContexts" type="LoadContextsJob">
41
				<DESCRIPTION />
42
				<PARAMETERS/>
43
				<ARCS>
44
					<ARC to="groupEntities" />
45
				</ARCS>
46
			</NODE>
47
			<NODE name="fetchEntityLinks" type="LoadEntityLinksJob">
48
				<DESCRIPTION />
49
				<PARAMETERS/>
50
				<ARCS>
51
					<ARC to="groupEntities" />
52
					<ARC to="prepareStats" />
53
				</ARCS>
54
			</NODE>
16
            <NODE name="setInfo" type="SetFormatInfo" isStart="true">
17
                <DESCRIPTION/>
18
                <PARAMETERS>
19
                    <PARAM name="format" type="string" managedBy="user" required="true">DMF</PARAM>
20
                    <PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM>
21
                    <PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM>
22
                </PARAMETERS>
23
                <ARCS>
24
                    <ARC to="findIndex"/>
25
                </ARCS>
26
            </NODE>
27
            <NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true">
28
                <DESCRIPTION/>
29
                <PARAMETERS>
30
                    <PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM>
31
                    <PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM>
32
                </PARAMETERS>
33
                <ARCS>
34
                    <ARC to="groupEntities"/>
35
                </ARCS>
36
            </NODE>
37
            <NODE name="fetchContexts" type="LoadContextsJob" isStart="true">
38
                <DESCRIPTION/>
39
                <PARAMETERS/>
40
                <ARCS>
41
                    <ARC to="groupEntities"/>
42
                </ARCS>
43
            </NODE>
44
            <NODE name="fetchEntityLinks" type="LoadEntityLinksJob" isStart="true">
45
                <DESCRIPTION/>
46
                <PARAMETERS/>
47
                <ARCS>
48
                    <ARC to="groupEntities"/>
49
                    <ARC to="prepareStats"/>
50
                </ARCS>
51
            </NODE>
55 52
			
56 53
			<!-- UPDATE INDEX -->
57
			<NODE name="findIndex" type="FindIndex" isStart="true">
58
				<DESCRIPTION />
59
				<PARAMETERS/>
60
				<ARCS>
61
					<ARC name="found" to="prepare" />
62
					<ARC name="notFound" to="createIndex" />
63
				</ARCS>
64
			</NODE>
65
			<NODE name="createIndex" type="CreateIndex">
66
				<DESCRIPTION />
67
				<PARAMETERS />
68
				<ARCS>
69
					<ARC to="prepare" />
70
				</ARCS>
71
			</NODE>
72
			<NODE name="prepare" type="PrepareIndexJob">
73
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
74
				<PARAMETERS>
75
					<PARAM name="outputRecordsPathParam"  type="string" managedBy="system" required="true">hdfsRecordsPath</PARAM>
76
					<PARAM name="rottenRecordsPathParam"  type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
77
					<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
78
					<PARAM name="oafSchemaLocationProperty"  type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
79
				</PARAMETERS>
80
				<ARCS>
81
					<ARC to="cleanupXml" />
82
					<ARC to="cleanupRotten" />
83
				</ARCS>
84
			</NODE>
85
			<NODE name="cleanupXml" type="DeleteHdfsPathJob">
86
				<DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
87
				<PARAMETERS>
88
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
89
					<PARAM required="true" type="string" name="envParams" managedBy="system">
54
            <NODE name="findIndex" type="FindIndex">
55
                <DESCRIPTION/>
56
                <PARAMETERS/>
57
                <ARCS>
58
                    <ARC name="found" to="prepareIndexing"/>
59
                    <ARC name="notFound" to="createIndex"/>
60
                </ARCS>
61
            </NODE>
62
            <NODE name="createIndex" type="CreateIndex">
63
                <DESCRIPTION/>
64
                <PARAMETERS/>
65
                <ARCS>
66
                    <ARC to="prepareIndexing"/>
67
                </ARCS>
68
            </NODE>
69
            <NODE name="prepareIndexing" type="PrepareIndexJob">
70
                <DESCRIPTION>Prepare indexing</DESCRIPTION>
71
                <PARAMETERS>
72
                    <PARAM name="outputRecordsPathParam" type="string" managedBy="system" required="true">hdfsRecordsPath</PARAM>
73
                    <PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
74
                    <PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
75
                    <PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
76
                </PARAMETERS>
77
                <ARCS>
78
                    <ARC to="cleanupXml"/>
79
                    <ARC to="cleanupRotten"/>
80
                </ARCS>
81
            </NODE>
82
            <NODE name="cleanupXml" type="DeleteHdfsPathJob">
83
                <DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
84
                <PARAMETERS>
85
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
86
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
90 87
						{ 	
91
							'path' : 'outputRecordsPath'
88
							'path' : 'hdfsRecordsPath'
92 89
						}
93
					</PARAM>					
94
				</PARAMETERS>
95
				<ARCS>
96
					<ARC to="groupEntities" />
97
				</ARCS>
98
			</NODE>		
99
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
100
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
101
				<PARAMETERS>
102
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
103
					<PARAM required="true" type="string" name="envParams" managedBy="system">
90
					</PARAM>
91
                </PARAMETERS>
92
                <ARCS>
93
                    <ARC to="groupEntities"/>
94
                </ARCS>
95
            </NODE>
96
            <NODE name="cleanupRotten" type="DeleteHdfsPathJob">
97
                <DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
98
                <PARAMETERS>
99
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
100
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
104 101
						{ 	
105 102
							'path' : 'rottenRecordsPath'
106 103
						}
107
					</PARAM>					
108
				</PARAMETERS>
109
				<ARCS>
110
					<ARC to="groupEntities" />
111
				</ARCS>
112
			</NODE>
113
			<NODE name="groupEntities" type="SubmitHadoopJob" isJoin="true">
114
				<DESCRIPTION>M/R group entities</DESCRIPTION>
115
				<PARAMETERS>
116
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
117
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">prepareIndexDataJob</PARAM>				
118
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
104
					</PARAM>
105
                </PARAMETERS>
106
                <ARCS>
107
                    <ARC to="groupEntities"/>
108
                </ARCS>
109
            </NODE>
110
            <NODE name="groupEntities" type="SubmitHadoopJob" isJoin="true">
111
                <DESCRIPTION>M/R group entities</DESCRIPTION>
112
                <PARAMETERS>
113
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
114
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">prepareIndexDataJob</PARAM>
115
                    <PARAM required="true" type="string" name="sysParams" managedBy="system">
119 116
						{ 	
120 117
							'hbase.mapred.inputtable' : 'hbase.mapred.datatable', 
121 118
							'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
122 119
						}
123 120
					</PARAM>
124
					<PARAM required="true" type="string" name="envParams" managedBy="system">
121
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
125 122
						{ 	
126 123
							'mapred.output.dir' : 'hdfsRecordsPath',
127 124
							'index.entity.links' : 'index.entity.links',
......
130 127
							'relClasses' : 'relClasses'
131 128
						}
132 129
					</PARAM>
133
				</PARAMETERS>
134
				<ARCS>
135
					<ARC to="updateIndex" />
136
					<ARC to="setOAIFormat" />
137
				</ARCS>
138
			</NODE>			
139
			<NODE name="updateIndex" type="SubmitHadoopJob">
140
				<DESCRIPTION>M/O index records</DESCRIPTION>
141
				<PARAMETERS>
142
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
143
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>					
144
					<PARAM required="true" type="string" name="envParams" managedBy="system">
130
                </PARAMETERS>
131
                <ARCS>
132
                    <ARC to="updateIndex"/>
133
                    <ARC to="setOAIFormat"/>
134
                </ARCS>
135
            </NODE>
136
            <NODE name="updateIndex" type="SubmitHadoopJob">
137
                <DESCRIPTION>M/O index records</DESCRIPTION>
138
                <PARAMETERS>
139
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
140
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>
141
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
145 142
						{ 	
146 143
							'mapred.input.dir' : 'hdfsRecordsPath',
147 144
							'mapred.output.dir' : 'rottenRecordsPath',
......
157 154
							'index.feed.timestamp' : 'index.feed.timestamp'
158 155
						}
159 156
					</PARAM>
160
				</PARAMETERS>
161
				<ARCS>
162
					<ARC to="finalize" />
163
				</ARCS>
164
			</NODE>
165
			<NODE name="finalize" type="FinalizeIndexFeeding">
166
				<DESCRIPTION>commit changes</DESCRIPTION>
167
				<PARAMETERS />
168
				<ARCS>
169
					<ARC to="updateDs" />
170
				</ARCS>
171
			</NODE>
172
			<NODE name="updateDs" type="IndexDsUpdateJob">
173
				<DESCRIPTION>update DS</DESCRIPTION>
174
				<PARAMETERS />
175
				<ARCS>
176
					<ARC to="sync" />
177
				</ARCS>
178
			</NODE>				
157
                </PARAMETERS>
158
                <ARCS>
159
                    <ARC to="finalize"/>
160
                </ARCS>
161
            </NODE>
162
            <NODE name="finalize" type="FinalizeIndexFeeding">
163
                <DESCRIPTION>commit changes</DESCRIPTION>
164
                <PARAMETERS/>
165
                <ARCS>
166
                    <ARC to="updateDs"/>
167
                </ARCS>
168
            </NODE>
169
            <NODE name="updateDs" type="IndexDsUpdateJob">
170
                <DESCRIPTION>update DS</DESCRIPTION>
171
                <PARAMETERS/>
172
                <ARCS>
173
                    <ARC to="sync"/>
174
                </ARCS>
175
            </NODE>				
179 176

  
180 177
			<!--  UPDATE STATS -->
181
			<NODE name="prepareStats" type="PrepareStatsParams">
182
				<DESCRIPTION />
183
				<PARAMETERS>
184
					<PARAM required="true" type="string" name="paramsMapJson" managedBy="user">
178
            <NODE name="prepareStats" type="PrepareStatsParams">
179
                <DESCRIPTION/>
180
                <PARAMETERS>
181
                    <PARAM required="true" type="string" name="paramsMapJson" managedBy="user">
185 182
					{
186
						'oozieWfApplicationPath' : '/user/eri.katsari/stats/oozie_app',
187
						'statsDbUrl' : 'jdbc:postgresql://node1.t.openaire.research-infrastructures.eu:5432/stats',
183
						'oozieWfApplicationPath' : '/user/dnet/lib/stats/oozie_app',
184
						'statsDbUrl' : 'jdbc:postgresql://stats.openaire.eu:5432/stats',
188 185
						'statsDbUser' : 'sqoop',
189 186
						'statsDbPass' : 'sqoop',
190 187
						'statsDbDriver' : 'org.postgresql.Driver',
191
						'statsDbTableMap' : 'datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultResult=result_results,resultTopic=result_topics,category=category,claim=claim,concept=concept,datasourceLanguage=datasource_languages,resultLanguage=result_languages,resultDatasource=result_datasources',
192
						'statsSqoopRecsPerStatement' : '1000',
193
						'statsSqoopStatementPerTrans' : '1000',
188
						'statsDbTableMap' : 'datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultTopic=result_topics,category=category,context=context,claim=claim,concept=concept,datasourceLanguage=datasource_languages,resultLanguage=result_languages,resultDatasource=result_datasources',
189
						'statsSqoopRecsPerStatement' : '10000',
190
						'statsSqoopStatementPerTrans' : '1000000',
194 191
						'statsSqoopReducersCount' : '4',
195 192

  
196
						'statsOutputPath' : '/tmp/test_stats/',
193
						'statsOutputPath' : '/tmp/stats/',
197 194
						'statsNullStringField' : 'NULL',
198 195
						'statsNullNumericField' : '-1',
199
						'statsEnclosingCharacter' : '*',
196
						'statsEnclosingCharacter' : '#',
200 197
						'statsDelimCharacter' : '!',
201
					
202
						'out1' : 'datasource',
198
						'out1' : 'datasource',
203 199
						'out2' : 'project',
204 200
						'out3' : 'organization',
205 201
						'out4' : 'datasourceOrganization',
......
220 216
						'out19' : 'claim',
221 217
						'out20' : 'concept'
222 218
					}
223
					</PARAM>				
224
				</PARAMETERS>
225
				<ARCS>
226
					<ARC to="updateStats" />
227
				</ARCS>			
228
			</NODE>
229
			<NODE name="updateStats" type="SubmitHadoopJob">
230
				<DESCRIPTION>Update stats DB</DESCRIPTION>
231
				<PARAMETERS>
232
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
233
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">StatsExportJob</PARAM>				
234
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
219
					</PARAM>
220
                </PARAMETERS>
221
                <ARCS>
222
                    <ARC to="updateStats"/>
223
                </ARCS>
224
            </NODE>
225
            <NODE name="updateStats" type="SubmitHadoopJob">
226
                <DESCRIPTION>Update stats DB</DESCRIPTION>
227
                <PARAMETERS>
228
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
229
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">StatsExportJob</PARAM>
230
                    <PARAM required="true" type="string" name="sysParams" managedBy="system">
235 231
						{ 	
236 232
							'Stats_Hbase_Source_Table' : 'hbase.mapred.datatable'
237 233
						}
238 234
					</PARAM>
239
					<PARAM required="true" type="string" name="envParams" managedBy="system">
235
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
240 236
						{ 	
241 237
							'nameNode' : 'nameNode',
242 238
							'jobTracker' : 'jobTracker',
......
253 249
							'Stats_null_String_Field' : 'statsNullStringField',
254 250
							'Stats_null_Numeric_Field' : 'statsNullNumericField',
255 251
							'Stats_enclosing_Character' : 'statsEnclosingCharacter',
256
							'Stats_delim_Character' : 'statsDelimCharacter',
257
							'out1' : 'out1',
252
							'Stats_delim_Character' : 'statsDelimCharacter',
253
							'out1' : 'out1',
258 254
							'out2' : 'out2',
259 255
							'out3' : 'out3',
260 256
							'out4' : 'out4',
......
278 274
							'Stats_indexConf' : 'index.entity.links'
279 275
						}
280 276
					</PARAM>
281
				</PARAMETERS>
282
				<ARCS>
283
					<ARC to="sync" />
284
				</ARCS>
285
			</NODE>			
286

  
277
                </PARAMETERS>
278
                <ARCS>
279
                    <ARC to="sync"/>
280
                </ARCS>
281
            </NODE>
287 282
			<!-- WAIT FOR INDEX AND STATS TO COMPLETE -->
288
			<NODE name="sync" isJoin="true">
289
				<DESCRIPTION />
290
				<PARAMETERS />
291
				<ARCS>
292
					<ARC to="findSearchService" />
293
				</ARCS>
294
			</NODE>				
295

  
296
			<NODE name="findSearchService" type="FindSearchService">
297
				<DESCRIPTION>find search service</DESCRIPTION>
298
				<PARAMETERS>
299
					<PARAM name="xquery" type="string" managedBy="user" required="false">/RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='SearchServiceResourceType' and .//RESOURCE_KIND/@value='ServiceResources']/HEADER/RESOURCE_IDENTIFIER/@value/string()</PARAM>
300
					<PARAM name="xqueryForServiceIdParam" type="string" managedBy="system" required="true">xqueryForSearchService</PARAM>
301
				</PARAMETERS>
302
				<ARCS>
303
					<ARC name="found" to="switch" />
304
					<ARC name="notFound" to="success" />
305
				</ARCS>
306
			</NODE>			
307
			<NODE name="switch" type="SwitchIndex">
308
				<DESCRIPTION>switch index</DESCRIPTION>
309
				<PARAMETERS>
310
					<PARAM name="xqueryForServiceIdParam" type="string" managedBy="system" required="true">xqueryForSearchService</PARAM>
311
					<PARAM name="inputIndexIdParam" type="string" managedBy="system" required="true">index_id</PARAM>
312
					<PARAM name="outputIndexIdParam" type="string" managedBy="system" required="true">IndexId</PARAM>					
313
				</PARAMETERS>
314
				<ARCS>
315
					<ARC to="success" />
316
				</ARCS>
317
			</NODE>	
283
            <NODE name="sync" isJoin="true">
284
                <DESCRIPTION/>
285
                <PARAMETERS/>
286
                <ARCS>
287
                    <ARC to="findSearchService"/>
288
                </ARCS>
289
            </NODE>
290
            <NODE name="findSearchService" type="FindSearchService">
291
                <DESCRIPTION>find search service</DESCRIPTION>
292
                <PARAMETERS>
293
                    <PARAM name="xquery" type="string" managedBy="user" required="false">/RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='SearchServiceResourceType' and .//RESOURCE_KIND/@value='ServiceResources']/HEADER/RESOURCE_IDENTIFIER/@value/string()</PARAM>
294
                    <PARAM name="xqueryForServiceIdParam" type="string" managedBy="system" required="true">xqueryForSearchService</PARAM>
295
                </PARAMETERS>
296
                <ARCS>
297
                    <ARC name="found" to="switch"/>
298
                    <ARC name="notFound" to="success"/>
299
                </ARCS>
300
            </NODE>
301
            <NODE name="switch" type="SwitchIndex">
302
                <DESCRIPTION>switch index</DESCRIPTION>
303
                <PARAMETERS>
304
                    <PARAM name="xqueryForServiceIdParam" type="string" managedBy="system" required="true">xqueryForSearchService</PARAM>
305
                    <PARAM name="inputIndexIdParam" type="string" managedBy="system" required="true">index_id</PARAM>
306
                    <PARAM name="outputIndexIdParam" type="string" managedBy="system" required="true">IndexId</PARAM>
307
                </PARAMETERS>
308
                <ARCS>
309
                    <ARC to="success"/>
310
                </ARCS>
311
            </NODE>	
318 312
			
319 313
			<!-- OAI STORE UPDATE -->
320
			<NODE name="setOAIFormat" type="SetFormatInfo">
321
				<DESCRIPTION />
322
				<PARAMETERS>
323
					<PARAM name="format" type="string" managedBy="user" required="true">oaf</PARAM>
324
					<PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM>
325
					<PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM>
326
					<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
327
					<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
328
					<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>														
329
				</PARAMETERS>
330
				<ARCS>
331
					<ARC to="prepareOAI" />
332
				</ARCS>
333
			</NODE>	
334
			<NODE name="prepareOAI" type="PrepareOaiJob">
335
				<DESCRIPTION>Prepare oai feeding</DESCRIPTION>
336
				<PARAMETERS>
337
					<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
338
					<PARAM name="oaiDBName" type="string" managedBy="user" required="true">oaistore</PARAM>
339
					<PARAM name="oaiDBNameParam" type="string" managedBy="system" required="true">oai_dbName</PARAM>
340
					<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
341
					<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
342
					<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>					
343
				</PARAMETERS>
344
				<ARCS>
345
					<ARC to="OAIDropStore" />
346
				</ARCS>
347
			</NODE>
348
			<NODE name="OAIDropStore" type="OAIDropStore" >
349
				<DESCRIPTION>Drop the store: upserts are too expensive!</DESCRIPTION>
350
				<PARAMETERS>
351
					<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
352
					<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
353
					<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
354
					<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>													
355
				</PARAMETERS>
356
				<ARCS>
357
					<ARC to="OAICreateStore" />
358
				</ARCS>
359
			</NODE>			
360
			<NODE name="OAICreateStore" type="OAICreateStore" >
361
				<DESCRIPTION />
362
				<PARAMETERS>
363
					<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
364
					<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
365
					<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
366
					<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>												
367
				</PARAMETERS>
368
				<ARCS>
369
					<ARC to="feedOAI" />
370
				</ARCS>
371
			</NODE>			
372
			
373
			<NODE name="feedOAI" type="SubmitHadoopJob">
374
				<DESCRIPTION>M/O oai feeding</DESCRIPTION>
375
				<PARAMETERS>
376
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
377
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">oaiFeedJob</PARAM>				
378
					<PARAM required="true" type="string" name="envParams" managedBy="system">
314
            <NODE name="setOAIFormat" type="SetFormatInfo">
315
                <DESCRIPTION/>
316
                <PARAMETERS>
317
                    <PARAM name="format" type="string" managedBy="user" required="true">oaf</PARAM>
318
                    <PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM>
319
                    <PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM>
320
                    <PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
321
                    <PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
322
                    <PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
323
                </PARAMETERS>
324
                <ARCS>
325
                    <ARC to="prepareOAI"/>
326
                </ARCS>
327
            </NODE>
328
            <NODE name="prepareOAI" type="PrepareOaiJob">
329
                <DESCRIPTION>Prepare oai feeding</DESCRIPTION>
330
                <PARAMETERS>
331
                    <PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
332
                    <PARAM name="oaiDBName" type="string" managedBy="user" required="true">oaistore</PARAM>
333
                    <PARAM name="oaiDBNameParam" type="string" managedBy="system" required="true">oai_dbName</PARAM>
334
                    <PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
335
                    <PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
336
                    <PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
337
                </PARAMETERS>
338
                <ARCS>
339
                    <ARC to="OAIDropStore"/>
340
                </ARCS>
341
            </NODE>
342
            <NODE name="OAIDropStore" type="OAIDropStore">
343
                <DESCRIPTION>Drop the store: upserts are too expensive!</DESCRIPTION>
344
                <PARAMETERS>
345
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
346
                    <PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
347
                    <PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
348
                    <PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
349
                </PARAMETERS>
350
                <ARCS>
351
                    <ARC to="OAICreateStore"/>
352
                </ARCS>
353
            </NODE>
354
            <NODE name="OAICreateStore" type="OAICreateStore">
355
                <DESCRIPTION/>
356
                <PARAMETERS>
357
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
358
                    <PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
359
                    <PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
360
                    <PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
361
                </PARAMETERS>
362
                <ARCS>
363
                    <ARC to="feedOAI"/>
364
                </ARCS>
365
            </NODE>
366
            <NODE name="feedOAI" type="SubmitHadoopJob">
367
                <DESCRIPTION>M/O oai feeding</DESCRIPTION>
368
                <PARAMETERS>
369
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
370
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">oaiFeedJob</PARAM>
371
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
379 372
						{ 	
380 373
							'mapred.input.dir' : 'hdfsRecordsPath',
381 374
							'services.publisher.oai.collection' : 'oaiStoreCollection',
......
384 377
							'services.publisher.oai.db' : 'oai_dbName'
385 378
						}
386 379
					</PARAM>
387
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
380
                    <PARAM required="true" type="string" name="sysParams" managedBy="system">
388 381
						{ 	
389 382
							'services.publisher.oai.host' : 'services.publisher.oai.host',
390 383
							'services.publisher.oai.port' : 'services.publisher.oai.port'			
391 384
						}
392
					</PARAM>					
393
				</PARAMETERS>
394
				<ARCS>
395
					<ARC to="CompoundIndexes" />
396
				</ARCS>
397
			</NODE>
398
			<NODE name="CompoundIndexes" type="OAICreateIndex">
399
				<DESCRIPTION>Create composite indexes for the OAI store</DESCRIPTION>
400
				<PARAMETERS>
401
					<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
402
					<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
403
					<PARAM required="true" type="string" name="fieldNames" managedBy="user">set,datestamp;license,oaftype;set,oaftype;oaftype,funder;resulttypeid,oaftype</PARAM>
404
				</PARAMETERS>
405
				<ARCS>
406
					<ARC to="ConfigIndexes" />
407
				</ARCS>
408
			</NODE>			
409
			<NODE name="ConfigIndexes" type="OAIEnsureIndexes" >
410
				<DESCRIPTION>Ensure an index exists on fields as specified in the configuration profile</DESCRIPTION>
411
				<PARAMETERS>
412
					<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
413
					<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
414
				</PARAMETERS>
415
				<ARCS>
416
					<ARC to="SetsCount" />
417
					<ARC to="ConfigSets" />
418
				</ARCS>
419
			</NODE>
420
			<!-- Workflow node "SetsCount" (type OAISetsCountUpdate).
			     Parameters: dbNameParam = oai_dbName, oaiStoreCollectionParam = oaiStoreCollection
			     (system-managed) and the user-managed boolean configuredOnly, set to false here.
			     NOTE(review): configuredOnly=false presumably means sets are counted whether or not
			     they come from the OAI configuration; confirm against the node implementation.
			     Single outgoing arc: SetOAIDB. -->
			<NODE name="SetsCount" type="OAISetsCountUpdate">
421
				<DESCRIPTION>Count records in each OAI set, for each exported metadata format linked to the given oai collection</DESCRIPTION>
422
				<PARAMETERS>
423
				<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
424
					<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
425
					<PARAM name="configuredOnly" type="boolean" managedBy="user" required="true">false</PARAM>	
426
				</PARAMETERS>
427
				<ARCS>
428
					<ARC to="SetOAIDB" />
429
				</ARCS>
430
			</NODE>			
431
			<!-- Workflow node "ConfigSets" (type RefreshSetsFromConfig).
			     Only parameter: dbNameParam = oai_dbName (system-managed); unlike the sibling OAI
			     nodes it declares no oaiStoreCollectionParam.
			     Single outgoing arc: SetOAIDB. -->
			<NODE name="ConfigSets" type="RefreshSetsFromConfig">
432
				<DESCRIPTION>Reads the current OAI configuration and upsert OAI sets accordingly (counts are updated as well)</DESCRIPTION>
433
				<PARAMETERS>
434
					<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
435
				</PARAMETERS>
436
				<ARCS>
437
					<ARC to="SetOAIDB" />
438
				</ARCS>
439
			</NODE>
440
			<!-- Workflow node "SetOAIDB" (type SetCurrentOAIDB), declared with isJoin="true";
			     it is the arc target of both SetsCount and ConfigSets.
			     Note the parameter is named oaiDBNameParam here, not dbNameParam as on the other
			     OAI nodes; its value is still oai_dbName.
			     Single outgoing arc: "success". No node with that name appears in this part of the
			     profile; presumably it is the workflow's terminal state (TODO confirm). -->
			<NODE name="SetOAIDB" type="SetCurrentOAIDB" isJoin="true">
441
				<DESCRIPTION>Update the currentdb in the OAI configuration profile to the value in oai_dbName</DESCRIPTION>
442
				<PARAMETERS>
443
					<PARAM name="oaiDBNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
444
				</PARAMETERS>
445
				<ARCS>
446
					<ARC to="success" />
447
				</ARCS>
448
			</NODE>
385
					</PARAM>
386
                </PARAMETERS>
387
                <ARCS>
388
                    <ARC to="CompoundIndexes"/>
389
                </ARCS>
390
            </NODE>
391
            <!-- Workflow node "CompoundIndexes" (type OAICreateIndex).
                 Parameters: dbNameParam = oai_dbName and oaiStoreCollectionParam = oaiStoreCollection
                 (both system-managed), fieldNames (user-managed).
                 NOTE(review): the "Param" suffix suggests the first two values are names of workflow
                 attributes to look up rather than literal values; confirm against the node implementation.
                 fieldNames is written as ';'-separated groups of ','-separated field names; presumably
                 each group becomes one composite index (TODO confirm).
                 Single outgoing arc: ConfigIndexes. -->
            <NODE name="CompoundIndexes" type="OAICreateIndex">
392
                <DESCRIPTION>Create composite indexes for the OAI store</DESCRIPTION>
393
                <PARAMETERS>
394
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
395
                    <PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
396
                    <PARAM required="true" type="string" name="fieldNames" managedBy="user">set,datestamp;license,oaftype;set,oaftype;oaftype,funder;resulttypeid,oaftype</PARAM>
397
                </PARAMETERS>
398
                <ARCS>
399
                    <ARC to="ConfigIndexes"/>
400
                </ARCS>
401
            </NODE>
402
            <!-- Workflow node "ConfigIndexes" (type OAIEnsureIndexes).
                 Takes the same two system-managed parameters as the other OAI nodes
                 (dbNameParam = oai_dbName, oaiStoreCollectionParam = oaiStoreCollection).
                 Two outgoing arcs: SetsCount and ConfigSets. Both of those nodes arc to SetOAIDB,
                 which is declared with isJoin="true". -->
            <NODE name="ConfigIndexes" type="OAIEnsureIndexes">
403
                <DESCRIPTION>Ensure an index exists on fields as specified in the configuration profile</DESCRIPTION>
404
                <PARAMETERS>
405
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
406
                    <PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
407
                </PARAMETERS>
408
                <ARCS>
409
                    <ARC to="SetsCount"/>
410
                    <ARC to="ConfigSets"/>
411
                </ARCS>
412
            </NODE>
413
            <!-- Workflow node "SetsCount" (type OAISetsCountUpdate).
                 Parameters: dbNameParam = oai_dbName, oaiStoreCollectionParam = oaiStoreCollection
                 (system-managed) and the user-managed boolean configuredOnly, set to false here.
                 NOTE(review): configuredOnly=false presumably means sets are counted whether or not
                 they come from the OAI configuration; confirm against the node implementation.
                 Single outgoing arc: SetOAIDB. -->
            <NODE name="SetsCount" type="OAISetsCountUpdate">
414
                <DESCRIPTION>Count records in each OAI set, for each exported metadata format linked to the given oai collection</DESCRIPTION>
415
                <PARAMETERS>
416
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
417
                    <PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
418
                    <PARAM name="configuredOnly" type="boolean" managedBy="user" required="true">false</PARAM>
419
                </PARAMETERS>
420
                <ARCS>
421
                    <ARC to="SetOAIDB"/>
422
                </ARCS>
423
            </NODE>
424
            <!-- Workflow node "ConfigSets" (type RefreshSetsFromConfig).
                 Only parameter: dbNameParam = oai_dbName (system-managed); unlike the sibling OAI
                 nodes it declares no oaiStoreCollectionParam.
                 Single outgoing arc: SetOAIDB. -->
            <NODE name="ConfigSets" type="RefreshSetsFromConfig">
425
                <DESCRIPTION>Reads the current OAI configuration and upsert OAI sets accordingly (counts are updated as well)</DESCRIPTION>
426
                <PARAMETERS>
427
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
428
                </PARAMETERS>
429
                <ARCS>
430
                    <ARC to="SetOAIDB"/>
431
                </ARCS>
432
            </NODE>
433
            <!-- Workflow node "SetOAIDB" (type SetCurrentOAIDB), declared with isJoin="true";
                 it is the arc target of both SetsCount and ConfigSets.
                 Note the parameter is named oaiDBNameParam here, not dbNameParam as on the other
                 OAI nodes; its value is still oai_dbName.
                 Single outgoing arc: "success". No node with that name appears in this part of the
                 profile; presumably it is the workflow's terminal state (TODO confirm). -->
            <NODE name="SetOAIDB" type="SetCurrentOAIDB" isJoin="true">
434
                <DESCRIPTION>Update the currentdb in the OAI configuration profile to the value in oai_dbName</DESCRIPTION>
435
                <PARAMETERS>
436
                    <PARAM name="oaiDBNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
437
                </PARAMETERS>
438
                <ARCS>
439
                    <ARC to="success"/>
440
                </ARCS>
441
            </NODE>
449 442
        </CONFIGURATION>
450
        <STATUS />
443
        <!-- Status section of the workflow profile: four LAST_EXECUTION_* fields
             (id, date, status, error), all empty in this profile.
             NOTE(review): presumably filled in at runtime after an execution; confirm. -->
        <STATUS>
444
            <LAST_EXECUTION_ID></LAST_EXECUTION_ID>
445
            <LAST_EXECUTION_DATE></LAST_EXECUTION_DATE>
446
            <LAST_EXECUTION_STATUS></LAST_EXECUTION_STATUS>
447
            <LAST_EXECUTION_ERROR></LAST_EXECUTION_ERROR>
448
        </STATUS>
451 449
    </BODY>
452
</RESOURCE_PROFILE>
450
</RESOURCE_PROFILE>

Also available in: Unified diff