<?xml version="1.0" encoding="UTF-8"?>
<workflow-app name="test-core_examples_javamapreduce_stats" xmlns="uri:oozie:workflow:0.4">
	<!-- MapReduce job that exports HBase data and prepares it for import into the relational database used for statistics generation -->

	<!-- Workflow-wide defaults: cluster endpoints, the job queue and the Sqoop
	     log level, declared once for the actions below. -->
	<global>
		<job-tracker>${jobTracker}</job-tracker>
		<name-node>${nameNode}</name-node>
		<configuration>
			<property>
				<name>mapred.job.queue.name</name>
				<value>${queueName}</value>
			</property>
			<property>
				<name>oozie.sqoop.log.level</name>
				<value>DEBUG</value>
			</property>
		</configuration>
	</global>

	<!-- Entry point: the workflow begins with the get-scanner action. -->
	<start to="get-scanner" />

	<!-- Runs HbaseScannerGenerator with the list of column families given after
	     the -f flag. Its output is captured so that the mr_export action can read
	     it through wf:actionData('get-scanner')['scan']. -->
	<action name="get-scanner">
		<java>
			<main-class>eu.dnetlib.iis.core.workflows.stats.HbaseScannerGenerator</main-class>
			<!-- column families: -->
			<arg>
				-f
				datasource ,
				datasourceOrganization_provision_provides,
  				organization,
				project,
				projectOrganization_participation_hasParticipant,
				result
				, resultProject_outcome_produces,
<!-- 		    personResult_authorship_hasAuthor, -->
				  resultResult_publicationDataset_isRelatedTo
			</arg>
			<capture-output />
		</java>
		<ok to="mr_export" />
		<error to="fail" />
	</action>

	<!-- MapReduce export job: reads the source HBase table with the scan produced
	     by get-scanner, runs StatsMapper / StatsReducer and writes one text output
	     per named output (out1 .. out20) under ${Stats_output_Path}. The output
	     directory is deleted first so the job can be re-run. -->
	<action name="mr_export">
		<map-reduce>
			<prepare>
				<delete path="${nameNode}${Stats_output_Path}" />
			</prepare>
			<configuration>
				<!-- Serialized scan captured from the get-scanner action -->
				<property>
					<name>hbase.mapreduce.scan</name>
					<value>${wf:actionData('get-scanner')['scan']}</value>
				</property>
				<property>
					<name>hbase.rootdir</name>
					<!--<value>hdfs://nmis-hadoop-cluster/hbase</value> -->
					<!-- <value>hdfs://dm-cluster-nn</value> -->
					<!-- Must use the ${...} form: a bare $nameNode is not an EL expression and would be passed through literally -->
					<value>${nameNode}/hbase</value>
				</property>
				<property>
					<name>hbase.security.authentication</name>
					<value>simple</value>
				</property>

				<!-- ZOOKEEPER -->
				<property>
					<name>hbase.zookeeper.quorum</name>
					<!-- Keep this list free of surrounding whitespace so that no host name carries a stray space -->
					<value>namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu</value>
					<!-- <value> quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu </value> -->
				</property>
				<property>
					<name>zookeeper.znode.rootserver</name>
					<value>root-region-server</value>
				</property>
				<property>
					<name>hbase.zookeeper.property.clientPort</name>
					<value>2181</value>
					<!--<value>2182</value> -->
				</property>

				<!-- MR IO -->
				<property>
					<name>mapreduce.inputformat.class</name>
					<value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
				</property>
				<property>
					<name>mapred.mapoutput.key.class</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapred.mapoutput.value.class</name>
					<value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value>
				</property>
				<property>
					<name>mapred.output.key.class</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapred.output.value.class</name>
					<!-- NOTE(review): this is an OutputFormat class, not a value type; every named output below declares Text. Confirm whether org.apache.hadoop.io.Text was intended. -->
					<value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
				</property>

				<!-- ## This is required for new MapReduce API usage -->
				<property>
					<name>mapred.mapper.new-api</name>
					<value>true</value>
				</property>
				<property>
					<name>mapred.reducer.new-api</name>
					<value>true</value>
				</property>

				<!-- # Job-specific options -->
				<property>
					<name>dfs.blocksize</name>
					<value>32M</value>
				</property>
				<property>
					<name>mapred.output.compress</name>
					<value>false</value>
				</property>
				<!-- Speculative execution is switched off for both reduce and map tasks -->
				<property>
					<name>mapred.reduce.tasks.speculative.execution</name>
					<value>false</value>
				</property>
				<property>
					<name>mapred.map.tasks.speculative.execution</name>
					<value>false</value>
				</property>
				<property>
					<name>mapreduce.map.speculative</name>
					<value>false</value>
				</property>

				<!-- I/O FORMAT -->
				<!-- IMPORTANT: sets the default delimiter used by the text output writer. Required to fix an issue with a trailing tab added between id and value in multiple outputs -->
				<property>
					<name>mapred.textoutputformat.separator</name>
					<value>${Stats_delim_Character}</value>
				</property>

				<!-- ## Names of all output ports -->
				<property>
					<name>mapreduce.multipleoutputs</name>
					<value>${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20}</value>
				</property>

				<!-- Each named output declares its key class, value class and output format. -->
				<!-- NOTE(review): the labels on out8/out14 (resultProject) and out5/out17 (resultDatasource) are duplicated; confirm which entity each port carries. -->

				<!-- datasource -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- datasourceLanguage -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- datasourceOrganization -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- datasourceTopic -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- resultDatasource -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- organization -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- projectOrganization -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out7}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- resultProject -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out8}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- project -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out9}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- resultConcept -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out10}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- resultClaim -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out11}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- resultClassification -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out12}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- resultLanguage -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out13}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- resultProject -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out14}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- resultResult -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out15}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- resultTopic -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out16}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- resultDatasource -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out17}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- result -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out18}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- claim -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out19}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- concept -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out20}.value</name>
					<value>org.apache.hadoop.io.Text</value>
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
				</property>

				<!-- ## Classes of mapper and reducer -->
				<property>
					<name>mapreduce.map.class</name>
					<value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsMapper</value>
				</property>
				<property>
					<name>mapreduce.reduce.class</name>
					<value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsReducer</value>
				</property>
				<property>
					<name>io.serializations</name>
					<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
				</property>

				<!-- ## Custom config -->
				<!-- Delimiter character used to separate fields in the HDFS dump files -->
				<property>
					<name>stats.delim</name>
					<value>${Stats_delim_Character}</value>
				</property>
				<!-- Default string for null string values -->
				<property>
					<name>stats.nullString</name>
					<value>${Stats_null_String_Field}</value>
				</property>
				<!-- Default string for null numeric values -->
				<property>
					<name>stats.nullNum</name>
					<value>${Stats_null_Numeric_Field}</value>
				</property>
				<property>
					<name>stats.enclChar</name>
					<value>${Stats_enclosing_Character}</value>
				</property>

				<!-- Source HBase table (set under both property names) -->
				<property>
					<name>hbase.mapreduce.inputtable</name>
					<value>${Stats_Hbase_Source_Table}</value>
				</property>
				<property>
					<name>hbase.mapred.inputtable</name>
					<value>${Stats_Hbase_Source_Table}</value>
				</property>

				<property>
					<!-- mapping of protos entities to tables in the relDB -->
					<name>stats.dbTablesMap</name>
					<value>${Stats_db_table_map}</value>
				</property>

				<!-- This directory does not correspond to a data store. In fact, this directory only contains multiple data stores. It has to be set to the name of the workflow node. -->
				<property>
					<name>mapred.output.dir</name>
					<value>${Stats_output_Path}</value>
				</property>
				<property>
					<name>stats.indexConf</name>
					<value>${Stats_indexConf}</value>
				</property>

				<!-- ## Workflow node parameters -->
				<property>
					<name>mapred.reduce.tasks</name>
					<value>${numReducers}</value>
				</property>
			</configuration>
		</map-reduce>
		<ok to="exportContext" />
		<error to="fail" />
	</action>

	<!-- Runs ContextExportWrapper with the export output path and the IS lookup
	     endpoint as arguments, then continues with prepareDatabase. -->
	<action name="exportContext">
		<java>
			<prepare />
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<main-class>eu.dnetlib.iis.core.workflows.stats.ContextExportWrapper</main-class>
			<arg>${Stats_output_Path}</arg>
			<arg>${isLookupEndpoint}</arg>
		</java>
		<ok to="prepareDatabase" />
		<error to="fail" />
	</action>

	<!-- Launches DBInitWrapper through ProcessWrapper, passing the statistics
	     database connection settings as -P parameters, before the Sqoop import. -->
	<!-- NOTE(review): the database password is passed as a plain argument and may be visible in the job definition or logs; confirm this is acceptable. -->
	<action name="prepareDatabase">
		<java>
			<prepare />
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
			<arg>-SworkingDir=${workingDir}</arg>
			<arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
			<arg>-PStats_db_Url=${Stats_db_Url}</arg>
			<arg>-PStats_db_User=${Stats_db_User}</arg>
			<arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
			<arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
		</java>
		<ok to="sqoopImport" />
		<error to="fail" />
	</action>

	<!-- Launches SqoopWrapper through ProcessWrapper, passing the database
	     connection settings, the export output path, the Sqoop tuning parameters
	     and the delimiter / enclosing characters used by the export. -->
	<!-- NOTE(review): the database password is passed as a plain argument and may be visible in the job definition or logs; confirm this is acceptable. -->
	<action name="sqoopImport">
		<java>
			<prepare />
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
				<property>
					<name>oozie.sqoop.log.level</name>
					<value>DEBUG</value>
				</property>
			</configuration>
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
			<arg>-SworkingDir=${workingDir}</arg>
			<arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg>
			<arg>-PStats_db_Url=${Stats_db_Url}</arg>
			<arg>-PStats_db_User=${Stats_db_User}</arg>
			<arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
			<arg>-PStats_output_Path=${Stats_output_Path}</arg>
			<arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg>
			<arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg>
			<arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg>
			<arg>-PStats_delim_Character=${Stats_delim_Character}</arg>
			<arg>-PStats_db_table_map=${Stats_db_table_map}</arg>
			<arg>-PStats_enclosing_Character=${Stats_enclosing_Character}</arg>
		</java>
		<ok to="finalizeDatabase" />
		<error to="fail" />
	</action>

	<!-- Launches DBFinalizeWrapper through ProcessWrapper with the statistics
	     database connection settings, after the Sqoop import has completed. -->
	<!-- NOTE(review): the database password is passed as a plain argument and may be visible in the job definition or logs; confirm this is acceptable. -->
	<action name="finalizeDatabase">
		<java>
			<prepare />
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
			<arg>-SworkingDir=${workingDir}</arg>
			<arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg>
			<arg>-PStats_db_Url=${Stats_db_Url}</arg>
			<arg>-PStats_db_User=${Stats_db_User}</arg>
			<arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
			<arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
		</java>
		<ok to="cleanUpHDFS" />
		<error to="fail" />
	</action>

	<!-- Deletes the export output directory from HDFS once the database has been
	     finalized; this is the last action before the workflow ends. -->
	<action name="cleanUpHDFS">
		<fs>
			<delete path="${nameNode}${Stats_output_Path}" />
		</fs>
		<ok to="end" />
		<error to="fail" />
	</action>

	<!-- Terminal failure node: every action's error transition leads here. The
	     message reports the error of the last node that failed. -->
	<kill name="fail">
		<message>
			Unfortunately, the process failed -- error message:
			[${wf:errorMessage(wf:lastErrorNode())}]
		</message>
	</kill>

	<!-- Successful completion of the workflow. -->
	<end name="end" />
</workflow-app>