<?xml version="1.0" encoding="UTF-8"?>
<workflow-app name="stats-export"
              xmlns="uri:oozie:workflow:0.4">
3
    <!-- MapReduce job that exports HBase data and prepares it for import into the relational
        database used for statistics generation -->
5

    
6
    <!-- Defaults shared by the actions of this workflow: the cluster endpoints and the
        common job properties listed below. All endpoint and queue values are supplied
        through workflow parameters. -->
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <!-- Scheduler queue the jobs are submitted to. -->
            <property>
                <name>mapred.job.queue.name</name>
                <value>${queueName}</value>
            </property>
            <!-- NOTE(review): DEBUG logging for Sqoop actions is verbose; confirm it is
                still wanted outside of troubleshooting. -->
            <property>
                <name>oozie.sqoop.log.level</name>
                <value>DEBUG</value>
            </property>
            <!-- Priority of the Oozie launcher job and of the jobs it starts. -->
            <property>
                <name>oozie.launcher.mapred.job.priority</name>
                <value>NORMAL</value>
            </property>
            <property>
                <name>mapred.job.priority</name>
                <value>NORMAL</value>
            </property>
        </configuration>
    </global>
28
    <start to="exportUsageStats"/>
29
    <action name="mr_export">
30
        <map-reduce>
31

    
32
            <prepare>
33
                <delete path="${nameNode}${Stats_output_Path}"/>
34

    
35
            </prepare>
36
            <configuration>
37
                <property>
38
                    <name>hbase.mapreduce.scan</name>
39
                    <value>${wf:actionData('get-scanner')['scan']}</value>
40
                </property>
41
                <!-- HBase root directory on HDFS, resolved from the nameNode workflow
                    parameter. The parameter must be written in the brace-delimited EL form;
                    a bare dollar-name is not substituted and would reach HBase as literal text. -->
                <property>
                    <name>hbase.rootdir</name>
                    <value>${nameNode}/hbase</value>
                </property>
46

    
47
                <property>
48
                    <name>hbase.security.authentication</name>
49
                    <value>simple</value>
50
                </property>
51
                <!-- ZOOKEEPER -->
52

    
53
                <!-- Comma-separated ZooKeeper ensemble used by the HBase client. The list is
                    kept on a single line with no surrounding whitespace so that no newline
                    or indentation ends up inside a host name. -->
                <property>
                    <name>hbase.zookeeper.quorum</name>
                    <value>namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu</value>
                    <!-- Alternative (test cluster) quorum, kept for reference:
                        <value>quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu</value> -->
                </property>
61
                <property>
62
                    <name>zookeeper.znode.rootserver</name>
63
                    <value>root-region-server</value>
64

    
65
                </property>
66

    
67
                <property>
68
                    <name>hbase.zookeeper.property.clientPort</name>
69
                    <value>2181</value>
70
                </property>
71

    
72

    
73
                <!-- MR IO -->
74

    
75

    
76
                <property>
77
                    <name>mapreduce.inputformat.class</name>
78
                    <value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
79
                </property>
80

    
81
                <property>
82
                    <name>mapred.mapoutput.key.class</name>
83
                    <value>org.apache.hadoop.io.Text</value>
84
                </property>
85
                <property>
86
                    <name>mapred.mapoutput.value.class</name>
87
                    <value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value>
88
                </property>
89
                <property>
90
                    <name>mapred.output.key.class</name>
91
                    <value>org.apache.hadoop.io.Text</value>
92
                </property>
93
                <!-- Job output value type. This must name a Writable; the previous value was
                    an OutputFormat class (SequenceFileOutputFormat), which is not a value type.
                    Text matches mapred.output.key.class and the key/value types declared for
                    the named outputs in this action.
                    NOTE(review): if a SequenceFile output format was intended, configure it
                    through the output format property instead; confirm with the job owner. -->
                <property>
                    <name>mapred.output.value.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
97

    
98
                <!-- ## This is required for new MapReduce API usage -->
99
                <property>
100
                    <name>mapred.mapper.new-api</name>
101
                    <value>true</value>
102
                </property>
103
                <property>
104
                    <name>mapred.reducer.new-api</name>
105
                    <value>true</value>
106
                </property>
107

    
108
                <!-- # Job-specific options -->
109
                <property>
110
                    <name>dfs.blocksize</name>
111
                    <value>32M</value>
112
                </property>
113
                <property>
114
                    <name>mapred.output.compress</name>
115
                    <value>false</value>
116
                </property>
117
                <property>
118
                    <name>mapred.reduce.tasks.speculative.execution</name>
119
                    <value>false</value>
120
                </property>
121
                <!-- Disable speculative execution of map tasks (legacy property name). This
                    entry previously repeated the reduce-side property that is already set
                    immediately before it, leaving the map-side legacy key unset. -->
                <property>
                    <name>mapred.map.tasks.speculative.execution</name>
                    <value>false</value>
                </property>
125
                <property>
126
                    <name>mapreduce.map.speculative</name>
127
                    <value>false</value>
128
                </property>
129

    
130
                <!-- I/O FORMAT -->
131
                <!-- IMPORTANT: sets the default delimiter used by the text output writer. Required to fix
                    an issue with a trailing tab added between id and value in multiple outputs -->
133
                <property>
134
                    <name>mapred.textoutputformat.separator</name>
135
                    <value>${Stats_delim_Character}</value>
136
                </property>
137
                <!-- ## Names of all output ports -->
138

    
139
                <!-- Space-separated list of every named output registered with MultipleOutputs.
                    The list is kept on one line, separated by single spaces only: the names
                    are split on the space character, so a line break inside the value would
                    become part of a name. It also covers out34 through out37 (the usage
                    statistics outputs), whose key/value/format properties are declared further
                    down in this action but which were missing from the list. -->
                <property>
                    <name>mapreduce.multipleoutputs</name>
                    <value>${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21} ${out22} ${out23} ${out24} ${out25} ${out26} ${out27} ${out28} ${out29} ${out30} ${out31} ${out32} ${out33} ${out34} ${out35} ${out36} ${out37}</value>
                </property>
150
                <!-- datasource -->
151
                <property>
152
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name>
153
                    <value>org.apache.hadoop.io.Text</value>
154
                </property>
155
                <property>
156
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name>
157
                    <value>org.apache.hadoop.io.Text</value>
158
                </property>
159
                <property>
160
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
161
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
162
                </property>
163
                <!-- datasourceLanguage -->
164
                <property>
165
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name>
166
                    <value>org.apache.hadoop.io.Text</value>
167
                </property>
168
                <property>
169
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name>
170
                    <value>org.apache.hadoop.io.Text</value>
171
                </property>
172
                <property>
173
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
174
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
175
                </property>
176

    
177
                <!-- datasourceOrganization -->
178
                <property>
179
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name>
180
                    <value>org.apache.hadoop.io.Text</value>
181
                </property>
182
                <property>
183
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name>
184
                    <value>org.apache.hadoop.io.Text</value>
185
                </property>
186
                <property>
187
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name>
188
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
189
                </property>
190

    
191
                <!-- datasourceTopic -->
192
                <property>
193
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name>
194
                    <value>org.apache.hadoop.io.Text</value>
195
                </property>
196
                <property>
197
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name>
198
                    <value>org.apache.hadoop.io.Text</value>
199
                </property>
200
                <property>
201
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name>
202
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
203
                </property>
204

    
205
                <!-- resultDatasource -->
206
                <property>
207
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name>
208
                    <value>org.apache.hadoop.io.Text</value>
209
                </property>
210
                <property>
211
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name>
212
                    <value>org.apache.hadoop.io.Text</value>
213
                </property>
214
                <property>
215
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name>
216
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
217
                </property>
218
                <!-- organization -->
219
                <property>
220
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name>
221
                    <value>org.apache.hadoop.io.Text</value>
222
                </property>
223
                <property>
224
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name>
225
                    <value>org.apache.hadoop.io.Text</value>
226
                </property>
227
                <property>
228
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name>
229
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
230
                </property>
231

    
232
                <!-- projectOrganization -->
233
                <property>
234
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name>
235
                    <value>org.apache.hadoop.io.Text</value>
236
                </property>
237
                <property>
238
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.value</name>
239
                    <value>org.apache.hadoop.io.Text</value>
240
                </property>
241
                <property>
242
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name>
243
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
244
                </property>
245
                <!-- resultProject -->
246
                <property>
247
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name>
248
                    <value>org.apache.hadoop.io.Text</value>
249
                </property>
250
                <property>
251
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.value</name>
252
                    <value>org.apache.hadoop.io.Text</value>
253
                </property>
254
                <property>
255
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name>
256
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
257
                </property>
258

    
259
                <!-- project -->
260
                <property>
261
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name>
262
                    <value>org.apache.hadoop.io.Text</value>
263
                </property>
264
                <property>
265
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.value</name>
266
                    <value>org.apache.hadoop.io.Text</value>
267
                </property>
268
                <property>
269
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name>
270
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
271
                </property>
272

    
273
                <!-- resultConcept -->
274
                <property>
275
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name>
276
                    <value>org.apache.hadoop.io.Text</value>
277
                </property>
278
                <property>
279
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.value</name>
280
                    <value>org.apache.hadoop.io.Text</value>
281
                </property>
282
                <property>
283
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name>
284
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
285
                </property>
286

    
287
                <!-- resultClaim -->
288
                <property>
289
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name>
290
                    <value>org.apache.hadoop.io.Text</value>
291
                </property>
292
                <property>
293
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.value</name>
294
                    <value>org.apache.hadoop.io.Text</value>
295
                </property>
296
                <property>
297
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name>
298
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
299
                </property>
300

    
301
                <!-- resultClassification -->
302
                <property>
303
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name>
304
                    <value>org.apache.hadoop.io.Text</value>
305
                </property>
306
                <property>
307
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.value</name>
308
                    <value>org.apache.hadoop.io.Text</value>
309
                </property>
310
                <property>
311
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name>
312
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
313
                </property>
314

    
315
                <!-- resultLanguage -->
316
                <property>
317
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name>
318
                    <value>org.apache.hadoop.io.Text</value>
319
                </property>
320
                <property>
321
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.value</name>
322
                    <value>org.apache.hadoop.io.Text</value>
323
                </property>
324
                <property>
325
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name>
326
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
327
                </property>
328

    
329
                <!-- resultProject -->
330
                <property>
331
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name>
332
                    <value>org.apache.hadoop.io.Text</value>
333
                </property>
334
                <property>
335
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.value</name>
336
                    <value>org.apache.hadoop.io.Text</value>
337
                </property>
338
                <property>
339
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name>
340
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
341
                </property>
342
                <!-- resultResult -->
343
                <property>
344
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name>
345
                    <value>org.apache.hadoop.io.Text</value>
346
                </property>
347
                <property>
348
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.value</name>
349
                    <value>org.apache.hadoop.io.Text</value>
350
                </property>
351
                <property>
352
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name>
353
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
354
                </property>
355
                <!-- resultTopic -->
356
                <property>
357
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name>
358
                    <value>org.apache.hadoop.io.Text</value>
359
                </property>
360
                <property>
361
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.value</name>
362
                    <value>org.apache.hadoop.io.Text</value>
363
                </property>
364
                <property>
365
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name>
366
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
367
                </property>
368

    
369
                <!-- resultDatasource -->
370
                <property>
371
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name>
372
                    <value>org.apache.hadoop.io.Text</value>
373
                </property>
374
                <property>
375
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.value</name>
376
                    <value>org.apache.hadoop.io.Text</value>
377
                </property>
378
                <property>
379
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name>
380
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
381
                </property>
382

    
383

    
384
                <!-- result -->
385
                <property>
386
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name>
387
                    <value>org.apache.hadoop.io.Text</value>
388
                </property>
389
                <property>
390
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.value</name>
391
                    <value>org.apache.hadoop.io.Text</value>
392
                </property>
393
                <property>
394
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name>
395
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
396
                </property>
397

    
398
                <!-- claim -->
399
                <property>
400
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name>
401
                    <value>org.apache.hadoop.io.Text</value>
402
                </property>
403
                <property>
404
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.value</name>
405
                    <value>org.apache.hadoop.io.Text</value>
406
                </property>
407
                <property>
408
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name>
409
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
410
                </property>
411

    
412
                <!-- concept -->
413

    
414
                <property>
415
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
416
                    <value>org.apache.hadoop.io.Text</value>
417
                </property>
418
                <property>
419
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.value</name>
420
                    <value>org.apache.hadoop.io.Text</value>
421
                </property>
422
                <property>
423
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
424
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
425
                </property>
426

    
427

    
428
                <!-- persons -->
429

    
430
                <property>
431
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.key</name>
432
                    <value>org.apache.hadoop.io.Text</value>
433
                </property>
434
                <property>
435
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.value</name>
436
                    <value>org.apache.hadoop.io.Text</value>
437
                </property>
438
                <property>
439
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.format</name>
440
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
441
                </property>
442

    
443

    
444
                <!-- person results-->
445
                <property>
446
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.key</name>
447
                    <value>org.apache.hadoop.io.Text</value>
448
                </property>
449
                <property>
450
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.value</name>
451
                    <value>org.apache.hadoop.io.Text</value>
452
                </property>
453
                <property>
454
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.format</name>
455
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
456
                </property>
457

    
458
                <property>
459
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.key</name>
460
                    <value>org.apache.hadoop.io.Text</value>
461
                </property>
462
                <property>
463
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.value</name>
464
                    <value>org.apache.hadoop.io.Text</value>
465
                </property>
466
                <property>
467
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.format</name>
468
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
469
                </property>
470

    
471
                <property>
472
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.key</name>
473
                    <value>org.apache.hadoop.io.Text</value>
474
                </property>
475
                <property>
476
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.value</name>
477
                    <value>org.apache.hadoop.io.Text</value>
478
                </property>
479
                <property>
480
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.format</name>
481
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
482
                </property>
483

    
484
                <property>
485
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.key</name>
486
                    <value>org.apache.hadoop.io.Text</value>
487
                </property>
488
                <property>
489
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.value</name>
490
                    <value>org.apache.hadoop.io.Text</value>
491
                </property>
492
                <property>
493
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.format</name>
494
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
495
                </property>
496

    
497

    
498
                <property>
499
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.key</name>
500
                    <value>org.apache.hadoop.io.Text</value>
501
                </property>
502
                <property>
503
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.value</name>
504
                    <value>org.apache.hadoop.io.Text</value>
505
                </property>
506
                <property>
507
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.format</name>
508
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
509
                </property>
510

    
511
                <property>
512
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.key</name>
513
                    <value>org.apache.hadoop.io.Text</value>
514
                </property>
515
                <property>
516
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.value</name>
517
                    <value>org.apache.hadoop.io.Text</value>
518
                </property>
519
                <property>
520
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.format</name>
521
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
522
                </property>
523
                <property>
524
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.key</name>
525
                    <value>org.apache.hadoop.io.Text</value>
526
                </property>
527
                <property>
528
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.value</name>
529
                    <value>org.apache.hadoop.io.Text</value>
530
                </property>
531
                <property>
532
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.format</name>
533
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
534
                </property>
535

    
536
                <property>
537
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.key</name>
538
                    <value>org.apache.hadoop.io.Text</value>
539
                </property>
540
                <property>
541
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.value</name>
542
                    <value>org.apache.hadoop.io.Text</value>
543
                </property>
544
                <property>
545
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.format</name>
546
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
547
                </property>
548

    
549
                <property>
550
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.key</name>
551
                    <value>org.apache.hadoop.io.Text</value>
552
                </property>
553
                <property>
554
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.value</name>
555
                    <value>org.apache.hadoop.io.Text</value>
556
                </property>
557
                <property>
558
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.format</name>
559
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
560
                </property>
561

    
562
                <property>
563
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.key</name>
564
                    <value>org.apache.hadoop.io.Text</value>
565
                </property>
566
                <property>
567
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.value</name>
568
                    <value>org.apache.hadoop.io.Text</value>
569
                </property>
570
                <property>
571
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.format</name>
572
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
573
                </property>
574

    
575
                <property>
576
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.key</name>
577
                    <value>org.apache.hadoop.io.Text</value>
578
                </property>
579
                <property>
580
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.value</name>
581
                    <value>org.apache.hadoop.io.Text</value>
582
                </property>
583
                <property>
584
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.format</name>
585
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
586
                </property>
587

    
588

    
589
                <property>
590
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.key</name>
591
                    <value>org.apache.hadoop.io.Text</value>
592
                </property>
593
                <property>
594
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.value</name>
595
                    <value>org.apache.hadoop.io.Text</value>
596
                </property>
597
                <property>
598
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.format</name>
599
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
600
                </property>
601

    
602

    
603
                <!--usage statistics start here-->
604

    
605

    
606
                <!--datasourceStats-->
607
                <property>
608
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.key</name>
609
                    <value>org.apache.hadoop.io.Text</value>
610
                </property>
611
                <property>
612
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.value</name>
613
                    <value>org.apache.hadoop.io.Text</value>
614
                </property>
615
                <property>
616
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.format</name>
617
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
618
                </property>
619
                <!--projectStats-->
620

    
621

    
622
                <property>
623
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.key</name>
624
                    <value>org.apache.hadoop.io.Text</value>
625
                </property>
626
                <property>
627
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.value</name>
628
                    <value>org.apache.hadoop.io.Text</value>
629
                </property>
630
                <property>
631
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.format</name>
632
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
633
                </property>
634

    
635

    
636
                <!--resultStats-->
637

    
638
                <property>
639
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.key</name>
640
                    <value>org.apache.hadoop.io.Text</value>
641
                </property>
642
                <property>
643
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.value</name>
644
                    <value>org.apache.hadoop.io.Text</value>
645
                </property>
646
                <property>
647
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.format</name>
648
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
649
                </property>
650

    
651

    
652
                <!--organizationStats-->
653

    
654
                <property>
655
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.key</name>
656
                    <value>org.apache.hadoop.io.Text</value>
657
                </property>
658
                <property>
659
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.value</name>
660
                    <value>org.apache.hadoop.io.Text</value>
661
                </property>
662
                <property>
663
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.format</name>
664
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
665
                </property>
666

    
667

    
668

    
669
                <!-- ## Classes of mapper and reducer -->
670

    
671
                <property>
672
                    <name>mapreduce.map.class</name>
673
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsMapper</value>
674
                </property>
675
                <property>
676
                    <name>mapreduce.reduce.class</name>
677
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsReducer</value>
678
                </property>
679
                <property>
680
                    <name>io.serializations</name>
681
                    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
682
                </property>
683

    
684
                <!-- ## Custom config -->
685

    
686
                <!-- Delimiter character used to separate fields in the HDFS dump files -->
687
                <property>
688
                    <name>stats.delim</name>
689
                    <value>${Stats_delim_Character}</value>
690
                </property>
691
                <!--default string for Null String Values -->
692
                <property>
693
                    <name>stats.nullString</name>
694
                    <value>${Stats_null_String_Field}</value>
695
                </property>
696
                <!--default string for Null Numeric Values -->
697
                <property>
698
                    <name>stats.nullNum</name>
699
                    <value>${Stats_null_Numeric_Field}</value>
700
                </property>
701
                <property>
702
                    <name>stats.enclChar</name>
703
                    <value>${Stats_enclosing_Character}</value>
704
                </property>
705

    
706

    
707
                <!--source hbase table -->
708
                <property>
709
                    <name>hbase.mapreduce.inputtable</name>
710
                    <value>${Stats_Hbase_Source_Table}</value>
711
                </property>
712
                <property>
713
                    <name>hbase.mapred.inputtable</name>
714
                    <value>${Stats_Hbase_Source_Table}</value>
715
                </property>
716

    
717
                <property>
718
                    <!-- mapping of protos entities to tables in the relDB -->
719
                    <name>stats.dbTablesMap</name>
720
                    <value>${Stats_db_table_map}</value>
721
                </property>
722

    
723
                <!-- This directory does not correspond to a data store. In fact, this directory only
724
                    contains multiple data stores. It has to be set to the name of the workflow node.
725
                    -->
726
                <property>
727
                    <name>mapred.output.dir</name>
728
                    <value>${Stats_output_Path}</value>
729
                </property>
730
                <property>
731
                    <name>stats.indexConf</name>
732
                    <value>${Stats_indexConf}</value>
733
                </property>
734
                <!-- ## Workflow node parameters -->
735
                <property>
736
                    <name>mapred.reduce.tasks</name>
737
                    <value>${numReducers}</value>
738
                </property>
739

    
740
            </configuration>
741
        </map-reduce>
742
        <ok to="exportContext"/>
743

    
744
        <error to="fail"/>
745
    </action>
746

    
747
    <!-- Java action that runs ContextExportWrapper, passing ${Stats_output_Path} and
         ${isLookupEndpoint} as its two arguments.
         The node is named "exportContext": that is the node the mr_export action
         transitions to on success, and node names must be unique within a workflow
         (a separate action running UsageStatsExportWrapper already owns the name
         "exportUsageStats"). On success control moves on to exportUsageStats; on
         error it moves to the "fail" kill node. -->
    <action name="exportContext">
        <java>
            <prepare>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.workflows.stats.ContextExportWrapper</main-class>
            <arg>${Stats_output_Path}</arg>
            <arg>${isLookupEndpoint}</arg>
        </java>
        <ok to="exportUsageStats"/>
        <error to="fail"/>
    </action>
765

    
766
    <!-- Java action that runs UsageStatsExportWrapper. Arguments, in order:
         ${Stats_usageDB_url}, ${Stats_usageDB_Driver}, ${Stats_delim_Character},
         ${Stats_output_Path}, ${Stats_usageDB_entities}.
         Success continues to sqoopImport; any error goes to the "fail" kill node. -->
    <action name="exportUsageStats">
        <java>
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.workflows.stats.UsageStatsExportWrapper</main-class>
            <arg>${Stats_usageDB_url}</arg>
            <arg>${Stats_usageDB_Driver}</arg>
            <arg>${Stats_delim_Character}</arg>
            <arg>${Stats_output_Path}</arg>
            <arg>${Stats_usageDB_entities}</arg>
        </java>
        <ok to="sqoopImport"/>
        <error to="fail"/>
    </action>
787

    
788
    <!-- Java action that launches ProcessWrapper, which is handed the class name
         DBInitWrapper plus the statistics database connection parameters
         (url, user, password, driver) as -P arguments and the working directory as
         an -S argument. Success continues to sqoopImport; errors go to "fail". -->
    <action name="prepareDatabase">
        <java>
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
        </java>
        <ok to="sqoopImport"/>
        <error to="fail"/>
    </action>
812

    
813
    <!-- Java action that launches ProcessWrapper with the class name SqoopWrapper.
         The -P arguments forward the statistics database url/user/password, the
         stats output path, the three Stats_sqoop_* tuning parameters, the field
         delimiter, the table map and the enclosing character.
         Success transitions straight to "end"; errors go to the "fail" kill node. -->
    <action name="sqoopImport">
        <java>
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>oozie.sqoop.log.level</name>
                    <value>DEBUG</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg>
            <!-- database connection -->
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <!-- input location and sqoop parameters -->
            <arg>-PStats_output_Path=${Stats_output_Path}</arg>
            <arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg>
            <arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg>
            <arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg>
            <arg>-PStats_delim_Character=${Stats_delim_Character}</arg>
            <arg>-PStats_db_table_map=${Stats_db_table_map}</arg>
            <arg>-PStats_enclosing_Character=${Stats_enclosing_Character}</arg>
        </java>
        <ok to="end"/>
        <error to="fail"/>
    </action>
851

    
852
    <!-- Java action that launches ProcessWrapper with the class name
         DBFinalizeWrapper and the statistics database connection parameters
         (url, user, password, driver). Success continues to cleanUpHDFS; errors
         go to the "fail" kill node. -->
    <action name="finalizeDatabase">
        <java>
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
        </java>
        <ok to="cleanUpHDFS"/>
        <error to="fail"/>
    </action>
874

    
875
    <!-- Filesystem action that deletes ${nameNode}${Stats_output_Path}, then
         transitions to "end" (or to "fail" if the delete reports an error). -->
    <action name="cleanUpHDFS">
        <fs>
            <delete path="${nameNode}${Stats_output_Path}"/>
        </fs>
        <ok to="end"/>
        <error to="fail"/>
    </action>
884
    <!-- Kill node targeted by the error transitions of the actions above. The message
         embeds the error message of the last node that failed. -->
    <kill name="fail">
        <message>
            Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>
889
    <!-- End node: the workflow completes successfully once a transition reaches "end". -->
    <end name="end"/>
890
</workflow-app>
    (1-1/1)