Project

General

Profile

1
<workflow-app name="stats-export"
2
              xmlns="uri:oozie:workflow:0.4">
3
    <!-- MapReduce job that exports HBase data and prepares it for import into the relational
         database used for statistics generation -->
5

    
6
    <!-- Defaults shared by the actions of this workflow: cluster endpoints plus common job settings. -->
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <!-- Scheduler queue the jobs are submitted to -->
            <property>
                <name>mapred.job.queue.name</name>
                <value>${queueName}</value>
            </property>

            <!-- Job priority: once with the oozie.launcher prefix, once for the job itself -->
            <property>
                <name>oozie.launcher.mapred.job.priority</name>
                <value>NORMAL</value>
            </property>
            <property>
                <name>mapred.job.priority</name>
                <value>NORMAL</value>
            </property>

            <!-- Logging -->
            <property>
                <name>oozie.sqoop.log.level</name>
                <value>DEBUG</value>
            </property>
        </configuration>
    </global>
28

    
29
    <start to="mr_export"/>
30

    
31
    <action name="mr_export">
32
        <map-reduce>
33

    
34
            <!-- Delete the statistics output path (name node URI + Stats_output_Path) before the job starts. -->
            <prepare>
                <delete path="${nameNode}${Stats_output_Path}"/>
            </prepare>
38
            <configuration>
39
                <!-- HBase scan definition, taken from the output data of the 'get-scanner' action. -->
                <!-- NOTE(review): the start node transitions straight to mr_export, so no action can
                     run before it in the part of the workflow visible here. Confirm that 'get-scanner'
                     exists and completes before this action; otherwise this value resolves to nothing. -->
                <property>
                    <name>hbase.mapreduce.scan</name>
                    <value>${wf:actionData('get-scanner')['scan']}</value>
                </property>

                <!-- HBase root directory, located under the name node URI. The reference must use the
                     braced EL form: a bare dollar-prefixed name is not substituted by Oozie and would
                     be passed to HBase as a literal string. -->
                <property>
                    <name>hbase.rootdir</name>
                    <value>${nameNode}/hbase</value>
                </property>

                <property>
                    <name>hbase.security.authentication</name>
                    <value>simple</value>
                </property>
53
                <!-- ZooKeeper settings for the HBase client: client port, root-region znode, quorum hosts -->
                <property>
                    <name>hbase.zookeeper.property.clientPort</name>
                    <value>2181</value>
                </property>
                <property>
                    <name>zookeeper.znode.rootserver</name>
                    <value>root-region-server</value>
                </property>
                <property>
                    <name>hbase.zookeeper.quorum</name>
                    <value>
                        namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu
                    </value>
                    <!-- Alternative quorum, kept for reference (disabled):
                         <value> quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
                         </value> -->
                </property>
73

    
74

    
75
                <!-- MR IO: input format, then map-output and job-output key/value classes -->
                <property>
                    <name>mapreduce.inputformat.class</name>
                    <value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
                </property>

                <!-- Map output (intermediate) types -->
                <property>
                    <name>mapred.mapoutput.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapred.mapoutput.value.class</name>
                    <value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value>
                </property>

                <!-- Job output (final) types -->
                <property>
                    <name>mapred.output.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <!-- NOTE(review): the value below names an OutputFormat class, not a value type.
                     It may have been intended for an output-format property instead. Left unchanged
                     here; confirm against what the reducer actually emits. -->
                <property>
                    <name>mapred.output.value.class</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
                </property>
99

    
100
                <!-- Both flags are required so the mapper and reducer run through the new MapReduce API -->
                <property>
                    <name>mapred.mapper.new-api</name>
                    <value>true</value>
                </property>
                <property>
                    <name>mapred.reducer.new-api</name>
                    <value>true</value>
                </property>
109

    
110
                <!-- Job-specific options -->
                <property>
                    <name>dfs.blocksize</name>
                    <value>32M</value>
                </property>
                <property>
                    <name>mapred.output.compress</name>
                    <value>false</value>
                </property>

                <!-- Speculative execution is switched off for both phases. The reduce-side property
                     used to be declared twice; the second declaration is now the map-side
                     counterpart, so map tasks are covered under the mapred-style name as well. -->
                <property>
                    <name>mapred.reduce.tasks.speculative.execution</name>
                    <value>false</value>
                </property>
                <property>
                    <name>mapred.map.tasks.speculative.execution</name>
                    <value>false</value>
                </property>
                <property>
                    <name>mapreduce.map.speculative</name>
                    <value>false</value>
                </property>
131

    
132
                <!-- I/O FORMAT -->
133
                <!-- IMPORTANT: sets the default delimiter used by the text output writer. Required to fix
                     the issue with a trailing tab added between id and value in multiple outputs -->
135
                <!-- Key/value separator written by the text output format -->
                <property>
                    <name>mapred.textoutputformat.separator</name>
                    <value>${Stats_delim_Character}</value>
                </property>

                <!-- Space-separated names of all named outputs (output ports) of the job -->
                <property>
                    <name>mapreduce.multipleoutputs</name>
                    <value>
                        ${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21} ${out22} ${out23} ${out24} ${out25} ${out26} ${out27} ${out28} ${out29} ${out30} ${out31} ${out32} ${out33} ${out34} ${out35} ${out36} ${out37} ${out38} ${out39}
                    </value>
                </property>
149
                <!-- datasource -->
150
                <property>
151
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name>
152
                    <value>org.apache.hadoop.io.Text</value>
153
                </property>
154
                <property>
155
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name>
156
                    <value>org.apache.hadoop.io.Text</value>
157
                </property>
158
                <property>
159
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
160
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
161
                </property>
162
                <!-- project -->
163
                <property>
164
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name>
165
                    <value>org.apache.hadoop.io.Text</value>
166
                </property>
167
                <property>
168
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name>
169
                    <value>org.apache.hadoop.io.Text</value>
170
                </property>
171
                <property>
172
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
173
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
174
                </property>
175

    
176
                <!-- organization -->
177
                <property>
178
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name>
179
                    <value>org.apache.hadoop.io.Text</value>
180
                </property>
181
                <property>
182
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name>
183
                    <value>org.apache.hadoop.io.Text</value>
184
                </property>
185
                <property>
186
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name>
187
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
188
                </property>
189

    
190
                <!-- datasourceOrganization -->
191
                <property>
192
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name>
193
                    <value>org.apache.hadoop.io.Text</value>
194
                </property>
195
                <property>
196
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name>
197
                    <value>org.apache.hadoop.io.Text</value>
198
                </property>
199
                <property>
200
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name>
201
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
202
                </property>
203

    
204
                <!-- datasourceTopic -->
205
                <property>
206
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name>
207
                    <value>org.apache.hadoop.io.Text</value>
208
                </property>
209
                <property>
210
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name>
211
                    <value>org.apache.hadoop.io.Text</value>
212
                </property>
213
                <property>
214
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name>
215
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
216
                </property>
217
                <!-- datasourceLanguage -->
218
                <property>
219
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name>
220
                    <value>org.apache.hadoop.io.Text</value>
221
                </property>
222
                <property>
223
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name>
224
                    <value>org.apache.hadoop.io.Text</value>
225
                </property>
226
                <property>
227
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name>
228
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
229
                </property>
230

    
231
                <!-- projectOrganization -->
232
                <property>
233
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name>
234
                    <value>org.apache.hadoop.io.Text</value>
235
                </property>
236
                <property>
237
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.value</name>
238
                    <value>org.apache.hadoop.io.Text</value>
239
                </property>
240
                <property>
241
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name>
242
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
243
                </property>
244
                <!-- resultClaim -->
245
                <property>
246
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name>
247
                    <value>org.apache.hadoop.io.Text</value>
248
                </property>
249
                <property>
250
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.value</name>
251
                    <value>org.apache.hadoop.io.Text</value>
252
                </property>
253
                <property>
254
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name>
255
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
256
                </property>
257

    
258
                <!-- resultClassification -->
259
                <property>
260
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name>
261
                    <value>org.apache.hadoop.io.Text</value>
262
                </property>
263
                <property>
264
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.value</name>
265
                    <value>org.apache.hadoop.io.Text</value>
266
                </property>
267
                <property>
268
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name>
269
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
270
                </property>
271

    
272
                <!-- resultConcept -->
273
                <property>
274
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name>
275
                    <value>org.apache.hadoop.io.Text</value>
276
                </property>
277
                <property>
278
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.value</name>
279
                    <value>org.apache.hadoop.io.Text</value>
280
                </property>
281
                <property>
282
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name>
283
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
284
                </property>
285

    
286
                <!-- resultLanguage -->
287
                <property>
288
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name>
289
                    <value>org.apache.hadoop.io.Text</value>
290
                </property>
291
                <property>
292
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.value</name>
293
                    <value>org.apache.hadoop.io.Text</value>
294
                </property>
295
                <property>
296
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name>
297
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
298
                </property>
299

    
300
                <!-- resultOrganization -->
301
                <property>
302
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name>
303
                    <value>org.apache.hadoop.io.Text</value>
304
                </property>
305
                <property>
306
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.value</name>
307
                    <value>org.apache.hadoop.io.Text</value>
308
                </property>
309
                <property>
310
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name>
311
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
312
                </property>
313

    
314
                <!-- resultResult -->
315
                <property>
316
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name>
317
                    <value>org.apache.hadoop.io.Text</value>
318
                </property>
319
                <property>
320
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.value</name>
321
                    <value>org.apache.hadoop.io.Text</value>
322
                </property>
323
                <property>
324
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name>
325
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
326
                </property>
327

    
328
                <!-- resultProject -->
329
                <property>
330
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name>
331
                    <value>org.apache.hadoop.io.Text</value>
332
                </property>
333
                <property>
334
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.value</name>
335
                    <value>org.apache.hadoop.io.Text</value>
336
                </property>
337
                <property>
338
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name>
339
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
340
                </property>
341
                <!-- resultTopic -->
342
                <property>
343
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name>
344
                    <value>org.apache.hadoop.io.Text</value>
345
                </property>
346
                <property>
347
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.value</name>
348
                    <value>org.apache.hadoop.io.Text</value>
349
                </property>
350
                <property>
351
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name>
352
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
353
                </property>
354
                <!-- resultDatasource -->
355
                <property>
356
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name>
357
                    <value>org.apache.hadoop.io.Text</value>
358
                </property>
359
                <property>
360
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.value</name>
361
                    <value>org.apache.hadoop.io.Text</value>
362
                </property>
363
                <property>
364
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name>
365
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
366
                </property>
367

    
368
                <!-- result -->
369
                <property>
370
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name>
371
                    <value>org.apache.hadoop.io.Text</value>
372
                </property>
373
                <property>
374
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.value</name>
375
                    <value>org.apache.hadoop.io.Text</value>
376
                </property>
377
                <property>
378
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name>
379
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
380
                </property>
381

    
382

    
383
                <!-- context -->
384
                <property>
385
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name>
386
                    <value>org.apache.hadoop.io.Text</value>
387
                </property>
388
                <property>
389
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.value</name>
390
                    <value>org.apache.hadoop.io.Text</value>
391
                </property>
392
                <property>
393
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name>
394
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
395
                </property>
396

    
397
                <!-- concept -->
398
                <property>
399
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name>
400
                    <value>org.apache.hadoop.io.Text</value>
401
                </property>
402
                <property>
403
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.value</name>
404
                    <value>org.apache.hadoop.io.Text</value>
405
                </property>
406
                <property>
407
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name>
408
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
409
                </property>
410

    
411
                <!-- category -->
412

    
413
                <property>
414
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
415
                    <value>org.apache.hadoop.io.Text</value>
416
                </property>
417
                <property>
418
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.value</name>
419
                    <value>org.apache.hadoop.io.Text</value>
420
                </property>
421
                <property>
422
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
423
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
424
                </property>
425

    
426

    
427
                <!-- person -->
428

    
429
                <property>
430
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.key</name>
431
                    <value>org.apache.hadoop.io.Text</value>
432
                </property>
433
                <property>
434
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.value</name>
435
                    <value>org.apache.hadoop.io.Text</value>
436
                </property>
437
                <property>
438
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.format</name>
439
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
440
                </property>
441

    
442

    
443
                <!-- personResult -->
444
                <property>
445
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.key</name>
446
                    <value>org.apache.hadoop.io.Text</value>
447
                </property>
448
                <property>
449
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.value</name>
450
                    <value>org.apache.hadoop.io.Text</value>
451
                </property>
452
                <property>
453
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.format</name>
454
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
455
                </property>
456

    
457
                <!--resultCitation -->
458
                <property>
459
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.key</name>
460
                    <value>org.apache.hadoop.io.Text</value>
461
                </property>
462
                <property>
463
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.value</name>
464
                    <value>org.apache.hadoop.io.Text</value>
465
                </property>
466
                <property>
467
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.format</name>
468
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
469
                </property>
470

    
471
                <!--resultPid -->
472
                <property>
473
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.key</name>
474
                    <value>org.apache.hadoop.io.Text</value>
475
                </property>
476
                <property>
477
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.value</name>
478
                    <value>org.apache.hadoop.io.Text</value>
479
                </property>
480
                <property>
481
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.format</name>
482
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
483
                </property>
484

    
485
                <!-- resultOid-->
486

    
487
                <property>
488
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.key</name>
489
                    <value>org.apache.hadoop.io.Text</value>
490
                </property>
491
                <property>
492
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.value</name>
493
                    <value>org.apache.hadoop.io.Text</value>
494
                </property>
495
                <property>
496
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.format</name>
497
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
498
                </property>
499

    
500
                <!--projectOid-->
501
                <property>
502
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.key</name>
503
                    <value>org.apache.hadoop.io.Text</value>
504
                </property>
505
                <property>
506
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.value</name>
507
                    <value>org.apache.hadoop.io.Text</value>
508
                </property>
509
                <property>
510
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.format</name>
511
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
512
                </property>
513

    
514
                <!-- projectKeyword-->
515
                <property>
516
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.key</name>
517
                    <value>org.apache.hadoop.io.Text</value>
518
                </property>
519
                <property>
520
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.value</name>
521
                    <value>org.apache.hadoop.io.Text</value>
522
                </property>
523
                <property>
524
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.format</name>
525
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
526
                </property>
527

    
528
                <!-- projectSubject-->
529

    
530
                <property>
531
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.key</name>
532
                    <value>org.apache.hadoop.io.Text</value>
533
                </property>
534
                <property>
535
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.value</name>
536
                    <value>org.apache.hadoop.io.Text</value>
537
                </property>
538
                <property>
539
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.format</name>
540
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
541
                </property>
542

    
543
                <!--  organizationOid-->
544
                <property>
545
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.key</name>
546
                    <value>org.apache.hadoop.io.Text</value>
547
                </property>
548
                <property>
549
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.value</name>
550
                    <value>org.apache.hadoop.io.Text</value>
551
                </property>
552
                <property>
553
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.format</name>
554
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
555
                </property>
556

    
557

    
558
                <!--     datasourceOid-->
559

    
560
                <property>
561
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.key</name>
562
                    <value>org.apache.hadoop.io.Text</value>
563
                </property>
564
                <property>
565
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.value</name>
566
                    <value>org.apache.hadoop.io.Text</value>
567
                </property>
568
                <property>
569
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.format</name>
570
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
571
                </property>
572

    
573
                <!--personOid-->
574

    
575
                <property>
576
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.key</name>
577
                    <value>org.apache.hadoop.io.Text</value>
578
                </property>
579
                <property>
580
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.value</name>
581
                    <value>org.apache.hadoop.io.Text</value>
582
                </property>
583
                <property>
584
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.format</name>
585
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
586
                </property>
587

    
588
                <!--     projectPerson-->
589

    
590
                <property>
591
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.key</name>
592
                    <value>org.apache.hadoop.io.Text</value>
593
                </property>
594
                <property>
595
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.value</name>
596
                    <value>org.apache.hadoop.io.Text</value>
597
                </property>
598
                <property>
599
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.format</name>
600
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
601
                </property>
602

    
603
                <!--datasourceStats-->
604
                <property>
605
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.key</name>
606
                    <value>org.apache.hadoop.io.Text</value>
607
                </property>
608
                <property>
609
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.value</name>
610
                    <value>org.apache.hadoop.io.Text</value>
611
                </property>
612
                <property>
613
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.format</name>
614
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
615
                </property>
616

    
617

    
618
                <!--projectStats-->
619
                <property>
620
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.key</name>
621
                    <value>org.apache.hadoop.io.Text</value>
622
                </property>
623
                <property>
624
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.value</name>
625
                    <value>org.apache.hadoop.io.Text</value>
626
                </property>
627
                <property>
628
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.format</name>
629
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
630
                </property>
631

    
632

    
633
                <!-- Named output ${out35} (organizationStats according to the original note):
                     key class, value class and output format registered for MultipleOutputs. -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
646

    
647

    
648
                <!-- Named output ${out36} (resultStats according to the original note):
                     key class, value class and output format registered for MultipleOutputs. -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
662

    
663
                <!-- Named output ${out37} (result according to the original note):
                     key class, value class and output format registered for MultipleOutputs. -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
677

    
678
                <!-- Named output ${out38} (resultDescription according to the original note):
                     key class, value class and output format registered for MultipleOutputs. -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
692

    
693

    
694
                <!-- Named output ${out39} (orgOrg according to the original note):
                     key class, value class and output format registered for MultipleOutputs. -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out39}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out39}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out39}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
708

    
709

    
710
                <!-- ## Classes of mapper and reducer -->
                <property>
                    <name>mapreduce.map.class</name>
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsMapper</value>
                </property>
                <property>
                    <name>mapreduce.reduce.class</name>
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsReducer</value>
                </property>
                <!-- Only the Writable serialization is listed for this job. -->
                <property>
                    <name>io.serializations</name>
                    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
                </property>
724

    
725
                <!-- ## Custom config -->

                <!-- Delimiter character used to separate fields in the HDFS dump files. -->
                <property>
                    <name>stats.delim</name>
                    <value>${Stats_delim_Character}</value>
                </property>
                <!-- Default string written for null string values. -->
                <property>
                    <name>stats.nullString</name>
                    <value>${Stats_null_String_Field}</value>
                </property>
                <!-- Default string written for null numeric values. -->
                <property>
                    <name>stats.nullNum</name>
                    <value>${Stats_null_Numeric_Field}</value>
                </property>
                <!-- NOTE(review): presumably the character that encloses each field value in
                     the dump files; confirm against the mapper/reducer implementation. -->
                <property>
                    <name>stats.enclChar</name>
                    <value>${Stats_enclosing_Character}</value>
                </property>
                <!-- NOTE(review): presumably toggles export of records flagged as deleted by
                     inference; confirm against the mapper implementation. -->
                <property>
                    <name>stats.getDeletedByInf</name>
                    <value>${Stats_getdeletedbyinference}</value>
                </property>
752

    
753
                <!-- Disabled properties, kept for reference (currently not passed to the job):
                <property>
                    <name>stats.mergedPass</name>
                    <value>${Stats_db_Merged_Pass}</value>
                </property>
                <property>
                    <name>stats.mergeduser</name>
                    <value>${Stats_db_Merged_User}</value>
                </property>
                -->
764

    
765

    
766
                <!-- Source HBase table. The same table name is supplied under both the
                     "mapreduce" and the "mapred" property key (presumably to satisfy both
                     the new and the old HBase input format; verify before removing either). -->
                <property>
                    <name>hbase.mapreduce.inputtable</name>
                    <value>${Stats_Hbase_Source_Table}</value>
                </property>
                <property>
                    <name>hbase.mapred.inputtable</name>
                    <value>${Stats_Hbase_Source_Table}</value>
                </property>
775

    
776
                <!-- Mapping of protos entities to tables in the relational database. -->
                <property>
                    <name>stats.dbTablesMap</name>
                    <value>${Stats_db_table_map}</value>
                </property>
781

    
782
                <!-- Job output directory. It does not correspond to a single data store; it is
                     only a container for multiple data stores. It has to be set to the name of
                     the workflow node. -->
                <property>
                    <name>mapred.output.dir</name>
                    <value>${Stats_output_Path}</value>
                </property>
789
                <!-- Index configuration handed to the job through ${Stats_indexConf}
                     (format not visible here; see the job implementation). -->
                <property>
                    <name>stats.indexConf</name>
                    <value>${Stats_indexConf}</value>
                </property>
793
                <!-- ## Workflow node parameters -->
                <!-- Number of reduce tasks, supplied by the ${numReducers} workflow parameter. -->
                <property>
                    <name>mapred.reduce.tasks</name>
                    <value>${numReducers}</value>
                </property>
798

    
799
            </configuration>
800
        </map-reduce>
801
        <ok to="exportContext"/>
802

    
803
        <error to="fail"/>
804
    </action>
805

    
806
    <!-- Java step entered when mr_export succeeds. Launches ContextExportWrapper with the
         stats output path and ${isLookupEndpoint} as positional arguments, then moves on to
         prepareDatabase; any error routes to the "fail" kill node. -->
    <action name="exportContext">
        <java>
            <configuration>
                <!-- Same queue as declared in <global>; repeated explicitly for this action. -->
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.workflows.stats.ContextExportWrapper</main-class>
            <arg>${Stats_output_Path}</arg>
            <arg>${isLookupEndpoint}</arg>
        </java>
        <ok to="prepareDatabase"/>
        <error to="fail"/>
    </action>
825

    
826

    
827
    <!-- Java step that runs DBInitWrapper through the generic ProcessWrapper, passing the
         statistics database URL, user, password and JDBC driver as -P parameters.
         Success leads to sqoopImport; any error routes to the "fail" kill node.
         NOTE(review): the database password travels as a plain argument; consider whether
         a credential mechanism is available for this deployment. -->
    <action name="prepareDatabase">
        <java>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
        </java>
        <ok to="sqoopImport"/>
        <error to="fail"/>
    </action>
849

    
850
    <!-- Java step that runs SqoopWrapper through the generic ProcessWrapper. It receives the
         database connection settings, the stats output path, the Sqoop batching parameters,
         the field delimiter / enclosing character and the table map as -P parameters.
         Success leads to finalizeDatabase; any error routes to the "fail" kill node.
         NOTE(review): the database password travels as a plain argument. -->
    <action name="sqoopImport">
        <java>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>oozie.sqoop.log.level</name>
                    <value>DEBUG</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_output_Path=${Stats_output_Path}</arg>
            <arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg>
            <arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg>
            <arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg>
            <arg>-PStats_delim_Character=${Stats_delim_Character}</arg>
            <arg>-PStats_db_table_map=${Stats_db_table_map}</arg>
            <arg>-PStats_enclosing_Character=${Stats_enclosing_Character}</arg>
        </java>
        <ok to="finalizeDatabase"/>
        <error to="fail"/>
    </action>
887

    
888
    <!-- Java step that runs DBFinalizeWrapper through the generic ProcessWrapper with the
         statistics database URL, user, password and JDBC driver as -P parameters.
         Success leads to exportUsageStats; any error routes to the "fail" kill node.
         NOTE(review): the database password travels as a plain argument. -->
    <action name="finalizeDatabase">
        <java>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
        </java>
        <ok to="exportUsageStats"/>
        <error to="fail"/>
    </action>
910

    
911
    <!-- Java step that runs UsageStatsWrapper through the generic ProcessWrapper. Before the
         launch, the Piwik logs directory is deleted and recreated so the step starts from an
         empty directory. Piwik access settings, the date range, the logs path and the
         statistics database connection are passed as -P parameters.
         Success leads to cleanUpHDFS; any error routes to the "fail" kill node.
         NOTE(review): piwik_pass, token_auth and Stats_db_Pass travel as plain arguments. -->
    <action name="exportUsageStats">
        <java>
            <prepare>
                <delete path="${nameNode}${piwik_logsPath}"/>
                <mkdir path="${nameNode}${piwik_logsPath}"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>oozie.sqoop.log.level</name>
                    <value>DEBUG</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.UsageStatsWrapper</arg>
            <arg>-Ppiwik_username=${piwik_username}</arg>
            <arg>-Ppiwik_pass=${piwik_pass}</arg>
            <arg>-Ppiwik_httpProtocol=${piwik_httpProtocol}</arg>
            <arg>-Ppiwik_url=${piwik_url}</arg>
            <arg>-Ppiwik_siteId=${piwik_siteId}</arg>
            <arg>-Ppiwik_startDate=${piwik_startDate}</arg>
            <arg>-Ppiwik_finalDate=${piwik_finalDate}</arg>
            <arg>-Ppiwik_logsPath=${piwik_logsPath}</arg>
            <arg>-Ppiwik_filterOffset=${piwik_filterOffset}</arg>
            <arg>-Ppiwiki_schema=${piwiki_schema}</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-Ptoken_auth=${token_auth}</arg>
            <!-- Disabled IRUS arguments, kept for reference:
            <arg>-Pirus_baseuRL=${irus_baseuRL}</arg>
            <arg>-Pirus_reportType=${irus_reportType}</arg>
            <arg>-Pirus_release=${irus_release}</arg>
            <arg>-Pirus_requestorid=${irus_requestorid}</arg>
            <arg>-Pirus_repositoryid=${irus_repositoryid}</arg>
            <arg>-Pirus_period_granularity=${irus_period_granularity}</arg>
            <arg>-Pirus_period=${irus_period}</arg>
            <arg>-Pirus_token_auth=${irus_token_auth}</arg>
            -->
        </java>
        <ok to="cleanUpHDFS"/>
        <error to="fail"/>
    </action>
964

    
965

    
966
    <!-- Final housekeeping: removes the stats output directory and the Piwik logs directory
         from HDFS. Success ends the workflow; any error routes to the "fail" kill node. -->
    <action name="cleanUpHDFS">
        <fs>
            <delete path="${nameNode}${Stats_output_Path}"/>
            <delete path="${nameNode}${piwik_logsPath}"/>
        </fs>
        <ok to="end"/>
        <error to="fail"/>
    </action>
976
    <!-- Shared error sink: every action's error transition points here. The message reports
         the error of the last node that failed. -->
    <kill name="fail">
        <message>
            Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>
981
    <!-- Successful terminal node; reached from the ok transition of cleanUpHDFS. -->
    <end name="end"/>
982
</workflow-app>
    (1-1/1)