Project

General

Profile

1
<workflow-app name="stats-export"
2
              xmlns="uri:oozie:workflow:0.4">
3
    <!-- MapReduce job that exports HBase data and prepares it for import into the relational
        database used for statistics generation -->
5

    
6
    <!-- Workflow-wide defaults: the cluster endpoints and the configuration
         properties below are declared once here instead of in each action. -->
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <!-- Scheduler queue the jobs are submitted to. -->
            <property>
                <name>mapred.job.queue.name</name>
                <value>${queueName}</value>
            </property>
            <!-- Verbose logging for Sqoop actions. -->
            <property>
                <name>oozie.sqoop.log.level</name>
                <value>DEBUG</value>
            </property>
            <!-- Priority of the Oozie launcher job (oozie.launcher.* prefix). -->
            <property>
                <name>oozie.launcher.mapred.job.priority</name>
                <value>NORMAL</value>
            </property>
            <!-- Priority of the jobs started by the actions themselves. -->
            <property>
                <name>mapred.job.priority</name>
                <value>NORMAL</value>
            </property>
        </configuration>
    </global>
28

    
29
    <!-- Entry point: control passes first to the prepareDatabase action. -->
    <start to="prepareDatabase"/>
30

    
31
    <!-- Java action that runs ProcessWrapper with DBInitWrapper as its target,
         handing it the working directory and the statistics database connection
         settings (URL, user, password, driver). On success the workflow moves on
         to mr_export; on error it moves to the "fail" node. -->
    <action name="prepareDatabase">
        <java>
            <!-- No HDFS paths need to be created or deleted before this step. -->
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <!-- NOTE(review): the database password is passed as a plain process
                 argument; confirm this exposure is acceptable for the cluster. -->
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
        </java>

        <ok to="mr_export"/>
        <!--<ok to="sqoopImport"/>-->
        <error to="fail"/>
    </action>
54

    
55
    <action name="mr_export">
56
        <map-reduce>
57
            <prepare>
58
                <delete path="${nameNode}${Stats_output_Path}"/>
59
            </prepare>
60
            <configuration>
61
                <property>
62
                    <name>hbase.mapreduce.scan</name>
63
                    <!-- NOTE(review): reads the 'scan' entry from the captured output of an
                         action named 'get-scanner'. No such action appears on the path from
                         start to mr_export in the visible part of this workflow; confirm it
                         exists and completes before this action runs. -->
                    <value>${wf:actionData('get-scanner')['scan']}</value>
64
                </property>
65
                <!-- HBase root directory, resolved against the workflow's name node.
                     The EL braces are required: a bare $nameNode is not substituted
                     and would be passed through as literal text. -->
                <property>
                    <name>hbase.rootdir</name>
                    <value>${nameNode}/hbase</value>
                </property>
69
                <property>
70
                    <name>hbase.security.authentication</name>
71
                    <value>simple</value>
72
                </property>
73

    
74
                <!-- ZOOKEEPER -->
75
                <!-- Comma-separated list of ZooKeeper quorum hosts. The value is kept
                     on one line with no surrounding whitespace, because whitespace
                     inside <value> is delivered as part of the value and could end up
                     inside the first and last host names. -->
                <property>
                    <name>hbase.zookeeper.quorum</name>
                    <value>namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu</value>
                    <!-- Alternative quorum kept for reference:
                         <value>quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu</value> -->
                </property>
83
                <property>
84
                    <name>zookeeper.znode.rootserver</name>
85
                    <value>root-region-server</value>
86

    
87
                </property>
88
                <property>
89
                    <name>hbase.zookeeper.property.clientPort</name>
90
                    <value>2181</value>
91
                </property>
92

    
93
                <!-- MR IO -->
94
                <property>
95
                    <name>mapreduce.inputformat.class</name>
96
                    <value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
97
                </property>
98
                <property>
99
                    <name>mapred.mapoutput.key.class</name>
100
                    <value>org.apache.hadoop.io.Text</value>
101
                </property>
102
                <property>
103
                    <name>mapred.mapoutput.value.class</name>
104
                    <value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value>
105
                </property>
106
                <property>
107
                    <name>mapred.output.key.class</name>
108
                    <value>org.apache.hadoop.io.Text</value>
109
                </property>
110
                <property>
111
                    <name>mapred.output.value.class</name>
112
                    <!-- NOTE(review): this names an OutputFormat class, while the property is
                         the job's output VALUE class. Every named output in this action
                         declares Text values; confirm whether Text was intended here. -->
                    <value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
113
                </property>
114
                <!-- ## This is required for new MapReduce API usage -->
115
                <property>
116
                    <name>mapred.mapper.new-api</name>
117
                    <value>true</value>
118
                </property>
119
                <property>
120
                    <name>mapred.reducer.new-api</name>
121
                    <value>true</value>
122
                </property>
123
                <!-- # Job-specific options -->
124
                <property>
125
                    <name>dfs.blocksize</name>
126
                    <value>32M</value>
127
                </property>
128
                <property>
129
                    <name>mapred.output.compress</name>
130
                    <value>false</value>
131
                </property>
132
                <!-- Turn speculative execution off for both phases under the legacy
                     (mapred.*) property names. The reduce-side property used to be
                     declared twice and the map-side one was missing; the map side is
                     otherwise only set through mapreduce.map.speculative in this
                     action. -->
                <property>
                    <name>mapred.map.tasks.speculative.execution</name>
                    <value>false</value>
                </property>
                <property>
                    <name>mapred.reduce.tasks.speculative.execution</name>
                    <value>false</value>
                </property>
140
                <property>
141
                    <name>mapreduce.map.speculative</name>
142
                    <value>false</value>
143
                </property>
144

    
145
                <!-- I/O FORMAT -->
146
                <!-- IMPORTANT: sets the default delimiter used by the text output writer. Required to fix
                    an issue with a trailing tab added between id and value in multiple outputs -->
148
                <property>
149
                    <name>mapred.textoutputformat.separator</name>
150
                    <value>${Stats_delim_Character}</value>
151
                </property>
152
                <!-- ## Names of all output ports -->
153

    
154
                <!-- Space-separated names of the named outputs (out1 to out39). The list
                     is kept on one line with no leading or trailing whitespace, so that
                     line breaks and indentation cannot become part of the first or last
                     name when the value is split on spaces. -->
                <property>
                    <name>mapreduce.multipleoutputs</name>
                    <value>${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21} ${out22} ${out23} ${out24} ${out25} ${out26} ${out27} ${out28} ${out29} ${out30} ${out31} ${out32} ${out33} ${out34} ${out35} ${out36} ${out37} ${out38} ${out39}</value>
                </property>
162
                <!-- datasource -->
163
                <property>
164
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name>
165
                    <value>org.apache.hadoop.io.Text</value>
166
                </property>
167
                <property>
168
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name>
169
                    <value>org.apache.hadoop.io.Text</value>
170
                </property>
171
                <property>
172
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
173
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
174
                </property>
175
                <!-- project -->
176
                <property>
177
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name>
178
                    <value>org.apache.hadoop.io.Text</value>
179
                </property>
180
                <property>
181
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name>
182
                    <value>org.apache.hadoop.io.Text</value>
183
                </property>
184
                <property>
185
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
186
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
187
                </property>
188

    
189
                <!-- organization -->
190
                <property>
191
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name>
192
                    <value>org.apache.hadoop.io.Text</value>
193
                </property>
194
                <property>
195
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name>
196
                    <value>org.apache.hadoop.io.Text</value>
197
                </property>
198
                <property>
199
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name>
200
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
201
                </property>
202

    
203
                <!-- datasourceOrganization -->
204
                <property>
205
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name>
206
                    <value>org.apache.hadoop.io.Text</value>
207
                </property>
208
                <property>
209
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name>
210
                    <value>org.apache.hadoop.io.Text</value>
211
                </property>
212
                <property>
213
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name>
214
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
215
                </property>
216

    
217
                <!-- datasourceTopic -->
218
                <property>
219
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name>
220
                    <value>org.apache.hadoop.io.Text</value>
221
                </property>
222
                <property>
223
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name>
224
                    <value>org.apache.hadoop.io.Text</value>
225
                </property>
226
                <property>
227
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name>
228
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
229
                </property>
230
                <!-- datasourceLanguage -->
231
                <property>
232
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name>
233
                    <value>org.apache.hadoop.io.Text</value>
234
                </property>
235
                <property>
236
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name>
237
                    <value>org.apache.hadoop.io.Text</value>
238
                </property>
239
                <property>
240
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name>
241
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
242
                </property>
243

    
244
                <!-- projectOrganization -->
245
                <property>
246
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name>
247
                    <value>org.apache.hadoop.io.Text</value>
248
                </property>
249
                <property>
250
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.value</name>
251
                    <value>org.apache.hadoop.io.Text</value>
252
                </property>
253
                <property>
254
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name>
255
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
256
                </property>
257
                <!-- resultClaim -->
258
                <property>
259
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name>
260
                    <value>org.apache.hadoop.io.Text</value>
261
                </property>
262
                <property>
263
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.value</name>
264
                    <value>org.apache.hadoop.io.Text</value>
265
                </property>
266
                <property>
267
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name>
268
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
269
                </property>
270

    
271
                <!-- resultClassification -->
272
                <property>
273
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name>
274
                    <value>org.apache.hadoop.io.Text</value>
275
                </property>
276
                <property>
277
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.value</name>
278
                    <value>org.apache.hadoop.io.Text</value>
279
                </property>
280
                <property>
281
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name>
282
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
283
                </property>
284

    
285
                <!-- resultConcept -->
286
                <property>
287
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name>
288
                    <value>org.apache.hadoop.io.Text</value>
289
                </property>
290
                <property>
291
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.value</name>
292
                    <value>org.apache.hadoop.io.Text</value>
293
                </property>
294
                <property>
295
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name>
296
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
297
                </property>
298

    
299
                <!-- resultLanguage -->
300
                <property>
301
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name>
302
                    <value>org.apache.hadoop.io.Text</value>
303
                </property>
304
                <property>
305
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.value</name>
306
                    <value>org.apache.hadoop.io.Text</value>
307
                </property>
308
                <property>
309
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name>
310
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
311
                </property>
312

    
313
                <!-- resultOrganization -->
314
                <property>
315
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name>
316
                    <value>org.apache.hadoop.io.Text</value>
317
                </property>
318
                <property>
319
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.value</name>
320
                    <value>org.apache.hadoop.io.Text</value>
321
                </property>
322
                <property>
323
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name>
324
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
325
                </property>
326

    
327
                <!-- resultResult -->
328
                <property>
329
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name>
330
                    <value>org.apache.hadoop.io.Text</value>
331
                </property>
332
                <property>
333
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.value</name>
334
                    <value>org.apache.hadoop.io.Text</value>
335
                </property>
336
                <property>
337
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name>
338
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
339
                </property>
340

    
341
                <!-- resultProject -->
342
                <property>
343
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name>
344
                    <value>org.apache.hadoop.io.Text</value>
345
                </property>
346
                <property>
347
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.value</name>
348
                    <value>org.apache.hadoop.io.Text</value>
349
                </property>
350
                <property>
351
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name>
352
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
353
                </property>
354
                <!-- resultTopic -->
355
                <property>
356
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name>
357
                    <value>org.apache.hadoop.io.Text</value>
358
                </property>
359
                <property>
360
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.value</name>
361
                    <value>org.apache.hadoop.io.Text</value>
362
                </property>
363
                <property>
364
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name>
365
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
366
                </property>
367
                <!-- resultDatasource -->
368
                <property>
369
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name>
370
                    <value>org.apache.hadoop.io.Text</value>
371
                </property>
372
                <property>
373
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.value</name>
374
                    <value>org.apache.hadoop.io.Text</value>
375
                </property>
376
                <property>
377
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name>
378
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
379
                </property>
380

    
381
                <!-- result -->
382
                <property>
383
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name>
384
                    <value>org.apache.hadoop.io.Text</value>
385
                </property>
386
                <property>
387
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.value</name>
388
                    <value>org.apache.hadoop.io.Text</value>
389
                </property>
390
                <property>
391
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name>
392
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
393
                </property>
394

    
395

    
396
                <!-- context -->
397
                <property>
398
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name>
399
                    <value>org.apache.hadoop.io.Text</value>
400
                </property>
401
                <property>
402
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.value</name>
403
                    <value>org.apache.hadoop.io.Text</value>
404
                </property>
405
                <property>
406
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name>
407
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
408
                </property>
409

    
410
                <!-- concept -->
411
                <property>
412
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name>
413
                    <value>org.apache.hadoop.io.Text</value>
414
                </property>
415
                <property>
416
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.value</name>
417
                    <value>org.apache.hadoop.io.Text</value>
418
                </property>
419
                <property>
420
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name>
421
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
422
                </property>
423

    
424
                <!-- category -->
425

    
426
                <property>
427
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
428
                    <value>org.apache.hadoop.io.Text</value>
429
                </property>
430
                <property>
431
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.value</name>
432
                    <value>org.apache.hadoop.io.Text</value>
433
                </property>
434
                <property>
435
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
436
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
437
                </property>
438

    
439

    
440
                <!-- person -->
441

    
442
                <property>
443
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.key</name>
444
                    <value>org.apache.hadoop.io.Text</value>
445
                </property>
446
                <property>
447
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.value</name>
448
                    <value>org.apache.hadoop.io.Text</value>
449
                </property>
450
                <property>
451
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.format</name>
452
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
453
                </property>
454

    
455

    
456
                <!-- personResult -->
457
                <property>
458
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.key</name>
459
                    <value>org.apache.hadoop.io.Text</value>
460
                </property>
461
                <property>
462
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.value</name>
463
                    <value>org.apache.hadoop.io.Text</value>
464
                </property>
465
                <property>
466
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.format</name>
467
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
468
                </property>
469

    
470
                <!--resultCitation -->
471
                <property>
472
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.key</name>
473
                    <value>org.apache.hadoop.io.Text</value>
474
                </property>
475
                <property>
476
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.value</name>
477
                    <value>org.apache.hadoop.io.Text</value>
478
                </property>
479
                <property>
480
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.format</name>
481
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
482
                </property>
483

    
484
                <!--resultPid -->
485
                <property>
486
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.key</name>
487
                    <value>org.apache.hadoop.io.Text</value>
488
                </property>
489
                <property>
490
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.value</name>
491
                    <value>org.apache.hadoop.io.Text</value>
492
                </property>
493
                <property>
494
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.format</name>
495
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
496
                </property>
497

    
498
                <!-- resultOid-->
499

    
500
                <property>
501
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.key</name>
502
                    <value>org.apache.hadoop.io.Text</value>
503
                </property>
504
                <property>
505
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.value</name>
506
                    <value>org.apache.hadoop.io.Text</value>
507
                </property>
508
                <property>
509
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.format</name>
510
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
511
                </property>
512

    
513
                <!--projectOid-->
514
                <property>
515
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.key</name>
516
                    <value>org.apache.hadoop.io.Text</value>
517
                </property>
518
                <property>
519
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.value</name>
520
                    <value>org.apache.hadoop.io.Text</value>
521
                </property>
522
                <property>
523
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.format</name>
524
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
525
                </property>
526

    
527
                <!-- projectKeyword-->
528
                <property>
529
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.key</name>
530
                    <value>org.apache.hadoop.io.Text</value>
531
                </property>
532
                <property>
533
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.value</name>
534
                    <value>org.apache.hadoop.io.Text</value>
535
                </property>
536
                <property>
537
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.format</name>
538
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
539
                </property>
540

    
541
                <!-- projectSubject-->
542

    
543
                <property>
544
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.key</name>
545
                    <value>org.apache.hadoop.io.Text</value>
546
                </property>
547
                <property>
548
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.value</name>
549
                    <value>org.apache.hadoop.io.Text</value>
550
                </property>
551
                <property>
552
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.format</name>
553
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
554
                </property>
555

    
556
                <!--  organizationOid-->
557
                <property>
558
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.key</name>
559
                    <value>org.apache.hadoop.io.Text</value>
560
                </property>
561
                <property>
562
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.value</name>
563
                    <value>org.apache.hadoop.io.Text</value>
564
                </property>
565
                <property>
566
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.format</name>
567
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
568
                </property>
569

    
570

    
571
                <!--     datasourceOid-->
572

    
573
                <property>
574
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.key</name>
575
                    <value>org.apache.hadoop.io.Text</value>
576
                </property>
577
                <property>
578
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.value</name>
579
                    <value>org.apache.hadoop.io.Text</value>
580
                </property>
581
                <property>
582
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.format</name>
583
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
584
                </property>
585

    
586
                <!--personOid-->
587

    
588
                <property>
589
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.key</name>
590
                    <value>org.apache.hadoop.io.Text</value>
591
                </property>
592
                <property>
593
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.value</name>
594
                    <value>org.apache.hadoop.io.Text</value>
595
                </property>
596
                <property>
597
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.format</name>
598
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
599
                </property>
600

    
601
                <!--     projectPerson-->
602

    
603
                <property>
604
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.key</name>
605
                    <value>org.apache.hadoop.io.Text</value>
606
                </property>
607
                <property>
608
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.value</name>
609
                    <value>org.apache.hadoop.io.Text</value>
610
                </property>
611
                <property>
612
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.format</name>
613
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
614
                </property>
615

    
616
                <!--datasourceStats-->
617
                <property>
618
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.key</name>
619
                    <value>org.apache.hadoop.io.Text</value>
620
                </property>
621
                <property>
622
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.value</name>
623
                    <value>org.apache.hadoop.io.Text</value>
624
                </property>
625
                <property>
626
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.format</name>
627
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
628
                </property>
629

    
630

    
631
                <!-- projectStats: named output out34 with Text keys and Text values,
                     written through TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
644

    
645

    
646
                <!-- organizationStats: named output out35 with Text keys and Text values,
                     written through TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
659

    
660

    
661
                <!-- resultStats: named output out36 with Text keys and Text values,
                     written through TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
675

    
676
                <!-- result: named output out37 with Text keys and Text values,
                     written through TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
690

    
691
                <!-- resultDescription: named output out38 with Text keys and Text values,
                     written through TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
705

    
706

    
707
                <!-- orgOrg: named output out39 with Text keys and Text values,
                     written through TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out39}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out39}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out39}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
721

    
722

    
723
                <!-- ## Mapper and reducer classes, plus the serialization used for job data -->
                <property>
                    <name>mapreduce.map.class</name>
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsMapper</value>
                </property>
                <property>
                    <name>mapreduce.reduce.class</name>
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsReducer</value>
                </property>
                <!-- Only Writable serialization is registered for this job -->
                <property>
                    <name>io.serializations</name>
                    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
                </property>
737

    
738
                <!-- ## Custom config -->

                <!-- Delimiter character used to separate fields in the HDFS dump files -->
                <property>
                    <name>stats.delim</name>
                    <value>${Stats_delim_Character}</value>
                </property>
                <!-- Default string written for null string values -->
                <property>
                    <name>stats.nullString</name>
                    <value>${Stats_null_String_Field}</value>
                </property>
                <!-- Default string written for null numeric values -->
                <property>
                    <name>stats.nullNum</name>
                    <value>${Stats_null_Numeric_Field}</value>
                </property>
                <!-- Enclosing character, taken from the Stats_enclosing_Character parameter -->
                <property>
                    <name>stats.enclChar</name>
                    <value>${Stats_enclosing_Character}</value>
                </property>
                <!-- NOTE(review): presumably controls whether records flagged as deleted by
                     inference are exported; confirm against the mapper implementation -->
                <property>
                    <name>stats.getDeletedByInf</name>
                    <value>${Stats_getdeletedbyinference}</value>
                </property>
765

    
766
                <!-- HBase table the job reads its input from -->
                <property>
                    <name>hbase.mapreduce.inputtable</name>
                    <value>${Stats_Hbase_Source_Table}</value>
                </property>
                <!-- NOTE(review): UsageStatsRepository reuses the Stats_Hbase_Source_Table
                     parameter, the same value as hbase.mapreduce.inputtable above; confirm
                     that this is intended -->
                <property>
                    <name>UsageStatsRepository</name>
                    <value>${Stats_Hbase_Source_Table}</value>
                </property>
                <!-- Mapping of protos entities to tables in the relational database -->
                <property>
                    <name>stats.dbTablesMap</name>
                    <value>${Stats_db_table_map}</value>
                </property>
780

    
781
                <!-- This directory does not correspond to a single data store; it only
                     contains multiple data stores. It has to be set to the name of the
                     workflow node. -->
                <property>
                    <name>mapred.output.dir</name>
                    <value>${Stats_output_Path}</value>
                </property>
                <!-- Index configuration, taken from the Stats_indexConf parameter -->
                <property>
                    <name>stats.indexConf</name>
                    <value>${Stats_indexConf}</value>
                </property>
                <!-- ## Workflow node parameters -->
                <!-- Number of reduce tasks, taken from the numReducers parameter -->
                <property>
                    <name>mapred.reduce.tasks</name>
                    <value>${numReducers}</value>
                </property>
797

    
798
            </configuration>
799
        </map-reduce>
800
        <ok to="exportContext"/>
801

    
802
        <error to="fail"/>
803
    </action>
804

    
805
    <!-- exportContext: Java action that runs ContextExportWrapper with two positional
         arguments, the stats output path and the IS lookup endpoint.
         On success the workflow continues with sqoopImport; on error it goes to fail.
         NOTE(review): what the wrapper writes is not visible in this file; confirm
         against the ContextExportWrapper class. -->
    <action name="exportContext">
        <java>
            <prepare/>
            <configuration>
                <!-- Same queue as declared in the global section; kept explicit here -->
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.workflows.stats.ContextExportWrapper</main-class>
            <arg>${Stats_output_Path}</arg>
            <arg>${isLookupEndpoint}</arg>
        </java>
        <ok to="sqoopImport"/>
        <error to="fail"/>
    </action>
824

    
825
    <!-- sqoopImport: Java action that starts ProcessWrapper, which is handed the
         SqoopWrapper class name and its -P parameters (database connection, output path,
         sqoop batching settings, delimiter, table map and enclosing character).
         On success the workflow continues with finalizeDatabase; on error it goes to fail.
         NOTE(review): the database password is passed as a plain argument and is
         presumably visible in the action configuration; consider a safer mechanism. -->
    <action name="sqoopImport">
        <java>
            <prepare/>
            <configuration>
                <!-- Both properties repeat values from the global section; kept explicit -->
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>oozie.sqoop.log.level</name>
                    <value>DEBUG</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_output_Path=${Stats_output_Path}</arg>
            <arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg>
            <arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg>
            <arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg>
            <arg>-PStats_delim_Character=${Stats_delim_Character}</arg>
            <arg>-PStats_db_table_map=${Stats_db_table_map}</arg>
            <arg>-PStats_enclosing_Character=${Stats_enclosing_Character}</arg>
        </java>
        <ok to="finalizeDatabase"/>
        <error to="fail"/>
    </action>
862

    
863
    <!-- finalizeDatabase: Java action that starts ProcessWrapper, which is handed the
         DBFinalizeWrapper class name and the database connection parameters
         (URL, user, password, driver).
         On success the workflow continues with exportUsageStats; on error it goes to fail. -->
    <action name="finalizeDatabase">
        <java>
            <prepare/>
            <configuration>
                <!-- Same queue as declared in the global section; kept explicit here -->
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
        </java>
        <!-- Disabled alternative transition, kept for reference: <ok to="end"/> -->
        <ok to="exportUsageStats"/>
        <error to="fail"/>
    </action>
886

    
887
    <!-- exportUsageStats: Java action that starts ProcessWrapper, which is handed the
         UsageStatsWrapper class name, the piwik parameters, the database connection
         parameters and an auth token.
         The prepare step deletes and recreates the piwik logs directory before the run.
         On success the workflow continues with cleanUpHDFS; on error it goes to fail. -->
    <action name="exportUsageStats">
        <java>
            <prepare>
                <delete path="${nameNode}${piwik_logsPath}"/>
                <mkdir path="${nameNode}${piwik_logsPath}"/>
            </prepare>
            <configuration>
                <!-- Both properties repeat values from the global section; kept explicit -->
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>oozie.sqoop.log.level</name>
                    <value>DEBUG</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.UsageStatsWrapper</arg>
            <arg>-Ppiwik_username=${piwik_username}</arg>
            <arg>-Ppiwik_pass=${piwik_pass}</arg>
            <arg>-Ppiwik_httpProtocol=${piwik_httpProtocol}</arg>
            <arg>-Ppiwik_url=${piwik_url}</arg>
            <arg>-Ppiwik_siteId=${piwik_siteId}</arg>
            <arg>-Ppiwik_startDate=${piwik_startDate}</arg>
            <arg>-Ppiwik_finalDate=${piwik_finalDate}</arg>
            <arg>-Ppiwik_logsPath=${piwik_logsPath}</arg>
            <arg>-Ppiwik_filterOffset=${piwik_filterOffset}</arg>
            <!-- NOTE(review): this parameter is spelled "piwiki", unlike the other piwik
                 parameters; left unchanged because the wrapper presumably reads this name -->
            <arg>-Ppiwiki_schema=${piwiki_schema}</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-Ptoken_auth=${token_auth}</arg>
<!--
            <arg>-Pirus_baseuRL=${irus_baseuRL}</arg>
            <arg>-Pirus_reportType=${irus_reportType}</arg>
            <arg>-Pirus_release=${irus_release}</arg>
            <arg>-Pirus_requestorid=${irus_requestorid}</arg>
            <arg>-Pirus_repositoryid=${irus_repositoryid}</arg>
            <arg>-Pirus_period_granularity=${irus_period_granularity}</arg>
            <arg>-Pirus_period=${irus_period}</arg>
            <arg>-Pirus_token_auth=${irus_token_auth}</arg>
-->
        </java>
        <ok to="cleanUpHDFS"/>
        <error to="fail"/>
    </action>
940

    
941

    
942
    <!-- cleanUpHDFS: fs action that deletes the stats output directory and the piwik logs
         directory. On success the workflow ends; on error it goes to fail. -->
    <action name="cleanUpHDFS">
        <fs>
            <delete path="${nameNode}${Stats_output_Path}"/>
            <delete path="${nameNode}${piwik_logsPath}"/>
        </fs>
        <ok to="end"/>
        <error to="fail"/>
    </action>
952
    <!-- Terminal nodes: every action's error transition leads to "fail", which kills the
         workflow and reports the error message of the last failed node; "end" is the
         normal completion node. -->
    <kill name="fail">
        <message>
            Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>
    <end name="end"/>
</workflow-app>