<?xml version="1.0" encoding="UTF-8"?>
<workflow-app name="stats-export"
              xmlns="uri:oozie:workflow:0.4">
3
    <!-- MapReduce job that exports HBase data and prepares it for import into the relational
         database used for statistics generation. -->
5

    
6
    <!-- Workflow-wide settings: cluster endpoints plus the Hadoop properties listed below. -->
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <!-- Scheduler queue, taken from the queueName parameter. -->
            <property>
                <name>mapred.job.queue.name</name>
                <value>${queueName}</value>
            </property>
            <!-- NOTE(review): no Sqoop action is visible in this part of the file; confirm this
                 setting is still needed. -->
            <property>
                <name>oozie.sqoop.log.level</name>
                <value>DEBUG</value>
            </property>
            <!-- Job priority, set once under the oozie.launcher. prefix and once without it. -->
            <property>
                <name>oozie.launcher.mapred.job.priority</name>
                <value>NORMAL</value>
            </property>
            <property>
                <name>mapred.job.priority</name>
                <value>NORMAL</value>
            </property>
        </configuration>
    </global>
28
    <!-- NOTE(review): the workflow enters at mr_export, so the getScanner action defined below
         is never reached from here, although mr_export reads captured action data for its scan
         property. Confirm whether the entry point should be getScanner instead. -->
    <start to="mr_export"/>
29

    
30
    <!-- Java action that runs HbaseScannerGenerator with the comma-separated list given after -f.
         The capture-output element keeps what the class emits as this action's data.
         The argument text must contain nothing but the option and the list itself. -->
    <action name="getScanner">
        <java>
            <main-class>eu.dnetlib.iis.core.workflows.stats.HbaseScannerGenerator</main-class>
            <arg>
                -f datasource, project , organization,person,result,
                projectOrganization_participation_hasParticipant,
                resultProject_outcome_produces,datasourceOrganization_provision_provides,
                resultResult_publicationDataset_isRelatedTo,
                <!--                resultResult_dedup_merges,resultResult_dedup_isMergedIn,-->
                organizationOrganization_dedup_isMergedIn,
                organizationOrganization_dedup_merges,
                projectOrganization_participation_isParticipant,
                personResult_authorship_hasAuthor
            </arg>
            <capture-output/>
        </java>
        <ok to="mr_export"/>
        <error to="fail"/>
    </action>
55

    
56

    
57
    <action name="mr_export">
58
        <map-reduce>
59

    
60
            <!-- Delete the output location (name node + Stats_output_Path) before the job runs. -->
            <prepare>
                <delete path="${nameNode}${Stats_output_Path}"/>
            </prepare>
64
            <configuration>
65
                <!-- Scan definition read from the captured output (key 'scan') of the getScanner
                     action. The lookup has to use that action's exact name; 'get-scanner' does not
                     match any action in this workflow. -->
                <property>
                    <name>hbase.mapreduce.scan</name>
                    <value>${wf:actionData('getScanner')['scan']}</value>
                </property>
69
                <!-- HBase root directory under the workflow's name node. The parameter must be
                     written as an EL expression with braces, otherwise the text is passed through
                     literally. -->
                <property>
                    <name>hbase.rootdir</name>
                    <value>${nameNode}/hbase</value>
                </property>
74

    
75
                <property>
                    <name>hbase.security.authentication</name>
                    <value>simple</value>
                </property>
                <!-- ZOOKEEPER -->
                <!-- The quorum host list is kept on a single line so that no line breaks or
                     indentation end up inside the value. -->
                <property>
                    <name>hbase.zookeeper.quorum</name>
                    <value>namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu</value>
                    <!-- <value> quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
                        </value> -->
                </property>
                <property>
                    <name>zookeeper.znode.rootserver</name>
                    <value>root-region-server</value>
                </property>
                <property>
                    <name>hbase.zookeeper.property.clientPort</name>
                    <value>2181</value>
                </property>
99

    
100

    
101
                <!-- MR IO -->
                <!-- Input is read through the HBase TableInputFormat. -->
                <property>
                    <name>mapreduce.inputformat.class</name>
                    <value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
                </property>
                <!-- Key and value classes of the map output. -->
                <property>
                    <name>mapred.mapoutput.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapred.mapoutput.value.class</name>
                    <value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value>
                </property>
                <!-- Key and value classes of the job output. -->
                <property>
                    <name>mapred.output.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <!-- NOTE(review): the value below names an OutputFormat class rather than a value
                     type; confirm which class was intended for this property. -->
                <property>
                    <name>mapred.output.value.class</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
                </property>
125

    
126
                <!-- ## This is required for new MapReduce API usage (set for both mapper and reducer) -->
                <property>
                    <name>mapred.mapper.new-api</name>
                    <value>true</value>
                </property>
                <property>
                    <name>mapred.reducer.new-api</name>
                    <value>true</value>
                </property>
135

    
136
                <!-- # Job-specific options -->
                <property>
                    <name>dfs.blocksize</name>
                    <value>32M</value>
                </property>
                <property>
                    <name>mapred.output.compress</name>
                    <value>false</value>
                </property>
                <!-- Speculative execution is switched off for both task kinds. The map-side key
                     used to be a second copy of the reduce-side key, which left map tasks
                     uncovered under the mapred.* naming. -->
                <property>
                    <name>mapred.reduce.tasks.speculative.execution</name>
                    <value>false</value>
                </property>
                <property>
                    <name>mapred.map.tasks.speculative.execution</name>
                    <value>false</value>
                </property>
                <property>
                    <name>mapreduce.map.speculative</name>
                    <value>false</value>
                </property>
157

    
158
                <!-- I/O FORMAT -->
                <!-- IMPORTANT: sets the default delimiter used by the text output writer. Required to
                     fix an issue with a trailing tab added between id and value in multiple outputs. -->
                <property>
                    <name>mapred.textoutputformat.separator</name>
                    <value>${Stats_delim_Character}</value>
                </property>
165
                <!-- ## Names of all output ports -->
                <!-- Space-separated list; every name needs its own separator (out36 and out37 used to
                     be fused into one token). Kept on a single line so that no line breaks or
                     indentation end up inside the value. -->
                <property>
                    <name>mapreduce.multipleoutputs</name>
                    <value>${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21} ${out22} ${out23} ${out24} ${out25} ${out26} ${out27} ${out28} ${out29} ${out30} ${out31} ${out32} ${out33} ${out34} ${out35} ${out36} ${out37} ${out38}</value>
                </property>
175
                <!-- Each named output below declares its key class, value class and output format. -->
                <!-- datasource -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- project -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- organization -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
215

    
216
                <!-- datasourceOrganization -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- datasourceTopic -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- datasourceLanguage -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
256

    
257
                <!-- projectOrganization -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultClaim -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
283

    
284
                <!-- resultClassification -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultConcept -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultLanguage -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultOrganization -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultResult -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultProject -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultTopic -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultDatasource -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
393

    
394
                <!-- result -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- context -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- concept -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- category -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- person -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- personResult -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
482

    
483
                <!-- resultCitation -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultPid -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultOid -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
525

    
526
                <!-- projectOid -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- projectKeyword -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- projectSubject -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
568

    
569
                <!-- organizationOid -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- datasourceOid -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- personOid -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- projectPerson -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
628

    
629
                <!-- datasourceStats -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- projectStats -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- organizationStats -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
672

    
673

    
674
                <!--resultStats-->
675

    
676
                <property>
677
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.key</name>
678
                    <value>org.apache.hadoop.io.Text</value>
679
                </property>
680
                <property>
681
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.value</name>
682
                    <value>org.apache.hadoop.io.Text</value>
683
                </property>
684
                <property>
685
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.format</name>
686
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
687
                </property>
688

    
689
                <!--result-->
690

    
691
                <property>
692
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.key</name>
693
                    <value>org.apache.hadoop.io.Text</value>
694
                </property>
695
                <property>
696
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.value</name>
697
                    <value>org.apache.hadoop.io.Text</value>
698
                </property>
699
                <property>
700
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.format</name>
701
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
702
                </property>
703

    
704
                <!--resultDescription-->
705

    
706
                <property>
707
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.key</name>
708
                    <value>org.apache.hadoop.io.Text</value>
709
                </property>
710
                <property>
711
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.value</name>
712
                    <value>org.apache.hadoop.io.Text</value>
713
                </property>
714
                <property>
715
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.format</name>
716
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
717
                </property>
718

    
719

    
720
                <!-- ## Classes of mapper and reducer -->
721

    
722
                <property>
723
                    <name>mapreduce.map.class</name>
724
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsMapper</value>
725
                </property>
726
                <property>
727
                    <name>mapreduce.reduce.class</name>
728
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsReducer</value>
729
                </property>
730
                <property>
731
                    <name>io.serializations</name>
732
                    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
733
                </property>
734

    
735
                <!-- ## Custom config -->
736

    
737
                <!-- delimiter character used to separate fields in the HDFS dump files -->
738
                <property>
739
                    <name>stats.delim</name>
740
                    <value>${Stats_delim_Character}</value>
741
                </property>
742
                <!--default string for Null String Values -->
743
                <property>
744
                    <name>stats.nullString</name>
745
                    <value>${Stats_null_String_Field}</value>
746
                </property>
747
                <!--default string for Null Numeric Values -->
748
                <property>
749
                    <name>stats.nullNum</name>
750
                    <value>${Stats_null_Numeric_Field}</value>
751
                </property>
752
                <property>
753
                    <name>stats.enclChar</name>
754
                    <value>${Stats_enclosing_Character}</value>
755
                </property>
756

    
757

    
758
                <!--source hbase table -->
759
                <property>
760
                    <name>hbase.mapreduce.inputtable</name>
761
                    <value>${Stats_Hbase_Source_Table}</value>
762
                </property>
763
                <property>
764
                    <name>hbase.mapred.inputtable</name>
765
                    <value>${Stats_Hbase_Source_Table}</value>
766
                </property>
767

    
768
                <property>
769
                    <!-- mapping of protos entities to tables in the relDB -->
770
                    <name>stats.dbTablesMap</name>
771
                    <value>${Stats_db_table_map}</value>
772
                </property>
773

    
774
                <!-- This directory does not correspond to a data store. In fact, this directory only
775
                    contains multiple data stores. It has to be set to the name of the workflow node.
776
                    -->
777
                <property>
778
                    <name>mapred.output.dir</name>
779
                    <value>${Stats_output_Path}</value>
780
                </property>
781
                <property>
782
                    <name>stats.indexConf</name>
783
                    <value>${Stats_indexConf}</value>
784
                </property>
785
                <!-- ## Workflow node parameters -->
786
                <property>
787
                    <name>mapred.reduce.tasks</name>
788
                    <value>${numReducers}</value>
789
                </property>
790

    
791
            </configuration>
792
        </map-reduce>
793
        <ok to="exportContext"/>
794

    
795
        <error to="fail"/>
796
    </action>
797

    
798
    <!-- exportContext: java action that runs ContextExportWrapper with two positional
         arguments, ${Stats_output_Path} and ${isLookupEndpoint}.
         Transitions to prepareDatabase on success and to the fail kill node on error. -->
    <action name="exportContext">
        <java>
            <!-- No paths are deleted or created before launch. -->
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.workflows.stats.ContextExportWrapper</main-class>
            <arg>${Stats_output_Path}</arg>
            <arg>${isLookupEndpoint}</arg>
        </java>
        <ok to="prepareDatabase"/>
        <error to="fail"/>
    </action>
817

    
818

    
819
    <!-- prepareDatabase: java action that launches ProcessWrapper, which is given the
         DBInitWrapper class name plus the statistics database URL, user, password and
         driver as -P parameters.
         Transitions to sqoopImport on success and to the fail kill node on error.
         NOTE(review): the database password is passed as a plain argument; confirm
         this is acceptable for where action arguments get recorded. -->
    <action name="prepareDatabase">
        <java>
            <!-- No paths are deleted or created before launch. -->
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
        </java>
        <ok to="sqoopImport"/>
        <error to="fail"/>
    </action>
841

    
842
    <!-- sqoopImport: java action that launches ProcessWrapper, which is given the
         SqoopWrapper class name plus -P parameters for the database connection, the
         stats output path, the sqoop batching settings, the field delimiter, the
         table map and the enclosing character.
         Transitions to exportUsageStats on success and to the fail kill node on error. -->
    <action name="sqoopImport">
        <java>
            <!-- No paths are deleted or created before launch. -->
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>oozie.sqoop.log.level</name>
                    <value>DEBUG</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg>
            <!-- target database connection -->
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <!-- same path the workflow uses as mapred.output.dir and deletes in cleanUpHDFS -->
            <arg>-PStats_output_Path=${Stats_output_Path}</arg>
            <!-- sqoop batching settings -->
            <arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg>
            <arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg>
            <arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg>
            <!-- dump file format and table mapping -->
            <arg>-PStats_delim_Character=${Stats_delim_Character}</arg>
            <arg>-PStats_db_table_map=${Stats_db_table_map}</arg>
            <arg>-PStats_enclosing_Character=${Stats_enclosing_Character}</arg>
        </java>
        <ok to="exportUsageStats"/>
        <error to="fail"/>
    </action>
879

    
880

    
881
    <!-- exportUsageStats: java action that launches ProcessWrapper, which is given the
         UsageStatsWrapper class name plus the piwik_* parameters and the statistics
         database URL, user and password as -P parameters.
         Transitions to finalizeDatabase on success and to the fail kill node on error. -->
    <action name="exportUsageStats">
        <java>
            <!-- No paths are deleted or created before launch. -->
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>oozie.sqoop.log.level</name>
                    <value>DEBUG</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.UsageStatsWrapper</arg>
            <!-- piwik parameters -->
            <arg>-Ppiwik_username=${piwik_username}</arg>
            <arg>-Ppiwik_pass=${piwik_pass}</arg>
            <arg>-Ppiwik_httpProtocol=${piwik_httpProtocol}</arg>
            <arg>-Ppiwik_url=${piwik_url}</arg>
            <arg>-Ppiwik_siteId=${piwik_siteId}</arg>
            <arg>-Ppiwik_startDate=${piwik_startDate}</arg>
            <arg>-Ppiwik_finalDate=${piwik_finalDate}</arg>
            <arg>-Ppiwik_logsPath=${piwik_logsPath}</arg>
            <arg>-Ppiwik_filterOffset=${piwik_filterOffset}</arg>
            <!-- NOTE(review): "piwiki_schema" is spelled differently from the other piwik_*
                 parameters; left unchanged because the wrapper may read exactly this name. Confirm. -->
            <arg>-Ppiwiki_schema=${piwiki_schema}</arg>
            <!-- statistics database connection -->
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
        </java>
        <ok to="finalizeDatabase"/>
        <error to="fail"/>
    </action>
918

    
919

    
920

    
921
    <!-- finalizeDatabase: java action that launches ProcessWrapper, which is given the
         DBFinalizeWrapper class name plus the statistics database URL, user, password
         and driver as -P parameters.
         Transitions to cleanUpHDFS on success and to the fail kill node on error. -->
    <action name="finalizeDatabase">
        <java>
            <!-- No paths are deleted or created before launch. -->
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
        </java>
        <ok to="cleanUpHDFS"/>
        <error to="fail"/>
    </action>
943

    
944

    
945

    
946
    <!-- cleanUpHDFS: fs action that deletes ${Stats_output_Path} on ${nameNode}.
         Transitions to the end node on success and to the fail kill node on error. -->
    <action name="cleanUpHDFS">
        <fs>
            <delete path="${nameNode}${Stats_output_Path}"/>
        </fs>
        <ok to="end"/>
        <error to="fail"/>
    </action>
955
    <!-- fail: kill node targeted by the error transition of the actions in this workflow.
         The message embeds the error message of the last node that failed. -->
    <kill name="fail">
        <message>
            Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>
960
    <!-- end: terminal node of the workflow; cleanUpHDFS transitions here on success. -->
    <end name="end"/>
961
</workflow-app>