<?xml version="1.0" encoding="UTF-8"?>
<workflow-app name="stats-export"
2
              xmlns="uri:oozie:workflow:0.4">
3
    <!-- Map-reduce job that exports HBase data and prepares it for import into the relational
         database used for statistics generation. -->
5 39902 eri.katsar
6 42742 eri.katsar
    <!-- Workflow-wide defaults: cluster endpoints and job configuration that Oozie applies
         to the actions of this workflow. Values written as EL placeholders are resolved
         from the job properties at submission time. -->
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <!-- scheduler queue used for the launched jobs -->
            <property>
                <name>mapred.job.queue.name</name>
                <value>${queueName}</value>
            </property>
            <!-- NOTE(review): DEBUG logging for sqoop actions; no sqoop action is visible
                 in this part of the file, confirm the setting is still needed. -->
            <property>
                <name>oozie.sqoop.log.level</name>
                <value>DEBUG</value>
            </property>
            <!-- priority of the Oozie launcher job -->
            <property>
                <name>oozie.launcher.mapred.job.priority</name>
                <value>NORMAL</value>
            </property>
            <!-- priority of the jobs started by the actions -->
            <property>
                <name>mapred.job.priority</name>
                <value>NORMAL</value>
            </property>
        </configuration>
    </global>
28 39902 eri.katsar
29 55645 antonis.le
    <!-- entry point: the workflow begins with the prepareDatabase action -->
    <start to="prepareDatabase"/>
31 42742 eri.katsar
32 55645 antonis.le
    <!-- Java action that runs ProcessWrapper with DBInitWrapper and the Stats_db_* connection
         settings as arguments. Continues to mr_export on success and to the "fail" node
         (defined outside this part of the file) on error. -->
    <action name="prepareDatabase">
        <java>
            <prepare/>
            <configuration>
                <!-- same queue as the global default, repeated at action level -->
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <!-- Arguments are handed to ProcessWrapper in this order; presumably the -S and -P
                 prefixes mark its own settings versus parameters for the wrapped class,
                 verify against ProcessWrapper. -->
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <!-- NOTE(review): the database password travels as a plain argument and may show
                 up in job configuration or logs; confirm this is acceptable. -->
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
        </java>
        <ok to="mr_export"/>
        <error to="fail"/>
    </action>
54
55 42742 eri.katsar
    <action name="mr_export">
56
        <map-reduce>
57
            <prepare>
58
                <delete path="${nameNode}${Stats_output_Path}"/>
59
            </prepare>
60
            <configuration>
61 39902 eri.katsar
                <!-- HBASE: scan definition, root directory and authentication mode -->
                <!-- NOTE(review): the scan is read from the output data of an action named
                     'get-scanner'; no such action runs before mr_export in the visible part
                     of this workflow, confirm it exists and is executed first. -->
                <property>
                    <name>hbase.mapreduce.scan</name>
                    <value>${wf:actionData('get-scanner')['scan']}</value>
                </property>
                <!-- The name node placeholder needs EL braces; the previous "$nameNode/hbase"
                     was passed through as literal text instead of being substituted. -->
                <property>
                    <name>hbase.rootdir</name>
                    <value>${nameNode}/hbase</value>
                </property>
                <property>
                    <name>hbase.security.authentication</name>
                    <value>simple</value>
                </property>
73 55645 antonis.le
74 42742 eri.katsar
                <!-- ZOOKEEPER -->
                <!-- The quorum list is kept on a single line with no surrounding whitespace:
                     line breaks and indentation inside the value element would otherwise be
                     handed to consumers as part of the host list. -->
                <property>
                    <name>hbase.zookeeper.quorum</name>
                    <value>namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu</value>
                    <!-- disabled alternative quorum:
                         <value>quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu</value> -->
                </property>
                <property>
                    <name>zookeeper.znode.rootserver</name>
                    <value>root-region-server</value>
                </property>
                <property>
                    <name>hbase.zookeeper.property.clientPort</name>
                    <value>2181</value>
                </property>
92
93
                <!-- MR IO: input format plus key/value classes of the map and job output -->
                <property>
                    <name>mapreduce.inputformat.class</name>
                    <value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
                </property>
                <property>
                    <name>mapred.mapoutput.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapred.mapoutput.value.class</name>
                    <value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value>
                </property>
                <property>
                    <name>mapred.output.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <!-- NOTE(review): the value below names an OutputFormat class rather than a
                     value type; it may have been intended for an output format setting.
                     Left unchanged, confirm the intended output value class. -->
                <property>
                    <name>mapred.output.value.class</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
                </property>
114
                <!-- ## Required for new MapReduce API usage: both the mapper and the reducer
                     are declared as new-API classes -->
                <property>
                    <name>mapred.mapper.new-api</name>
                    <value>true</value>
                </property>
                <property>
                    <name>mapred.reducer.new-api</name>
                    <value>true</value>
                </property>
123
                <!-- # Job-specific options -->
                <property>
                    <name>dfs.blocksize</name>
                    <value>32M</value>
                </property>
                <property>
                    <name>mapred.output.compress</name>
                    <value>false</value>
                </property>
                <!-- Speculative execution is switched off for reduce and map tasks. The reduce
                     property used to be declared twice; the second declaration now carries the
                     matching map-side property name. -->
                <property>
                    <name>mapred.reduce.tasks.speculative.execution</name>
                    <value>false</value>
                </property>
                <property>
                    <name>mapred.map.tasks.speculative.execution</name>
                    <value>false</value>
                </property>
                <property>
                    <name>mapreduce.map.speculative</name>
                    <value>false</value>
                </property>
144
145
                <!-- I/O FORMAT -->
                <!-- IMPORTANT: sets the default delimiter used by the text output writer. Required
                     to fix an issue with a trailing tab added between id and value in multiple
                     outputs. -->
                <property>
                    <name>mapred.textoutputformat.separator</name>
                    <value>${Stats_delim_Character}</value>
                </property>
152
                <!-- ## Names of all output ports -->
                <!-- Space-separated list of named outputs, kept on a single line with no leading
                     or trailing whitespace so that line breaks and indentation cannot end up
                     inside the first or last output name. -->
                <property>
                    <name>mapreduce.multipleoutputs</name>
                    <value>${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21} ${out22} ${out23} ${out24} ${out25} ${out26} ${out27} ${out28} ${out29} ${out30} ${out31} ${out32} ${out33} ${out34} ${out35} ${out36} ${out37} ${out38} ${out39}</value>
                </property>
162
                <!-- datasource -->
163
                <property>
164
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name>
165
                    <value>org.apache.hadoop.io.Text</value>
166
                </property>
167
                <property>
168
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name>
169
                    <value>org.apache.hadoop.io.Text</value>
170
                </property>
171
                <property>
172
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
173
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
174
                </property>
175
                <!-- project -->
176
                <property>
177
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name>
178
                    <value>org.apache.hadoop.io.Text</value>
179
                </property>
180
                <property>
181
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name>
182
                    <value>org.apache.hadoop.io.Text</value>
183
                </property>
184
                <property>
185
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
186
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
187
                </property>
188
189
                <!-- organization -->
190
                <property>
191
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name>
192
                    <value>org.apache.hadoop.io.Text</value>
193
                </property>
194
                <property>
195
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name>
196
                    <value>org.apache.hadoop.io.Text</value>
197
                </property>
198
                <property>
199
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name>
200
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
201
                </property>
202
203
                <!-- datasourceOrganization -->
204
                <property>
205
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name>
206
                    <value>org.apache.hadoop.io.Text</value>
207
                </property>
208
                <property>
209
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name>
210
                    <value>org.apache.hadoop.io.Text</value>
211
                </property>
212
                <property>
213
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name>
214
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
215
                </property>
216
217
                <!-- datasourceTopic -->
218
                <property>
219
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name>
220
                    <value>org.apache.hadoop.io.Text</value>
221
                </property>
222
                <property>
223
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name>
224
                    <value>org.apache.hadoop.io.Text</value>
225
                </property>
226
                <property>
227
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name>
228
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
229
                </property>
230
                <!-- datasourceLanguage -->
231
                <property>
232
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name>
233
                    <value>org.apache.hadoop.io.Text</value>
234
                </property>
235
                <property>
236
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name>
237
                    <value>org.apache.hadoop.io.Text</value>
238
                </property>
239
                <property>
240
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name>
241
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
242
                </property>
243
244
                <!-- projectOrganization -->
245
                <property>
246
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name>
247
                    <value>org.apache.hadoop.io.Text</value>
248
                </property>
249
                <property>
250
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.value</name>
251
                    <value>org.apache.hadoop.io.Text</value>
252
                </property>
253
                <property>
254
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name>
255
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
256
                </property>
257
                <!-- resultClaim -->
258
                <property>
259
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name>
260
                    <value>org.apache.hadoop.io.Text</value>
261
                </property>
262
                <property>
263
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.value</name>
264
                    <value>org.apache.hadoop.io.Text</value>
265
                </property>
266
                <property>
267
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name>
268
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
269
                </property>
270
271
                <!-- resultClassification -->
272
                <property>
273
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name>
274
                    <value>org.apache.hadoop.io.Text</value>
275
                </property>
276
                <property>
277
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.value</name>
278
                    <value>org.apache.hadoop.io.Text</value>
279
                </property>
280
                <property>
281
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name>
282
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
283
                </property>
284
285
                <!-- resultConcept -->
286
                <property>
287
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name>
288
                    <value>org.apache.hadoop.io.Text</value>
289
                </property>
290
                <property>
291
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.value</name>
292
                    <value>org.apache.hadoop.io.Text</value>
293
                </property>
294
                <property>
295
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name>
296
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
297
                </property>
298
299
                <!-- resultLanguage -->
300
                <property>
301
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name>
302
                    <value>org.apache.hadoop.io.Text</value>
303
                </property>
304
                <property>
305
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.value</name>
306
                    <value>org.apache.hadoop.io.Text</value>
307
                </property>
308
                <property>
309
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name>
310
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
311
                </property>
312
313
                <!-- resultOrganization -->
314
                <property>
315
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name>
316
                    <value>org.apache.hadoop.io.Text</value>
317
                </property>
318
                <property>
319
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.value</name>
320
                    <value>org.apache.hadoop.io.Text</value>
321
                </property>
322
                <property>
323
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name>
324
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
325
                </property>
326
327
                <!-- resultResult -->
328
                <property>
329
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name>
330
                    <value>org.apache.hadoop.io.Text</value>
331
                </property>
332
                <property>
333
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.value</name>
334
                    <value>org.apache.hadoop.io.Text</value>
335
                </property>
336
                <property>
337
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name>
338
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
339
                </property>
340
341
                <!-- resultProject -->
342
                <property>
343
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name>
344
                    <value>org.apache.hadoop.io.Text</value>
345
                </property>
346
                <property>
347
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.value</name>
348
                    <value>org.apache.hadoop.io.Text</value>
349
                </property>
350
                <property>
351
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name>
352
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
353
                </property>
354
                <!-- resultTopic -->
355
                <property>
356
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name>
357
                    <value>org.apache.hadoop.io.Text</value>
358
                </property>
359
                <property>
360
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.value</name>
361
                    <value>org.apache.hadoop.io.Text</value>
362
                </property>
363
                <property>
364
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name>
365
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
366
                </property>
367
                <!-- resultDatasource -->
368
                <property>
369
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name>
370
                    <value>org.apache.hadoop.io.Text</value>
371
                </property>
372
                <property>
373
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.value</name>
374
                    <value>org.apache.hadoop.io.Text</value>
375
                </property>
376
                <property>
377
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name>
378
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
379
                </property>
380
381
                <!-- result -->
382
                <property>
383
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name>
384
                    <value>org.apache.hadoop.io.Text</value>
385
                </property>
386
                <property>
387
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.value</name>
388
                    <value>org.apache.hadoop.io.Text</value>
389
                </property>
390
                <property>
391
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name>
392
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
393
                </property>
394
395
396
                <!-- context -->
397
                <property>
398
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name>
399
                    <value>org.apache.hadoop.io.Text</value>
400
                </property>
401
                <property>
402
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.value</name>
403
                    <value>org.apache.hadoop.io.Text</value>
404
                </property>
405
                <property>
406
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name>
407
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
408
                </property>
409
410
                <!-- concept -->
411
                <property>
412
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name>
413
                    <value>org.apache.hadoop.io.Text</value>
414
                </property>
415
                <property>
416
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.value</name>
417
                    <value>org.apache.hadoop.io.Text</value>
418
                </property>
419
                <property>
420
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name>
421
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
422
                </property>
423
424
                <!-- category -->
425
426
                <property>
427
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
428
                    <value>org.apache.hadoop.io.Text</value>
429
                </property>
430
                <property>
431
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.value</name>
432
                    <value>org.apache.hadoop.io.Text</value>
433
                </property>
434
                <property>
435
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
436
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
437
                </property>
438
439
440
                <!-- person -->
441
442
                <property>
443 39902 eri.katsar
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.key</name>
444
                    <value>org.apache.hadoop.io.Text</value>
445
                </property>
446
                <property>
447
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.value</name>
448
                    <value>org.apache.hadoop.io.Text</value>
449
                </property>
450
                <property>
451
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.format</name>
452
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
453
                </property>
454
455 42742 eri.katsar
456
                <!-- personResult -->
457
                <property>
458 39902 eri.katsar
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.key</name>
459
                    <value>org.apache.hadoop.io.Text</value>
460
                </property>
461
                <property>
462
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.value</name>
463
                    <value>org.apache.hadoop.io.Text</value>
464
                </property>
465
                <property>
466
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.format</name>
467
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
468
                </property>
469
470 42742 eri.katsar
                <!--resultCitation -->
471
                <property>
472
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.key</name>
473
                    <value>org.apache.hadoop.io.Text</value>
474
                </property>
475
                <property>
476
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.value</name>
477
                    <value>org.apache.hadoop.io.Text</value>
478
                </property>
479
                <property>
480
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.format</name>
481
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
482
                </property>
483
484
                <!--resultPid -->
485
                <property>
486
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.key</name>
487
                    <value>org.apache.hadoop.io.Text</value>
488
                </property>
489
                <property>
490
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.value</name>
491
                    <value>org.apache.hadoop.io.Text</value>
492
                </property>
493
                <property>
494
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.format</name>
495
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
496
                </property>
497
498
                <!-- resultOid-->
499
500
                <property>
501
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.key</name>
502
                    <value>org.apache.hadoop.io.Text</value>
503
                </property>
504
                <property>
505
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.value</name>
506
                    <value>org.apache.hadoop.io.Text</value>
507
                </property>
508
                <property>
509
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.format</name>
510
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
511
                </property>
512
513
                <!--projectOid-->
514
                <property>
515
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.key</name>
516
                    <value>org.apache.hadoop.io.Text</value>
517
                </property>
518
                <property>
519
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.value</name>
520
                    <value>org.apache.hadoop.io.Text</value>
521
                </property>
522
                <property>
523
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.format</name>
524
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
525
                </property>
526
527
                <!-- projectKeyword-->
528
                <property>
529
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.key</name>
530
                    <value>org.apache.hadoop.io.Text</value>
531
                </property>
532
                <property>
533
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.value</name>
534
                    <value>org.apache.hadoop.io.Text</value>
535
                </property>
536
                <property>
537
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.format</name>
538
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
539
                </property>
540
541
                <!-- projectSubject-->
542
543
                <property>
544
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.key</name>
545
                    <value>org.apache.hadoop.io.Text</value>
546
                </property>
547
                <property>
548
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.value</name>
549
                    <value>org.apache.hadoop.io.Text</value>
550
                </property>
551
                <property>
552
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.format</name>
553
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
554
                </property>
555
556
                <!--  organizationOid-->
557
                <property>
558
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.key</name>
559
                    <value>org.apache.hadoop.io.Text</value>
560
                </property>
561
                <property>
562
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.value</name>
563
                    <value>org.apache.hadoop.io.Text</value>
564
                </property>
565
                <property>
566
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.format</name>
567
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
568
                </property>
569
570
571
                <!--     datasourceOid-->
572
573
                <property>
574
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.key</name>
575
                    <value>org.apache.hadoop.io.Text</value>
576
                </property>
577
                <property>
578
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.value</name>
579
                    <value>org.apache.hadoop.io.Text</value>
580
                </property>
581
                <property>
582
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.format</name>
583
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
584
                </property>
585
586
                <!--personOid-->
587
588
                <property>
589
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.key</name>
590
                    <value>org.apache.hadoop.io.Text</value>
591
                </property>
592
                <property>
593
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.value</name>
594
                    <value>org.apache.hadoop.io.Text</value>
595
                </property>
596
                <property>
597
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.format</name>
598
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
599
                </property>
600
601
                <!--     projectPerson-->
602
603
                <property>
604
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.key</name>
605
                    <value>org.apache.hadoop.io.Text</value>
606
                </property>
607
                <property>
608
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.value</name>
609
                    <value>org.apache.hadoop.io.Text</value>
610
                </property>
611
                <property>
612
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.format</name>
613
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
614
                </property>
615
616
                <!--datasourceStats-->
617
                <property>
618
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.key</name>
619
                    <value>org.apache.hadoop.io.Text</value>
620
                </property>
621
                <property>
622
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.value</name>
623
                    <value>org.apache.hadoop.io.Text</value>
624
                </property>
625
                <property>
626
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.format</name>
627
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
628
                </property>
629
630
631
                <!-- projectStats: named output out34 uses Text keys, Text values and TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
644
645
646
                <!-- organizationStats: named output out35 uses Text keys, Text values and TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
659
660
661
                <!-- resultStats: named output out36 uses Text keys, Text values and TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
675
676
                <!-- result: named output out37 uses Text keys, Text values and TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
690
691
                <!-- resultDescription: named output out38 uses Text keys, Text values and TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out38}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
705
706
707
                <!-- orgOrg: named output out39 uses Text keys, Text values and TextOutputFormat -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out39}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out39}.value</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out39}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
721
722
723 39902 eri.katsar
                <!-- ## Classes of mapper and reducer -->
                <property>
                    <name>mapreduce.map.class</name>
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsMapper</value>
                </property>
                <property>
                    <name>mapreduce.reduce.class</name>
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsReducer</value>
                </property>
                <!-- WritableSerialization is the only serialization registered for this job -->
                <property>
                    <name>io.serializations</name>
                    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
                </property>
737 39902 eri.katsar
738 42742 eri.katsar
                <!-- ## Custom config -->

                <!-- delimiter character used to separate fields in the HDFS dump files -->
                <property>
                    <name>stats.delim</name>
                    <value>${Stats_delim_Character}</value>
                </property>
                <!-- default string for null string values -->
                <property>
                    <name>stats.nullString</name>
                    <value>${Stats_null_String_Field}</value>
                </property>
                <!-- default string for null numeric values -->
                <property>
                    <name>stats.nullNum</name>
                    <value>${Stats_null_Numeric_Field}</value>
                </property>
                <!-- enclosing character, taken from the Stats_enclosing_Character parameter
                     (presumably wraps each field value; confirm against the mapper) -->
                <property>
                    <name>stats.enclChar</name>
                    <value>${Stats_enclosing_Character}</value>
                </property>
                <!-- flag taken from the Stats_getdeletedbyinference parameter
                     (presumably controls whether deleted-by-inference records are exported; confirm) -->
                <property>
                    <name>stats.getDeletedByInf</name>
                    <value>${Stats_getdeletedbyinference}</value>
                </property>
765 39902 eri.katsar
766 42742 eri.katsar
                <!--
767
                                <property>
768
                                    <name>stats.mergedPass</name>
769
                                    <value>${Stats_db_Merged_Pass}</value>
770
                                </property>
771
772
773
                                <property>
774
                                    <name>stats.mergeduser</name>
775
                                    <value>${Stats_db_Merged_User}</value>
776
                                </property>-->
777
778
779
                <!-- source HBase table; the same table name is supplied under both the
                     hbase.mapreduce.inputtable and hbase.mapred.inputtable keys -->
                <property>
                    <name>hbase.mapreduce.inputtable</name>
                    <value>${Stats_Hbase_Source_Table}</value>
                </property>
                <property>
                    <name>hbase.mapred.inputtable</name>
                    <value>${Stats_Hbase_Source_Table}</value>
                </property>
788
789
                <!-- mapping of proto entities to tables in the relational database -->
                <property>
                    <name>stats.dbTablesMap</name>
                    <value>${Stats_db_table_map}</value>
                </property>
794
795
                <!-- This directory does not correspond to a single data store; it only contains
                     multiple data stores. It has to be set to the name of the workflow node. -->
                <property>
                    <name>mapred.output.dir</name>
                    <value>${Stats_output_Path}</value>
                </property>
802
                <!-- index configuration handed to the job as stats.indexConf
                     (its content and consumer are not visible in this workflow) -->
                <property>
                    <name>stats.indexConf</name>
                    <value>${Stats_indexConf}</value>
                </property>
806
                <!-- ## Workflow node parameters -->
                <!-- number of reduce tasks, supplied through the numReducers workflow parameter -->
                <property>
                    <name>mapred.reduce.tasks</name>
                    <value>${numReducers}</value>
                </property>
811
812
            </configuration>
813
        </map-reduce>
814
        <ok to="exportContext"/>
815
816
        <error to="fail"/>
817
    </action>
818
819
    <!-- exportContext: Java action running ContextExportWrapper with two positional arguments,
         the stats output path and the IS lookup endpoint. The wrapper's behaviour is not visible
         here (presumably it writes context data under the output path; confirm).
         Transitions: ok to sqoopImport, error to fail. -->
    <action name="exportContext">
        <java>
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.workflows.stats.ContextExportWrapper</main-class>
            <arg>${Stats_output_Path}</arg>
            <arg>${isLookupEndpoint}</arg>
        </java>
        <ok to="sqoopImport"/>
        <error to="fail"/>
    </action>
838
839
    <!-- sqoopImport: Java action that starts ProcessWrapper with the working directory (-S),
         the SqoopWrapper class name, and -P parameters carrying the database connection,
         the stats output path, the Sqoop tuning values, the field delimiter, the table map and
         the enclosing character. The wrapper itself is not visible here (presumably it loads the
         HDFS dump into the statistics database; confirm).
         NOTE(review): the database password travels as a plain argument; check whether it can
         surface in job definitions or logs.
         Transitions: ok to finalizeDatabase, error to fail. -->
    <action name="sqoopImport">
        <java>
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>oozie.sqoop.log.level</name>
                    <value>DEBUG</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_output_Path=${Stats_output_Path}</arg>
            <arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg>
            <arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg>
            <arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg>
            <arg>-PStats_delim_Character=${Stats_delim_Character}</arg>
            <arg>-PStats_db_table_map=${Stats_db_table_map}</arg>
            <arg>-PStats_enclosing_Character=${Stats_enclosing_Character}</arg>
        </java>
        <ok to="finalizeDatabase"/>
        <error to="fail"/>
    </action>
876
877 45949 tsampikos.
    <!-- finalizeDatabase: Java action that starts ProcessWrapper with the working directory (-S),
         the DBFinalizeWrapper class name, and the database URL, user, password and driver as
         -P parameters. What the wrapper does to the database is not visible here.
         Transitions: ok to exportUsageStats, error to fail. -->
    <action name="finalizeDatabase">
        <java>
            <prepare/>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
        </java>
        <ok to="exportUsageStats"/>
        <error to="fail"/>
    </action>
899
900 42742 eri.katsar
    <!-- exportUsageStats: Java action that starts ProcessWrapper with the working directory (-S),
         the UsageStatsWrapper class name, and -P parameters for the Piwik connection, date range,
         log path, filter offset and schema, the statistics database connection and an auth token.
         Before the action runs, the Piwik logs directory is deleted and recreated.
         NOTE(review): "piwiki_schema" is spelled differently from the other piwik_ parameters;
         it is kept as is because the wrapper and job properties are not visible here.
         Transitions: ok to cleanUpHDFS, error to fail. -->
    <action name="exportUsageStats">
        <java>
            <prepare>
                <delete path="${nameNode}${piwik_logsPath}"/>
                <mkdir path="${nameNode}${piwik_logsPath}"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>oozie.sqoop.log.level</name>
                    <value>DEBUG</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.UsageStatsWrapper</arg>
            <arg>-Ppiwik_username=${piwik_username}</arg>
            <arg>-Ppiwik_pass=${piwik_pass}</arg>
            <arg>-Ppiwik_httpProtocol=${piwik_httpProtocol}</arg>
            <arg>-Ppiwik_url=${piwik_url}</arg>
            <arg>-Ppiwik_siteId=${piwik_siteId}</arg>
            <arg>-Ppiwik_startDate=${piwik_startDate}</arg>
            <arg>-Ppiwik_finalDate=${piwik_finalDate}</arg>
            <arg>-Ppiwik_logsPath=${piwik_logsPath}</arg>
            <arg>-Ppiwik_filterOffset=${piwik_filterOffset}</arg>
            <arg>-Ppiwiki_schema=${piwiki_schema}</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-Ptoken_auth=${token_auth}</arg>
            <!-- IRUS parameters, currently disabled: -->
            <!--
            <arg>-Pirus_baseuRL=${irus_baseuRL}</arg>
            <arg>-Pirus_reportType=${irus_reportType}</arg>
            <arg>-Pirus_release=${irus_release}</arg>
            <arg>-Pirus_requestorid=${irus_requestorid}</arg>
            <arg>-Pirus_repositoryid=${irus_repositoryid}</arg>
            <arg>-Pirus_period_granularity=${irus_period_granularity}</arg>
            <arg>-Pirus_period=${irus_period}</arg>
            <arg>-Pirus_token_auth=${irus_token_auth}</arg>
            -->
        </java>
        <ok to="cleanUpHDFS"/>
        <error to="fail"/>
    </action>
953
954
955
    <!-- cleanUpHDFS: fs action that deletes the stats output path and the Piwik logs path from
         HDFS. It is reached only through the ok transition of exportUsageStats.
         Transitions: ok to end, error to fail. -->
    <action name="cleanUpHDFS">
        <fs>
            <delete path="${nameNode}${Stats_output_Path}"/>
            <delete path="${nameNode}${piwik_logsPath}"/>
        </fs>
        <ok to="end"/>
        <error to="fail"/>
    </action>
965
    <!-- fail: kill node targeted by the error transitions of the actions above. The message
         embeds the error message of the last node that reported an error. -->
    <kill name="fail">
        <message>
            Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>
970
    <!-- end: normal termination node, reached from cleanUpHDFS on success -->
    <end name="end"/>
971 39902 eri.katsar
</workflow-app>