Project

General

Profile

« Previous | Next » 

Revision 41501

Added by Eri Katsari about 8 years ago

updates for usage statistics

View differences:

workflow.xml
1 1
<workflow-app name="stats-export"
2
	xmlns="uri:oozie:workflow:0.4">
3
	<!-- map reduce job that exports hbase data and prepares them for import to the relational
4
		database used for statistics generation -->
5
	
6
	<global>
7
		<job-tracker>${jobTracker}</job-tracker>
8
		<name-node>${nameNode}</name-node>
9
		<configuration>
10
			<property>
11
				<name>mapred.job.queue.name</name>
12
				<value>${queueName}</value>
13
			</property>
14
			<property>
15
				<name>oozie.sqoop.log.level</name>
16
				<value>DEBUG</value>
17
			</property>
18
	   <property>
2
              xmlns="uri:oozie:workflow:0.4">
3
    <!-- map reduce job that exports hbase data and prepares them for import to the relational
4
        database used for statistics generation -->
5

  
6
    <global>
7
        <job-tracker>${jobTracker}</job-tracker>
8
        <name-node>${nameNode}</name-node>
9
        <configuration>
10
            <property>
11
                <name>mapred.job.queue.name</name>
12
                <value>${queueName}</value>
13
            </property>
14
            <property>
15
                <name>oozie.sqoop.log.level</name>
16
                <value>DEBUG</value>
17
            </property>
18
            <property>
19 19
                <name>oozie.launcher.mapred.job.priority</name>
20
                <value>HIGH</value>
20
                <value>NORMAL</value>
21 21
            </property>
22 22
            <property>
23 23
                <name>mapred.job.priority</name>
24
                <value>HIGH</value>
24
                <value>NORMAL</value>
25 25
            </property>
26
	</configuration>
27
	</global>
28
       <start to="mr_export"/>
29
	<action name="mr_export">
30
		<map-reduce>
31
			
32
			<prepare>
33
				<delete path="${nameNode}${Stats_output_Path}" />
34
				
35
			</prepare>
36
			<configuration>
37
				<property>
38
					<name>hbase.mapreduce.scan</name>
39
					<value>${wf:actionData('get-scanner')['scan']}</value>
40
				</property>
41
				<property>
42
					<name>hbase.rootdir</name>
43
					<value>$nameNode/hbase</value>
44
					
45
				</property>
46
				
47
				<property>
48
					<name>hbase.security.authentication</name>
49
					<value>simple</value>
50
				</property>
51
				<!-- ZOOKEEPER -->
52
				
53
				<property>
54
					<name>hbase.zookeeper.quorum</name>
55
					<value>
56
						namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu
57
					</value>
58
					<!-- <value> quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
59
						</value> -->
60
				</property>
61
				<property>
62
					<name>zookeeper.znode.rootserver</name>
63
					<value>root-region-server</value>
64
					
65
				</property>
66
				
67
				<property>
68
					<name>hbase.zookeeper.property.clientPort</name>
69
					<value>2181</value>
70
				</property>
71
				
72
				
73
				<!-- MR IO -->
74
				
75
				
76
				<property>
77
					<name>mapreduce.inputformat.class</name>
78
					<value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
79
				</property>
80
				
81
				<property>
82
					<name>mapred.mapoutput.key.class</name>
83
					<value>org.apache.hadoop.io.Text</value>
84
				</property>
85
				<property>
86
					<name>mapred.mapoutput.value.class</name>
87
					<value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value>
88
				</property>
89
				<property>
90
					<name>mapred.output.key.class</name>
91
					<value>org.apache.hadoop.io.Text</value>
92
				</property>
93
				<property>
94
					<name>mapred.output.value.class</name>
95
					<value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
96
				</property>
97
				
98
				<!-- ## This is required for new MapReduce API usage -->
99
				<property>
100
					<name>mapred.mapper.new-api</name>
101
					<value>true</value>
102
				</property>
103
				<property>
104
					<name>mapred.reducer.new-api</name>
105
					<value>true</value>
106
				</property>
107
				
108
				<!-- # Job-specific options -->
109
				<property>
110
					<name>dfs.blocksize</name>
111
					<value>32M</value>
112
				</property>
113
				<property>
114
					<name>mapred.output.compress</name>
115
					<value>false</value>
116
				</property>
117
				<property>
118
					<name>mapred.reduce.tasks.speculative.execution</name>
119
					<value>false</value>
120
				</property>
121
				<property>
122
					<name>mapred.reduce.tasks.speculative.execution</name>
123
					<value>false</value>
124
				</property>
125
				<property>
126
					<name>mapreduce.map.speculative</name>
127
					<value>false</value>
128
				</property>
129
				
130
				<!-- I/O FORMAT -->
131
				<!-- IMPORTANT: sets default delimiter used by text output writer. Required to fix
132
					issue with trailing tab added between id and value in multiple outputs -->
133
				<property>
134
					<name>mapred.textoutputformat.separator</name>
135
					<value>${Stats_delim_Character}</value>
136
				</property>
137
				<!-- ## Names of all output ports -->
138
				
139
				<property>
140
					<name>mapreduce.multipleoutputs</name>
141
					
142
					<value>
143
${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21} ${out22} ${out23}  ${out24} ${out25} ${out26} ${out27} ${out28} ${out29} ${out30} ${out31} ${out32} ${out33}
144
					</value>
145
					
146
				</property>
147
				<!-- datasource -->
148
				<property>
149
					<name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name>
150
					<value>org.apache.hadoop.io.Text</value>
151
				</property>
152
				<property>
153
					<name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name>
154
					<value>org.apache.hadoop.io.Text</value>
155
				</property>
156
				<property>
157
					<name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
158
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
159
				</property>
160
				<!-- datasourceLanguage -->
161
				<property>
162
					<name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name>
163
					<value>org.apache.hadoop.io.Text</value>
164
				</property>
165
				<property>
166
					<name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name>
167
					<value>org.apache.hadoop.io.Text</value>
168
				</property>
169
				<property>
170
					<name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
171
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
172
				</property>
173
				
174
				<!-- datasourceOrganization -->
175
				<property>
176
					<name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name>
177
					<value>org.apache.hadoop.io.Text</value>
178
				</property>
179
				<property>
180
					<name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name>
181
					<value>org.apache.hadoop.io.Text</value>
182
				</property>
183
				<property>
184
					<name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name>
185
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
186
				</property>
187
				
188
				<!-- datasourceTopic -->
189
				<property>
190
					<name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name>
191
					<value>org.apache.hadoop.io.Text</value>
192
				</property>
193
				<property>
194
					<name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name>
195
					<value>org.apache.hadoop.io.Text</value>
196
				</property>
197
				<property>
198
					<name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name>
199
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
200
				</property>
201
				
202
				<!-- resultDatasource -->
203
				<property>
204
					<name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name>
205
					<value>org.apache.hadoop.io.Text</value>
206
				</property>
207
				<property>
208
					<name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name>
209
					<value>org.apache.hadoop.io.Text</value>
210
				</property>
211
				<property>
212
					<name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name>
213
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
214
				</property>
215
				<!-- organization -->
216
				<property>
217
					<name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name>
218
					<value>org.apache.hadoop.io.Text</value>
219
				</property>
220
				<property>
221
					<name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name>
222
					<value>org.apache.hadoop.io.Text</value>
223
				</property>
224
				<property>
225
					<name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name>
226
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
227
				</property>
228
				
229
				<!-- projectOrganization -->
230
				<property>
231
					<name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name>
232
					<value>org.apache.hadoop.io.Text</value>
233
				</property>
234
				<property>
235
					<name>mapreduce.multipleoutputs.namedOutput.${out7}.value</name>
236
					<value>org.apache.hadoop.io.Text</value>
237
				</property>
238
				<property>
239
					<name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name>
240
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
241
				</property>
242
				<!-- resultProject -->
243
				<property>
244
					<name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name>
245
					<value>org.apache.hadoop.io.Text</value>
246
				</property>
247
				<property>
248
					<name>mapreduce.multipleoutputs.namedOutput.${out8}.value</name>
249
					<value>org.apache.hadoop.io.Text</value>
250
				</property>
251
				<property>
252
					<name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name>
253
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
254
				</property>
255
				
256
				<!-- project -->
257
				<property>
258
					<name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name>
259
					<value>org.apache.hadoop.io.Text</value>
260
				</property>
261
				<property>
262
					<name>mapreduce.multipleoutputs.namedOutput.${out9}.value</name>
263
					<value>org.apache.hadoop.io.Text</value>
264
				</property>
265
				<property>
266
					<name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name>
267
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
268
				</property>
269
				
270
				<!-- resultConcept -->
271
				<property>
272
					<name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name>
273
					<value>org.apache.hadoop.io.Text</value>
274
				</property>
275
				<property>
276
					<name>mapreduce.multipleoutputs.namedOutput.${out10}.value</name>
277
					<value>org.apache.hadoop.io.Text</value>
278
				</property>
279
				<property>
280
					<name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name>
281
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
282
				</property>
283
				
284
				<!-- resultClaim -->
285
				<property>
286
					<name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name>
287
					<value>org.apache.hadoop.io.Text</value>
288
				</property>
289
				<property>
290
					<name>mapreduce.multipleoutputs.namedOutput.${out11}.value</name>
291
					<value>org.apache.hadoop.io.Text</value>
292
				</property>
293
				<property>
294
					<name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name>
295
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
296
				</property>
297
				
298
				<!-- resultClassification -->
299
				<property>
300
					<name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name>
301
					<value>org.apache.hadoop.io.Text</value>
302
				</property>
303
				<property>
304
					<name>mapreduce.multipleoutputs.namedOutput.${out12}.value</name>
305
					<value>org.apache.hadoop.io.Text</value>
306
				</property>
307
				<property>
308
					<name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name>
309
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
310
				</property>
311
				
312
				<!-- resultLanguage -->
313
				<property>
314
					<name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name>
315
					<value>org.apache.hadoop.io.Text</value>
316
				</property>
317
				<property>
318
					<name>mapreduce.multipleoutputs.namedOutput.${out13}.value</name>
319
					<value>org.apache.hadoop.io.Text</value>
320
				</property>
321
				<property>
322
					<name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name>
323
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
324
				</property>
325
				
326
				<!-- resultProject -->
327
				<property>
328
					<name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name>
329
					<value>org.apache.hadoop.io.Text</value>
330
				</property>
331
				<property>
332
					<name>mapreduce.multipleoutputs.namedOutput.${out14}.value</name>
333
					<value>org.apache.hadoop.io.Text</value>
334
				</property>
335
				<property>
336
					<name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name>
337
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
338
				</property>
339
				<!-- resultResult -->
340
				<property>
341
					<name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name>
342
					<value>org.apache.hadoop.io.Text</value>
343
				</property>
344
				<property>
345
					<name>mapreduce.multipleoutputs.namedOutput.${out15}.value</name>
346
					<value>org.apache.hadoop.io.Text</value>
347
				</property>
348
				<property>
349
					<name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name>
350
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
351
				</property>
352
				<!-- resultTopic -->
353
				<property>
354
					<name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name>
355
					<value>org.apache.hadoop.io.Text</value>
356
				</property>
357
				<property>
358
					<name>mapreduce.multipleoutputs.namedOutput.${out16}.value</name>
359
					<value>org.apache.hadoop.io.Text</value>
360
				</property>
361
				<property>
362
					<name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name>
363
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
364
				</property>
365
				
366
				<!-- resultDatasource -->
367
				<property>
368
					<name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name>
369
					<value>org.apache.hadoop.io.Text</value>
370
				</property>
371
				<property>
372
					<name>mapreduce.multipleoutputs.namedOutput.${out17}.value</name>
373
					<value>org.apache.hadoop.io.Text</value>
374
				</property>
375
				<property>
376
					<name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name>
377
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
378
				</property>
379
				
380
				
381
				
382
				<!-- result -->
383
				<property>
384
					<name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name>
385
					<value>org.apache.hadoop.io.Text</value>
386
				</property>
387
				<property>
388
					<name>mapreduce.multipleoutputs.namedOutput.${out18}.value</name>
389
					<value>org.apache.hadoop.io.Text</value>
390
				</property>
391
				<property>
392
					<name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name>
393
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
394
				</property>
395
				
396
				<!-- claim -->
397
				<property>
398
					<name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name>
399
					<value>org.apache.hadoop.io.Text</value>
400
				</property>
401
				<property>
402
					<name>mapreduce.multipleoutputs.namedOutput.${out19}.value</name>
403
					<value>org.apache.hadoop.io.Text</value>
404
				</property>
405
				<property>
406
					<name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name>
407
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
408
				</property>
26
        </configuration>
27
    </global>
28
    <start to="exportUsageStats"/>
29
    <action name="mr_export">
30
        <map-reduce>
409 31

  
32
            <prepare>
33
                <delete path="${nameNode}${Stats_output_Path}"/>
34

  
35
            </prepare>
36
            <configuration>
37
                <property>
38
                    <name>hbase.mapreduce.scan</name>
39
                    <value>${wf:actionData('get-scanner')['scan']}</value>
40
                </property>
41
                <property>
42
                    <name>hbase.rootdir</name>
43
                    <value>$nameNode/hbase</value>
44

  
45
                </property>
46

  
47
                <property>
48
                    <name>hbase.security.authentication</name>
49
                    <value>simple</value>
50
                </property>
51
                <!-- ZOOKEEPER -->
52

  
53
                <property>
54
                    <name>hbase.zookeeper.quorum</name>
55
                    <value>
56
                        namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu
57
                    </value>
58
                    <!-- <value> quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
59
                        </value> -->
60
                </property>
61
                <property>
62
                    <name>zookeeper.znode.rootserver</name>
63
                    <value>root-region-server</value>
64

  
65
                </property>
66

  
67
                <property>
68
                    <name>hbase.zookeeper.property.clientPort</name>
69
                    <value>2181</value>
70
                </property>
71

  
72

  
73
                <!-- MR IO -->
74

  
75

  
76
                <property>
77
                    <name>mapreduce.inputformat.class</name>
78
                    <value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
79
                </property>
80

  
81
                <property>
82
                    <name>mapred.mapoutput.key.class</name>
83
                    <value>org.apache.hadoop.io.Text</value>
84
                </property>
85
                <property>
86
                    <name>mapred.mapoutput.value.class</name>
87
                    <value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value>
88
                </property>
89
                <property>
90
                    <name>mapred.output.key.class</name>
91
                    <value>org.apache.hadoop.io.Text</value>
92
                </property>
93
                <property>
94
                    <name>mapred.output.value.class</name>
95
                    <value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
96
                </property>
97

  
98
                <!-- ## This is required for new MapReduce API usage -->
99
                <property>
100
                    <name>mapred.mapper.new-api</name>
101
                    <value>true</value>
102
                </property>
103
                <property>
104
                    <name>mapred.reducer.new-api</name>
105
                    <value>true</value>
106
                </property>
107

  
108
                <!-- # Job-specific options -->
109
                <property>
110
                    <name>dfs.blocksize</name>
111
                    <value>32M</value>
112
                </property>
113
                <property>
114
                    <name>mapred.output.compress</name>
115
                    <value>false</value>
116
                </property>
117
                <property>
118
                    <name>mapred.reduce.tasks.speculative.execution</name>
119
                    <value>false</value>
120
                </property>
121
                <property>
122
                    <name>mapred.reduce.tasks.speculative.execution</name>
123
                    <value>false</value>
124
                </property>
125
                <property>
126
                    <name>mapreduce.map.speculative</name>
127
                    <value>false</value>
128
                </property>
129

  
130
                <!-- I/O FORMAT -->
131
                <!-- IMPORTANT: sets default delimiter used by text output writer. Required to fix
132
                    issue with trailing tab added between id and value in multiple outputs -->
133
                <property>
134
                    <name>mapred.textoutputformat.separator</name>
135
                    <value>${Stats_delim_Character}</value>
136
                </property>
137
                <!-- ## Names of all output ports -->
138

  
139
                <property>
140
                    <name>mapreduce.multipleoutputs</name>
141

  
142
                    <value>
143
                        ${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11}
144
                        ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21}
145
                        ${out22} ${out23} ${out24} ${out25} ${out26} ${out27} ${out28} ${out29} ${out30} ${out31}
146
                        ${out32} ${out33}
147
                    </value>
148

  
149
                </property>
150
                <!-- datasource -->
151
                <property>
152
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name>
153
                    <value>org.apache.hadoop.io.Text</value>
154
                </property>
155
                <property>
156
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name>
157
                    <value>org.apache.hadoop.io.Text</value>
158
                </property>
159
                <property>
160
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
161
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
162
                </property>
163
                <!-- datasourceLanguage -->
164
                <property>
165
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name>
166
                    <value>org.apache.hadoop.io.Text</value>
167
                </property>
168
                <property>
169
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name>
170
                    <value>org.apache.hadoop.io.Text</value>
171
                </property>
172
                <property>
173
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
174
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
175
                </property>
176

  
177
                <!-- datasourceOrganization -->
178
                <property>
179
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name>
180
                    <value>org.apache.hadoop.io.Text</value>
181
                </property>
182
                <property>
183
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name>
184
                    <value>org.apache.hadoop.io.Text</value>
185
                </property>
186
                <property>
187
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name>
188
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
189
                </property>
190

  
191
                <!-- datasourceTopic -->
192
                <property>
193
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name>
194
                    <value>org.apache.hadoop.io.Text</value>
195
                </property>
196
                <property>
197
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name>
198
                    <value>org.apache.hadoop.io.Text</value>
199
                </property>
200
                <property>
201
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name>
202
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
203
                </property>
204

  
205
                <!-- resultDatasource -->
206
                <property>
207
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name>
208
                    <value>org.apache.hadoop.io.Text</value>
209
                </property>
210
                <property>
211
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name>
212
                    <value>org.apache.hadoop.io.Text</value>
213
                </property>
214
                <property>
215
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name>
216
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
217
                </property>
218
                <!-- organization -->
219
                <property>
220
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name>
221
                    <value>org.apache.hadoop.io.Text</value>
222
                </property>
223
                <property>
224
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name>
225
                    <value>org.apache.hadoop.io.Text</value>
226
                </property>
227
                <property>
228
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name>
229
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
230
                </property>
231

  
232
                <!-- projectOrganization -->
233
                <property>
234
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name>
235
                    <value>org.apache.hadoop.io.Text</value>
236
                </property>
237
                <property>
238
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.value</name>
239
                    <value>org.apache.hadoop.io.Text</value>
240
                </property>
241
                <property>
242
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name>
243
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
244
                </property>
245
                <!-- resultProject -->
246
                <property>
247
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name>
248
                    <value>org.apache.hadoop.io.Text</value>
249
                </property>
250
                <property>
251
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.value</name>
252
                    <value>org.apache.hadoop.io.Text</value>
253
                </property>
254
                <property>
255
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name>
256
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
257
                </property>
258

  
259
                <!-- project -->
260
                <property>
261
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name>
262
                    <value>org.apache.hadoop.io.Text</value>
263
                </property>
264
                <property>
265
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.value</name>
266
                    <value>org.apache.hadoop.io.Text</value>
267
                </property>
268
                <property>
269
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name>
270
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
271
                </property>
272

  
273
                <!-- resultConcept -->
274
                <property>
275
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name>
276
                    <value>org.apache.hadoop.io.Text</value>
277
                </property>
278
                <property>
279
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.value</name>
280
                    <value>org.apache.hadoop.io.Text</value>
281
                </property>
282
                <property>
283
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name>
284
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
285
                </property>
286

  
287
                <!-- resultClaim -->
288
                <property>
289
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name>
290
                    <value>org.apache.hadoop.io.Text</value>
291
                </property>
292
                <property>
293
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.value</name>
294
                    <value>org.apache.hadoop.io.Text</value>
295
                </property>
296
                <property>
297
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name>
298
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
299
                </property>
300

  
301
                <!-- resultClassification -->
302
                <property>
303
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name>
304
                    <value>org.apache.hadoop.io.Text</value>
305
                </property>
306
                <property>
307
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.value</name>
308
                    <value>org.apache.hadoop.io.Text</value>
309
                </property>
310
                <property>
311
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name>
312
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
313
                </property>
314

  
315
                <!-- resultLanguage -->
316
                <property>
317
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name>
318
                    <value>org.apache.hadoop.io.Text</value>
319
                </property>
320
                <property>
321
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.value</name>
322
                    <value>org.apache.hadoop.io.Text</value>
323
                </property>
324
                <property>
325
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name>
326
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
327
                </property>
328

  
329
                <!-- resultProject -->
330
                <property>
331
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name>
332
                    <value>org.apache.hadoop.io.Text</value>
333
                </property>
334
                <property>
335
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.value</name>
336
                    <value>org.apache.hadoop.io.Text</value>
337
                </property>
338
                <property>
339
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name>
340
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
341
                </property>
342
                <!-- resultResult -->
343
                <property>
344
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name>
345
                    <value>org.apache.hadoop.io.Text</value>
346
                </property>
347
                <property>
348
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.value</name>
349
                    <value>org.apache.hadoop.io.Text</value>
350
                </property>
351
                <property>
352
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name>
353
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
354
                </property>
355
                <!-- resultTopic -->
356
                <property>
357
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name>
358
                    <value>org.apache.hadoop.io.Text</value>
359
                </property>
360
                <property>
361
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.value</name>
362
                    <value>org.apache.hadoop.io.Text</value>
363
                </property>
364
                <property>
365
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name>
366
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
367
                </property>
368

  
369
                <!-- resultDatasource -->
370
                <property>
371
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name>
372
                    <value>org.apache.hadoop.io.Text</value>
373
                </property>
374
                <property>
375
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.value</name>
376
                    <value>org.apache.hadoop.io.Text</value>
377
                </property>
378
                <property>
379
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name>
380
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
381
                </property>
382

  
383

  
384
                <!-- result -->
385
                <property>
386
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name>
387
                    <value>org.apache.hadoop.io.Text</value>
388
                </property>
389
                <property>
390
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.value</name>
391
                    <value>org.apache.hadoop.io.Text</value>
392
                </property>
393
                <property>
394
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name>
395
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
396
                </property>
397

  
398
                <!-- claim -->
399
                <property>
400
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name>
401
                    <value>org.apache.hadoop.io.Text</value>
402
                </property>
403
                <property>
404
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.value</name>
405
                    <value>org.apache.hadoop.io.Text</value>
406
                </property>
407
                <property>
408
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name>
409
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
410
                </property>
411

  
410 412
                <!-- cncept -->
411 413

  
412 414
                <property>
413
					<name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
414
					<value>org.apache.hadoop.io.Text</value>
415
				</property>
416
				<property>
417
					<name>mapreduce.multipleoutputs.namedOutput.${out20}.value</name>
418
					<value>org.apache.hadoop.io.Text</value>
419
				</property>
420
				<property>
421
					<name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
422
					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
423
				</property>
415
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
416
                    <value>org.apache.hadoop.io.Text</value>
417
                </property>
418
                <property>
419
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.value</name>
420
                    <value>org.apache.hadoop.io.Text</value>
421
                </property>
422
                <property>
423
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
424
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
425
                </property>
424 426

  
425 427

  
426 428
                <!-- persons -->
427 429

  
428
                 <property>
430
                <property>
429 431
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.key</name>
430 432
                    <value>org.apache.hadoop.io.Text</value>
431 433
                </property>
......
437 439
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.format</name>
438 440
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
439 441
                </property>
440
 
441 442

  
443

  
442 444
                <!-- person results-->
443
               <property>
445
                <property>
444 446
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.key</name>
445 447
                    <value>org.apache.hadoop.io.Text</value>
446 448
                </property>
......
452 454
                    <name>mapreduce.multipleoutputs.namedOutput.${out22}.format</name>
453 455
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
454 456
                </property>
455
 
456
     <property>
457

  
458
                <property>
457 459
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.key</name>
458 460
                    <value>org.apache.hadoop.io.Text</value>
459 461
                </property>
......
465 467
                    <name>mapreduce.multipleoutputs.namedOutput.${out23}.format</name>
466 468
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
467 469
                </property>
468
 
469
     <property>
470

  
471
                <property>
470 472
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.key</name>
471 473
                    <value>org.apache.hadoop.io.Text</value>
472 474
                </property>
......
478 480
                    <name>mapreduce.multipleoutputs.namedOutput.${out24}.format</name>
479 481
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
480 482
                </property>
481
 
482
     <property>
483

  
484
                <property>
483 485
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.key</name>
484 486
                    <value>org.apache.hadoop.io.Text</value>
485 487
                </property>
......
491 493
                    <name>mapreduce.multipleoutputs.namedOutput.${out25}.format</name>
492 494
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
493 495
                </property>
494
 
495 496

  
496
     <property>
497

  
498
                <property>
497 499
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.key</name>
498 500
                    <value>org.apache.hadoop.io.Text</value>
499 501
                </property>
......
505 507
                    <name>mapreduce.multipleoutputs.namedOutput.${out26}.format</name>
506 508
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
507 509
                </property>
508
 
509
     <property>
510

  
511
                <property>
510 512
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.key</name>
511 513
                    <value>org.apache.hadoop.io.Text</value>
512 514
                </property>
......
518 520
                    <name>mapreduce.multipleoutputs.namedOutput.${out27}.format</name>
519 521
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
520 522
                </property>
521
      <property>
523
                <property>
522 524
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.key</name>
523 525
                    <value>org.apache.hadoop.io.Text</value>
524 526
                </property>
......
530 532
                    <name>mapreduce.multipleoutputs.namedOutput.${out28}.format</name>
531 533
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
532 534
                </property>
533
 
534
     <property>
535

  
536
                <property>
535 537
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.key</name>
536 538
                    <value>org.apache.hadoop.io.Text</value>
537 539
                </property>
......
543 545
                    <name>mapreduce.multipleoutputs.namedOutput.${out29}.format</name>
544 546
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
545 547
                </property>
546
 
547
     <property>
548

  
549
                <property>
548 550
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.key</name>
549 551
                    <value>org.apache.hadoop.io.Text</value>
550 552
                </property>
......
556 558
                    <name>mapreduce.multipleoutputs.namedOutput.${out30}.format</name>
557 559
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
558 560
                </property>
559
 
560
     <property>
561

  
562
                <property>
561 563
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.key</name>
562 564
                    <value>org.apache.hadoop.io.Text</value>
563 565
                </property>
......
569 571
                    <name>mapreduce.multipleoutputs.namedOutput.${out31}.format</name>
570 572
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
571 573
                </property>
572
 
573
     <property>
574

  
575
                <property>
574 576
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.key</name>
575 577
                    <value>org.apache.hadoop.io.Text</value>
576 578
                </property>
......
582 584
                    <name>mapreduce.multipleoutputs.namedOutput.${out32}.format</name>
583 585
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
584 586
                </property>
585
 
586 587

  
587
 <property>
588

  
589
                <property>
588 590
                    <name>mapreduce.multipleoutputs.namedOutput.${out33}.key</name>
589 591
                    <value>org.apache.hadoop.io.Text</value>
590 592
                </property>
......
597 599
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
598 600
                </property>
599 601

  
602

  
603
                <!--usage statistics start here-->
604

  
605

  
606
                <!--datasourceStats-->
607
                <property>
608
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.key</name>
609
                    <value>org.apache.hadoop.io.Text</value>
610
                </property>
611
                <property>
612
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.value</name>
613
                    <value>org.apache.hadoop.io.Text</value>
614
                </property>
615
                <property>
616
                    <name>mapreduce.multipleoutputs.namedOutput.${out34}.format</name>
617
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
618
                </property>
619
                <!--projectStats-->
620

  
621

  
622
                <property>
623
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.key</name>
624
                    <value>org.apache.hadoop.io.Text</value>
625
                </property>
626
                <property>
627
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.value</name>
628
                    <value>org.apache.hadoop.io.Text</value>
629
                </property>
630
                <property>
631
                    <name>mapreduce.multipleoutputs.namedOutput.${out35}.format</name>
632
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
633
                </property>
634

  
635

  
636
                <!--resultStats-->
637

  
638
                <property>
639
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.key</name>
640
                    <value>org.apache.hadoop.io.Text</value>
641
                </property>
642
                <property>
643
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.value</name>
644
                    <value>org.apache.hadoop.io.Text</value>
645
                </property>
646
                <property>
647
                    <name>mapreduce.multipleoutputs.namedOutput.${out36}.format</name>
648
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
649
                </property>
650

  
651

  
652
                <!--organizationStats-->
653

  
654
                <property>
655
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.key</name>
656
                    <value>org.apache.hadoop.io.Text</value>
657
                </property>
658
                <property>
659
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.value</name>
660
                    <value>org.apache.hadoop.io.Text</value>
661
                </property>
662
                <property>
663
                    <name>mapreduce.multipleoutputs.namedOutput.${out37}.format</name>
664
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
665
                </property>
666

  
667

  
668

  
600 669
                <!-- ## Classes of mapper and reducer -->
601
				
602
				<property>
603
					<name>mapreduce.map.class</name>
604
					<value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsMapper</value>
605
				</property>
606
				<property>
607
					<name>mapreduce.reduce.class</name>
608
					<value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsReducer</value>
609
				</property>
610
				<property>
611
					<name>io.serializations</name>
612
					<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
613
				</property>
614 670

  
615
				<!-- ## Custom config -->
616
				
617
				<!--delim character used to seperate fields in hdfs dump files <property> -->
618
				<property>
619
					<name>stats.delim</name>
620
					<value>${Stats_delim_Character}</value>
621
				</property>
622
				<!--default string for Null String Values -->
623
				<property>
624
					<name>stats.nullString</name>
625
					<value>${Stats_null_String_Field}</value>
626
				</property>
627
				<!--default string for Null Numeric Values -->
628
				<property>
629
					<name>stats.nullNum</name>
630
					<value>${Stats_null_Numeric_Field}</value>
631
				</property>
632
				<property>
633
					<name>stats.enclChar</name>
634
					<value>${Stats_enclosing_Character}</value>
635
				</property>
636
				
637
				
638
				<!--source hbase table -->
639
				<property>
640
					<name>hbase.mapreduce.inputtable</name>
641
					<value>${Stats_Hbase_Source_Table}</value>
642
				</property>
643
				<property>
644
					<name>hbase.mapred.inputtable</name>
645
					<value>${Stats_Hbase_Source_Table}</value>
646
				</property>
647
				
648
				<property>
649
					<!-- mapping of protos entities to tables in the relDB -->
650
					<name>stats.dbTablesMap</name>
651
					<value>${Stats_db_table_map}</value>
652
				</property>
653
				
654
				<!-- This directory does not correspond to a data store. In fact, this directory only
655
					contains multiple data stores. It has to be set to the name of the workflow node.
656
					-->
657
				<property>
658
					<name>mapred.output.dir</name>
659
					<value>${Stats_output_Path}</value>
660
				</property>
661
				<property>
662
					<name>stats.indexConf</name>
663
					<value>${Stats_indexConf}</value>
664
				</property>
665
				<!-- ## Workflow node parameters -->
666
				<property>
667
					<name>mapred.reduce.tasks</name>
668
					<value>${numReducers}</value>
669
				</property>
670
				
671
			</configuration>
672
		</map-reduce>
673
        <ok to="end" />
671
                <property>
672
                    <name>mapreduce.map.class</name>
673
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsMapper</value>
674
                </property>
675
                <property>
676
                    <name>mapreduce.reduce.class</name>
677
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsReducer</value>
678
                </property>
679
                <property>
680
                    <name>io.serializations</name>
681
                    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
682
                </property>
674 683

  
675
		<error to="fail" />
676
	</action>
677
	
678
	<action name='exportContext'>
679
		<java>
680
			<prepare>
681
			</prepare>
682
			<configuration>
683
				<property>
684
					<name>mapred.job.queue.name</name>
685
					<value>${queueName}</value>
686
				</property>
687
			</configuration>
688
			<main-class>eu.dnetlib.iis.core.workflows.stats.ContextExportWrapper</main-class>
689
			<arg>${Stats_output_Path}</arg>
690
			<arg>${isLookupEndpoint}</arg>
691
		</java>
692
 	<ok to="prepareDatabase" />
684
                <!-- ## Custom config -->
693 685

  
694
		<error to="fail" />
695
	</action>	
696
	<action name="prepareDatabase">
697
		<java>
698
				<prepare>
699
			</prepare>
700
			<configuration>
701
				<property>
702
					<name>mapred.job.queue.name</name>
703
					<value>${queueName}</value>
704
				</property>
705
			</configuration>
706
			
707
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
708
			<arg>-SworkingDir=${workingDir}</arg>
709
			<arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
710
			<arg>-PStats_db_Url=${Stats_db_Url}</arg>
711
			<arg>-PStats_db_User=${Stats_db_User}</arg>
712
			<arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
713
			<arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
714
			
715
		</java>
686
                <!--delim character used to seperate fields in hdfs dump files <property> -->
687
                <property>
688
                    <name>stats.delim</name>
689
                    <value>${Stats_delim_Character}</value>
690
                </property>
691
                <!--default string for Null String Values -->
692
                <property>
693
                    <name>stats.nullString</name>
694
                    <value>${Stats_null_String_Field}</value>
695
                </property>
696
                <!--default string for Null Numeric Values -->
697
                <property>
698
                    <name>stats.nullNum</name>
699
                    <value>${Stats_null_Numeric_Field}</value>
700
                </property>
701
                <property>
702
                    <name>stats.enclChar</name>
703
                    <value>${Stats_enclosing_Character}</value>
704
                </property>
716 705

  
717
		<ok to="sqoopImport" /> 
718
		<error to="fail" />
719
	</action>
720
	
721
	<action name="sqoopImport">
722
		<java>
723
			<prepare>
724
			</prepare>
725
			<configuration>
726
				<property>
727
					<name>mapred.job.queue.name</name>
728
					<value>${queueName}</value>
729
				</property>
730
				
731
				<property>
732
					<name>oozie.sqoop.log.level</name>
733
					<value>DEBUG</value>
734
				</property>
735
				
736
			</configuration>
737
			
738
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
739
			<arg>-SworkingDir=${workingDir}</arg>
740
			<arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg>
741
			<arg>-PStats_db_Url=${Stats_db_Url}</arg>
742
			<arg>-PStats_db_User=${Stats_db_User}</arg>
743
			<arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
744
			
745
			<arg>-PStats_output_Path=${Stats_output_Path}</arg>
746
			<arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg>
747
			<arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg>
748
			<arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg>
749
			<arg>-PStats_delim_Character=${Stats_delim_Character}</arg>
750
			<arg>-PStats_db_table_map=${Stats_db_table_map}</arg>
751
			<arg>-PStats_enclosing_Character=${Stats_enclosing_Character}</arg>
752
			
753
		</java>
754
 <ok to="end" />
755
		
756 706

  
757
          <error to="fail" />
758
	</action>
759
	
760
	<action name="finalizeDatabase">
761
		<java>
762
			<prepare>
763
			</prepare>
764
			<configuration>
765
				<property>
766
					<name>mapred.job.queue.name</name>
767
					<value>${queueName}</value>
768
				</property>
769
			</configuration>
770
			
771
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
772
			<arg>-SworkingDir=${workingDir}</arg>
773
			<arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg>
774
			<arg>-PStats_db_Url=${Stats_db_Url}</arg>
775
			<arg>-PStats_db_User=${Stats_db_User}</arg>
776
			<arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
777
			<arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
778
		</java>
779
		<ok to="cleanUpHDFS" />
780
		<error to="fail" />
781
	</action>
707
                <!--source hbase table -->
708
                <property>
709
                    <name>hbase.mapreduce.inputtable</name>
710
                    <value>${Stats_Hbase_Source_Table}</value>
711
                </property>
712
                <property>
713
                    <name>hbase.mapred.inputtable</name>
714
                    <value>${Stats_Hbase_Source_Table}</value>
715
                </property>
782 716

  
783
    <action name="generateReports">
717
                <property>
718
                    <!-- mapping of protos entities to tables in the relDB -->
719
                    <name>stats.dbTablesMap</name>
720
                    <value>${Stats_db_table_map}</value>
721
                </property>
722

  
723
                <!-- This directory does not correspond to a data store. In fact, this directory only
724
                    contains multiple data stores. It has to be set to the name of the workflow node.
725
                    -->
726
                <property>
727
                    <name>mapred.output.dir</name>
728
                    <value>${Stats_output_Path}</value>
729
                </property>
730
                <property>
731
                    <name>stats.indexConf</name>
732
                    <value>${Stats_indexConf}</value>
733
                </property>
734
                <!-- ## Workflow node parameters -->
735
                <property>
736
                    <name>mapred.reduce.tasks</name>
737
                    <value>${numReducers}</value>
738
                </property>
739

  
740
            </configuration>
741
        </map-reduce>
742
        <ok to="exportContext"/>
743

  
744
        <error to="fail"/>
745
    </action>
746

  
747
    <action name='exportUsageStats'>
784 748
        <java>
785 749
            <prepare>
786 750
            </prepare>
......
790 754
                    <value>${queueName}</value>
791 755
                </property>
792 756
            </configuration>
757
            <main-class>eu.dnetlib.iis.core.workflows.stats.ContextExportWrapper</main-class>
758
            <arg>${Stats_output_Path}</arg>
759
            <arg>${isLookupEndpoint}</arg>
760
        </java>
761
        <ok to="exportUsageStats"/>
793 762

  
763
        <error to="fail"/>
764
    </action>
765

  
766
    <action name='exportUsageStats'>
767
        <java>
768
            <prepare>
769
            </prepare>
770
            <configuration>
771
                <property>
772
                    <name>mapred.job.queue.name</name>
773
                    <value>${queueName}</value>
774
                </property>
775
            </configuration>
776
            <main-class>eu.dnetlib.iis.core.workflows.stats.UsageStatsExportWrapper</main-class>
777
            <arg>${Stats_usageDB_url}</arg>
778
            <arg>${Stats_usageDB_Driver}</arg>
779
            <arg>${Stats_delim_Character}</arg>
780
            <arg>${Stats_output_Path}</arg>
781
            <arg>${Stats_usageDB_entities}</arg>
782
        </java>
783

  
784
        <ok to="sqoopImport"/>
785
        <error to="fail"/>
786
    </action>
787

  
788
    <action name="prepareDatabase">
789
        <java>
790
            <prepare>
791
            </prepare>
792
            <configuration>
793
                <property>
794
                    <name>mapred.job.queue.name</name>
795
                    <value>${queueName}</value>
796
                </property>
797
            </configuration>
798

  
794 799
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
795 800
            <arg>-SworkingDir=${workingDir}</arg>
796
            <arg>eu.dnetlib.iis.core.workflows.stats.ReportWrapper</arg>
801
            <arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
797 802
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
798 803
            <arg>-PStats_db_User=${Stats_db_User}</arg>
799 804
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
800 805
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
801
            <arg>-PReport_Path=/tmp/</arg>
806

  
802 807
        </java>
803
        <ok to="end" />
804
        <error to="fail" />
808

  
809
        <ok to="sqoopImport"/>
810
        <error to="fail"/>
805 811
    </action>
806
	
807
	<action name="cleanUpHDFS">
808
		<fs>
809
			
810
			<delete path="${nameNode}${Stats_output_Path}" />
811
			
812
		</fs>
813
		<ok to="end" />
814
		<error to="fail" />
815
	</action>
816
	<kill name="fail">
817
		<message>
818
			Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}]
819
		</message>
820
	</kill>
821
	<end name="end" />
812

  
813
    <action name="sqoopImport">
814
        <java>
815
            <prepare>
816
            </prepare>
817
            <configuration>
818
                <property>
819
                    <name>mapred.job.queue.name</name>
820
                    <value>${queueName}</value>
821
                </property>
822

  
823
                <property>
824
                    <name>oozie.sqoop.log.level</name>
825
                    <value>DEBUG</value>
826
                </property>
827

  
828
            </configuration>
829

  
830
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
831
            <arg>-SworkingDir=${workingDir}</arg>
832
            <arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg>
833
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
834
            <arg>-PStats_db_User=${Stats_db_User}</arg>
835
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
836

  
837
            <arg>-PStats_output_Path=${Stats_output_Path}</arg>
838
            <arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg>
839
            <arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg>
840
            <arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg>
841
            <arg>-PStats_delim_Character=${Stats_delim_Character}</arg>
842
            <arg>-PStats_db_table_map=${Stats_db_table_map}</arg>
843
            <arg>-PStats_enclosing_Character=${Stats_enclosing_Character}</arg>
844

  
845
        </java>
846
        <ok to="end"/>
847

  
848

  
849
        <error to="fail"/>
850
    </action>
851

  
852
    <action name="finalizeDatabase">
853
        <java>
854
            <prepare>
855
            </prepare>
856
            <configuration>
857
                <property>
858
                    <name>mapred.job.queue.name</name>
859
                    <value>${queueName}</value>
860
                </property>
861
            </configuration>
862

  
863
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
864
            <arg>-SworkingDir=${workingDir}</arg>
865
            <arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg>
866
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
867
            <arg>-PStats_db_User=${Stats_db_User}</arg>
868
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
869
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
870
        </java>
871
        <ok to="cleanUpHDFS"/>
872
        <error to="fail"/>
873
    </action>
874

  
875
    <action name="cleanUpHDFS">
876
        <fs>
877

  
878
            <delete path="${nameNode}${Stats_output_Path}"/>
879

  
880
        </fs>
881
        <ok to="end"/>
882
        <error to="fail"/>
883
    </action>
884
    <kill name="fail">
885
        <message>
886
            Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}]
887
        </message>
888
    </kill>
889
    <end name="end"/>
822 890
</workflow-app>

Also available in: Unified diff