Revision 41501
Added by Eri Katsari about 8 years ago
workflow.xml | ||
---|---|---|
1 | 1 |
<workflow-app name="stats-export" |
2 |
xmlns="uri:oozie:workflow:0.4">
|
|
3 |
<!-- map reduce job that exports hbase data and prepares them for import to the relational
|
|
4 |
database used for statistics generation -->
|
|
5 |
|
|
6 |
<global>
|
|
7 |
<job-tracker>${jobTracker}</job-tracker>
|
|
8 |
<name-node>${nameNode}</name-node>
|
|
9 |
<configuration>
|
|
10 |
<property>
|
|
11 |
<name>mapred.job.queue.name</name>
|
|
12 |
<value>${queueName}</value>
|
|
13 |
</property>
|
|
14 |
<property>
|
|
15 |
<name>oozie.sqoop.log.level</name>
|
|
16 |
<value>DEBUG</value>
|
|
17 |
</property>
|
|
18 |
<property>
|
|
2 |
xmlns="uri:oozie:workflow:0.4">
|
|
3 |
<!-- map reduce job that exports hbase data and prepares them for import to the relational
|
|
4 |
database used for statistics generation -->
|
|
5 |
|
|
6 |
<global>
|
|
7 |
<job-tracker>${jobTracker}</job-tracker>
|
|
8 |
<name-node>${nameNode}</name-node>
|
|
9 |
<configuration>
|
|
10 |
<property>
|
|
11 |
<name>mapred.job.queue.name</name>
|
|
12 |
<value>${queueName}</value>
|
|
13 |
</property>
|
|
14 |
<property>
|
|
15 |
<name>oozie.sqoop.log.level</name>
|
|
16 |
<value>DEBUG</value>
|
|
17 |
</property>
|
|
18 |
<property>
|
|
19 | 19 |
<name>oozie.launcher.mapred.job.priority</name> |
20 |
<value>HIGH</value>
|
|
20 |
<value>NORMAL</value>
|
|
21 | 21 |
</property> |
22 | 22 |
<property> |
23 | 23 |
<name>mapred.job.priority</name> |
24 |
<value>HIGH</value>
|
|
24 |
<value>NORMAL</value>
|
|
25 | 25 |
</property> |
26 |
</configuration> |
|
27 |
</global> |
|
28 |
<start to="mr_export"/> |
|
29 |
<action name="mr_export"> |
|
30 |
<map-reduce> |
|
31 |
|
|
32 |
<prepare> |
|
33 |
<delete path="${nameNode}${Stats_output_Path}" /> |
|
34 |
|
|
35 |
</prepare> |
|
36 |
<configuration> |
|
37 |
<property> |
|
38 |
<name>hbase.mapreduce.scan</name> |
|
39 |
<value>${wf:actionData('get-scanner')['scan']}</value> |
|
40 |
</property> |
|
41 |
<property> |
|
42 |
<name>hbase.rootdir</name> |
|
43 |
<value>$nameNode/hbase</value> |
|
44 |
|
|
45 |
</property> |
|
46 |
|
|
47 |
<property> |
|
48 |
<name>hbase.security.authentication</name> |
|
49 |
<value>simple</value> |
|
50 |
</property> |
|
51 |
<!-- ZOOKEEPER --> |
|
52 |
|
|
53 |
<property> |
|
54 |
<name>hbase.zookeeper.quorum</name> |
|
55 |
<value> |
|
56 |
namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu |
|
57 |
</value> |
|
58 |
<!-- <value> quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu |
|
59 |
</value> --> |
|
60 |
</property> |
|
61 |
<property> |
|
62 |
<name>zookeeper.znode.rootserver</name> |
|
63 |
<value>root-region-server</value> |
|
64 |
|
|
65 |
</property> |
|
66 |
|
|
67 |
<property> |
|
68 |
<name>hbase.zookeeper.property.clientPort</name> |
|
69 |
<value>2181</value> |
|
70 |
</property> |
|
71 |
|
|
72 |
|
|
73 |
<!-- MR IO --> |
|
74 |
|
|
75 |
|
|
76 |
<property> |
|
77 |
<name>mapreduce.inputformat.class</name> |
|
78 |
<value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value> |
|
79 |
</property> |
|
80 |
|
|
81 |
<property> |
|
82 |
<name>mapred.mapoutput.key.class</name> |
|
83 |
<value>org.apache.hadoop.io.Text</value> |
|
84 |
</property> |
|
85 |
<property> |
|
86 |
<name>mapred.mapoutput.value.class</name> |
|
87 |
<value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value> |
|
88 |
</property> |
|
89 |
<property> |
|
90 |
<name>mapred.output.key.class</name> |
|
91 |
<value>org.apache.hadoop.io.Text</value> |
|
92 |
</property> |
|
93 |
<property> |
|
94 |
<name>mapred.output.value.class</name> |
|
95 |
<value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value> |
|
96 |
</property> |
|
97 |
|
|
98 |
<!-- ## This is required for new MapReduce API usage --> |
|
99 |
<property> |
|
100 |
<name>mapred.mapper.new-api</name> |
|
101 |
<value>true</value> |
|
102 |
</property> |
|
103 |
<property> |
|
104 |
<name>mapred.reducer.new-api</name> |
|
105 |
<value>true</value> |
|
106 |
</property> |
|
107 |
|
|
108 |
<!-- # Job-specific options --> |
|
109 |
<property> |
|
110 |
<name>dfs.blocksize</name> |
|
111 |
<value>32M</value> |
|
112 |
</property> |
|
113 |
<property> |
|
114 |
<name>mapred.output.compress</name> |
|
115 |
<value>false</value> |
|
116 |
</property> |
|
117 |
<property> |
|
118 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
119 |
<value>false</value> |
|
120 |
</property> |
|
121 |
<property> |
|
122 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
123 |
<value>false</value> |
|
124 |
</property> |
|
125 |
<property> |
|
126 |
<name>mapreduce.map.speculative</name> |
|
127 |
<value>false</value> |
|
128 |
</property> |
|
129 |
|
|
130 |
<!-- I/O FORMAT --> |
|
131 |
<!-- IMPORTANT: sets default delimiter used by text output writer. Required to fix |
|
132 |
issue with trailing tab added between id and value in multiple outputs --> |
|
133 |
<property> |
|
134 |
<name>mapred.textoutputformat.separator</name> |
|
135 |
<value>${Stats_delim_Character}</value> |
|
136 |
</property> |
|
137 |
<!-- ## Names of all output ports --> |
|
138 |
|
|
139 |
<property> |
|
140 |
<name>mapreduce.multipleoutputs</name> |
|
141 |
|
|
142 |
<value> |
|
143 |
${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21} ${out22} ${out23} ${out24} ${out25} ${out26} ${out27} ${out28} ${out29} ${out30} ${out31} ${out32} ${out33} |
|
144 |
</value> |
|
145 |
|
|
146 |
</property> |
|
147 |
<!-- datasource --> |
|
148 |
<property> |
|
149 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name> |
|
150 |
<value>org.apache.hadoop.io.Text</value> |
|
151 |
</property> |
|
152 |
<property> |
|
153 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name> |
|
154 |
<value>org.apache.hadoop.io.Text</value> |
|
155 |
</property> |
|
156 |
<property> |
|
157 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name> |
|
158 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
159 |
</property> |
|
160 |
<!-- datasourceLanguage --> |
|
161 |
<property> |
|
162 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name> |
|
163 |
<value>org.apache.hadoop.io.Text</value> |
|
164 |
</property> |
|
165 |
<property> |
|
166 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name> |
|
167 |
<value>org.apache.hadoop.io.Text</value> |
|
168 |
</property> |
|
169 |
<property> |
|
170 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name> |
|
171 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
172 |
</property> |
|
173 |
|
|
174 |
<!-- datasourceOrganization --> |
|
175 |
<property> |
|
176 |
<name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name> |
|
177 |
<value>org.apache.hadoop.io.Text</value> |
|
178 |
</property> |
|
179 |
<property> |
|
180 |
<name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name> |
|
181 |
<value>org.apache.hadoop.io.Text</value> |
|
182 |
</property> |
|
183 |
<property> |
|
184 |
<name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name> |
|
185 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
186 |
</property> |
|
187 |
|
|
188 |
<!-- datasourceTopic --> |
|
189 |
<property> |
|
190 |
<name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name> |
|
191 |
<value>org.apache.hadoop.io.Text</value> |
|
192 |
</property> |
|
193 |
<property> |
|
194 |
<name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name> |
|
195 |
<value>org.apache.hadoop.io.Text</value> |
|
196 |
</property> |
|
197 |
<property> |
|
198 |
<name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name> |
|
199 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
200 |
</property> |
|
201 |
|
|
202 |
<!-- resultDatasource --> |
|
203 |
<property> |
|
204 |
<name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name> |
|
205 |
<value>org.apache.hadoop.io.Text</value> |
|
206 |
</property> |
|
207 |
<property> |
|
208 |
<name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name> |
|
209 |
<value>org.apache.hadoop.io.Text</value> |
|
210 |
</property> |
|
211 |
<property> |
|
212 |
<name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name> |
|
213 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
214 |
</property> |
|
215 |
<!-- organization --> |
|
216 |
<property> |
|
217 |
<name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name> |
|
218 |
<value>org.apache.hadoop.io.Text</value> |
|
219 |
</property> |
|
220 |
<property> |
|
221 |
<name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name> |
|
222 |
<value>org.apache.hadoop.io.Text</value> |
|
223 |
</property> |
|
224 |
<property> |
|
225 |
<name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name> |
|
226 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
227 |
</property> |
|
228 |
|
|
229 |
<!-- projectOrganization --> |
|
230 |
<property> |
|
231 |
<name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name> |
|
232 |
<value>org.apache.hadoop.io.Text</value> |
|
233 |
</property> |
|
234 |
<property> |
|
235 |
<name>mapreduce.multipleoutputs.namedOutput.${out7}.value</name> |
|
236 |
<value>org.apache.hadoop.io.Text</value> |
|
237 |
</property> |
|
238 |
<property> |
|
239 |
<name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name> |
|
240 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
241 |
</property> |
|
242 |
<!-- resultProject --> |
|
243 |
<property> |
|
244 |
<name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name> |
|
245 |
<value>org.apache.hadoop.io.Text</value> |
|
246 |
</property> |
|
247 |
<property> |
|
248 |
<name>mapreduce.multipleoutputs.namedOutput.${out8}.value</name> |
|
249 |
<value>org.apache.hadoop.io.Text</value> |
|
250 |
</property> |
|
251 |
<property> |
|
252 |
<name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name> |
|
253 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
254 |
</property> |
|
255 |
|
|
256 |
<!-- project --> |
|
257 |
<property> |
|
258 |
<name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name> |
|
259 |
<value>org.apache.hadoop.io.Text</value> |
|
260 |
</property> |
|
261 |
<property> |
|
262 |
<name>mapreduce.multipleoutputs.namedOutput.${out9}.value</name> |
|
263 |
<value>org.apache.hadoop.io.Text</value> |
|
264 |
</property> |
|
265 |
<property> |
|
266 |
<name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name> |
|
267 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
268 |
</property> |
|
269 |
|
|
270 |
<!-- resultConcept --> |
|
271 |
<property> |
|
272 |
<name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name> |
|
273 |
<value>org.apache.hadoop.io.Text</value> |
|
274 |
</property> |
|
275 |
<property> |
|
276 |
<name>mapreduce.multipleoutputs.namedOutput.${out10}.value</name> |
|
277 |
<value>org.apache.hadoop.io.Text</value> |
|
278 |
</property> |
|
279 |
<property> |
|
280 |
<name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name> |
|
281 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
282 |
</property> |
|
283 |
|
|
284 |
<!-- resultClaim --> |
|
285 |
<property> |
|
286 |
<name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name> |
|
287 |
<value>org.apache.hadoop.io.Text</value> |
|
288 |
</property> |
|
289 |
<property> |
|
290 |
<name>mapreduce.multipleoutputs.namedOutput.${out11}.value</name> |
|
291 |
<value>org.apache.hadoop.io.Text</value> |
|
292 |
</property> |
|
293 |
<property> |
|
294 |
<name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name> |
|
295 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
296 |
</property> |
|
297 |
|
|
298 |
<!-- resultClassification --> |
|
299 |
<property> |
|
300 |
<name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name> |
|
301 |
<value>org.apache.hadoop.io.Text</value> |
|
302 |
</property> |
|
303 |
<property> |
|
304 |
<name>mapreduce.multipleoutputs.namedOutput.${out12}.value</name> |
|
305 |
<value>org.apache.hadoop.io.Text</value> |
|
306 |
</property> |
|
307 |
<property> |
|
308 |
<name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name> |
|
309 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
310 |
</property> |
|
311 |
|
|
312 |
<!-- resultLanguage --> |
|
313 |
<property> |
|
314 |
<name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name> |
|
315 |
<value>org.apache.hadoop.io.Text</value> |
|
316 |
</property> |
|
317 |
<property> |
|
318 |
<name>mapreduce.multipleoutputs.namedOutput.${out13}.value</name> |
|
319 |
<value>org.apache.hadoop.io.Text</value> |
|
320 |
</property> |
|
321 |
<property> |
|
322 |
<name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name> |
|
323 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
324 |
</property> |
|
325 |
|
|
326 |
<!-- resultProject --> |
|
327 |
<property> |
|
328 |
<name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name> |
|
329 |
<value>org.apache.hadoop.io.Text</value> |
|
330 |
</property> |
|
331 |
<property> |
|
332 |
<name>mapreduce.multipleoutputs.namedOutput.${out14}.value</name> |
|
333 |
<value>org.apache.hadoop.io.Text</value> |
|
334 |
</property> |
|
335 |
<property> |
|
336 |
<name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name> |
|
337 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
338 |
</property> |
|
339 |
<!-- resultResult --> |
|
340 |
<property> |
|
341 |
<name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name> |
|
342 |
<value>org.apache.hadoop.io.Text</value> |
|
343 |
</property> |
|
344 |
<property> |
|
345 |
<name>mapreduce.multipleoutputs.namedOutput.${out15}.value</name> |
|
346 |
<value>org.apache.hadoop.io.Text</value> |
|
347 |
</property> |
|
348 |
<property> |
|
349 |
<name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name> |
|
350 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
351 |
</property> |
|
352 |
<!-- resultTopic --> |
|
353 |
<property> |
|
354 |
<name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name> |
|
355 |
<value>org.apache.hadoop.io.Text</value> |
|
356 |
</property> |
|
357 |
<property> |
|
358 |
<name>mapreduce.multipleoutputs.namedOutput.${out16}.value</name> |
|
359 |
<value>org.apache.hadoop.io.Text</value> |
|
360 |
</property> |
|
361 |
<property> |
|
362 |
<name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name> |
|
363 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
364 |
</property> |
|
365 |
|
|
366 |
<!-- resultDatasource --> |
|
367 |
<property> |
|
368 |
<name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name> |
|
369 |
<value>org.apache.hadoop.io.Text</value> |
|
370 |
</property> |
|
371 |
<property> |
|
372 |
<name>mapreduce.multipleoutputs.namedOutput.${out17}.value</name> |
|
373 |
<value>org.apache.hadoop.io.Text</value> |
|
374 |
</property> |
|
375 |
<property> |
|
376 |
<name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name> |
|
377 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
378 |
</property> |
|
379 |
|
|
380 |
|
|
381 |
|
|
382 |
<!-- result --> |
|
383 |
<property> |
|
384 |
<name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name> |
|
385 |
<value>org.apache.hadoop.io.Text</value> |
|
386 |
</property> |
|
387 |
<property> |
|
388 |
<name>mapreduce.multipleoutputs.namedOutput.${out18}.value</name> |
|
389 |
<value>org.apache.hadoop.io.Text</value> |
|
390 |
</property> |
|
391 |
<property> |
|
392 |
<name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name> |
|
393 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
394 |
</property> |
|
395 |
|
|
396 |
<!-- claim --> |
|
397 |
<property> |
|
398 |
<name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name> |
|
399 |
<value>org.apache.hadoop.io.Text</value> |
|
400 |
</property> |
|
401 |
<property> |
|
402 |
<name>mapreduce.multipleoutputs.namedOutput.${out19}.value</name> |
|
403 |
<value>org.apache.hadoop.io.Text</value> |
|
404 |
</property> |
|
405 |
<property> |
|
406 |
<name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name> |
|
407 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
408 |
</property> |
|
26 |
</configuration> |
|
27 |
</global> |
|
28 |
<start to="exportUsageStats"/> |
|
29 |
<action name="mr_export"> |
|
30 |
<map-reduce> |
|
409 | 31 |
|
32 |
<prepare> |
|
33 |
<delete path="${nameNode}${Stats_output_Path}"/> |
|
34 |
|
|
35 |
</prepare> |
|
36 |
<configuration> |
|
37 |
<property> |
|
38 |
<name>hbase.mapreduce.scan</name> |
|
39 |
<value>${wf:actionData('get-scanner')['scan']}</value> |
|
40 |
</property> |
|
41 |
<property> |
|
42 |
<name>hbase.rootdir</name> |
|
43 |
<value>$nameNode/hbase</value> |
|
44 |
|
|
45 |
</property> |
|
46 |
|
|
47 |
<property> |
|
48 |
<name>hbase.security.authentication</name> |
|
49 |
<value>simple</value> |
|
50 |
</property> |
|
51 |
<!-- ZOOKEEPER --> |
|
52 |
|
|
53 |
<property> |
|
54 |
<name>hbase.zookeeper.quorum</name> |
|
55 |
<value> |
|
56 |
namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu |
|
57 |
</value> |
|
58 |
<!-- <value> quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu |
|
59 |
</value> --> |
|
60 |
</property> |
|
61 |
<property> |
|
62 |
<name>zookeeper.znode.rootserver</name> |
|
63 |
<value>root-region-server</value> |
|
64 |
|
|
65 |
</property> |
|
66 |
|
|
67 |
<property> |
|
68 |
<name>hbase.zookeeper.property.clientPort</name> |
|
69 |
<value>2181</value> |
|
70 |
</property> |
|
71 |
|
|
72 |
|
|
73 |
<!-- MR IO --> |
|
74 |
|
|
75 |
|
|
76 |
<property> |
|
77 |
<name>mapreduce.inputformat.class</name> |
|
78 |
<value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value> |
|
79 |
</property> |
|
80 |
|
|
81 |
<property> |
|
82 |
<name>mapred.mapoutput.key.class</name> |
|
83 |
<value>org.apache.hadoop.io.Text</value> |
|
84 |
</property> |
|
85 |
<property> |
|
86 |
<name>mapred.mapoutput.value.class</name> |
|
87 |
<value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value> |
|
88 |
</property> |
|
89 |
<property> |
|
90 |
<name>mapred.output.key.class</name> |
|
91 |
<value>org.apache.hadoop.io.Text</value> |
|
92 |
</property> |
|
93 |
<property> |
|
94 |
<name>mapred.output.value.class</name> |
|
95 |
<value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value> |
|
96 |
</property> |
|
97 |
|
|
98 |
<!-- ## This is required for new MapReduce API usage --> |
|
99 |
<property> |
|
100 |
<name>mapred.mapper.new-api</name> |
|
101 |
<value>true</value> |
|
102 |
</property> |
|
103 |
<property> |
|
104 |
<name>mapred.reducer.new-api</name> |
|
105 |
<value>true</value> |
|
106 |
</property> |
|
107 |
|
|
108 |
<!-- # Job-specific options --> |
|
109 |
<property> |
|
110 |
<name>dfs.blocksize</name> |
|
111 |
<value>32M</value> |
|
112 |
</property> |
|
113 |
<property> |
|
114 |
<name>mapred.output.compress</name> |
|
115 |
<value>false</value> |
|
116 |
</property> |
|
117 |
<property> |
|
118 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
119 |
<value>false</value> |
|
120 |
</property> |
|
121 |
<property> |
|
122 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
123 |
<value>false</value> |
|
124 |
</property> |
|
125 |
<property> |
|
126 |
<name>mapreduce.map.speculative</name> |
|
127 |
<value>false</value> |
|
128 |
</property> |
|
129 |
|
|
130 |
<!-- I/O FORMAT --> |
|
131 |
<!-- IMPORTANT: sets default delimiter used by text output writer. Required to fix |
|
132 |
issue with trailing tab added between id and value in multiple outputs --> |
|
133 |
<property> |
|
134 |
<name>mapred.textoutputformat.separator</name> |
|
135 |
<value>${Stats_delim_Character}</value> |
|
136 |
</property> |
|
137 |
<!-- ## Names of all output ports --> |
|
138 |
|
|
139 |
<property> |
|
140 |
<name>mapreduce.multipleoutputs</name> |
|
141 |
|
|
142 |
<value> |
|
143 |
${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} |
|
144 |
${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21} |
|
145 |
${out22} ${out23} ${out24} ${out25} ${out26} ${out27} ${out28} ${out29} ${out30} ${out31} |
|
146 |
${out32} ${out33} |
|
147 |
</value> |
|
148 |
|
|
149 |
</property> |
|
150 |
<!-- datasource --> |
|
151 |
<property> |
|
152 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name> |
|
153 |
<value>org.apache.hadoop.io.Text</value> |
|
154 |
</property> |
|
155 |
<property> |
|
156 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name> |
|
157 |
<value>org.apache.hadoop.io.Text</value> |
|
158 |
</property> |
|
159 |
<property> |
|
160 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name> |
|
161 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
162 |
</property> |
|
163 |
<!-- datasourceLanguage --> |
|
164 |
<property> |
|
165 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name> |
|
166 |
<value>org.apache.hadoop.io.Text</value> |
|
167 |
</property> |
|
168 |
<property> |
|
169 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name> |
|
170 |
<value>org.apache.hadoop.io.Text</value> |
|
171 |
</property> |
|
172 |
<property> |
|
173 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name> |
|
174 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
175 |
</property> |
|
176 |
|
|
177 |
<!-- datasourceOrganization --> |
|
178 |
<property> |
|
179 |
<name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name> |
|
180 |
<value>org.apache.hadoop.io.Text</value> |
|
181 |
</property> |
|
182 |
<property> |
|
183 |
<name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name> |
|
184 |
<value>org.apache.hadoop.io.Text</value> |
|
185 |
</property> |
|
186 |
<property> |
|
187 |
<name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name> |
|
188 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
189 |
</property> |
|
190 |
|
|
191 |
<!-- datasourceTopic --> |
|
192 |
<property> |
|
193 |
<name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name> |
|
194 |
<value>org.apache.hadoop.io.Text</value> |
|
195 |
</property> |
|
196 |
<property> |
|
197 |
<name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name> |
|
198 |
<value>org.apache.hadoop.io.Text</value> |
|
199 |
</property> |
|
200 |
<property> |
|
201 |
<name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name> |
|
202 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
203 |
</property> |
|
204 |
|
|
205 |
<!-- resultDatasource --> |
|
206 |
<property> |
|
207 |
<name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name> |
|
208 |
<value>org.apache.hadoop.io.Text</value> |
|
209 |
</property> |
|
210 |
<property> |
|
211 |
<name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name> |
|
212 |
<value>org.apache.hadoop.io.Text</value> |
|
213 |
</property> |
|
214 |
<property> |
|
215 |
<name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name> |
|
216 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
217 |
</property> |
|
218 |
<!-- organization --> |
|
219 |
<property> |
|
220 |
<name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name> |
|
221 |
<value>org.apache.hadoop.io.Text</value> |
|
222 |
</property> |
|
223 |
<property> |
|
224 |
<name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name> |
|
225 |
<value>org.apache.hadoop.io.Text</value> |
|
226 |
</property> |
|
227 |
<property> |
|
228 |
<name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name> |
|
229 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
230 |
</property> |
|
231 |
|
|
232 |
<!-- projectOrganization --> |
|
233 |
<property> |
|
234 |
<name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name> |
|
235 |
<value>org.apache.hadoop.io.Text</value> |
|
236 |
</property> |
|
237 |
<property> |
|
238 |
<name>mapreduce.multipleoutputs.namedOutput.${out7}.value</name> |
|
239 |
<value>org.apache.hadoop.io.Text</value> |
|
240 |
</property> |
|
241 |
<property> |
|
242 |
<name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name> |
|
243 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
244 |
</property> |
|
245 |
<!-- resultProject --> |
|
246 |
<property> |
|
247 |
<name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name> |
|
248 |
<value>org.apache.hadoop.io.Text</value> |
|
249 |
</property> |
|
250 |
<property> |
|
251 |
<name>mapreduce.multipleoutputs.namedOutput.${out8}.value</name> |
|
252 |
<value>org.apache.hadoop.io.Text</value> |
|
253 |
</property> |
|
254 |
<property> |
|
255 |
<name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name> |
|
256 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
257 |
</property> |
|
258 |
|
|
259 |
<!-- project --> |
|
260 |
<property> |
|
261 |
<name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name> |
|
262 |
<value>org.apache.hadoop.io.Text</value> |
|
263 |
</property> |
|
264 |
<property> |
|
265 |
<name>mapreduce.multipleoutputs.namedOutput.${out9}.value</name> |
|
266 |
<value>org.apache.hadoop.io.Text</value> |
|
267 |
</property> |
|
268 |
<property> |
|
269 |
<name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name> |
|
270 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
271 |
</property> |
|
272 |
|
|
273 |
<!-- resultConcept --> |
|
274 |
<property> |
|
275 |
<name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name> |
|
276 |
<value>org.apache.hadoop.io.Text</value> |
|
277 |
</property> |
|
278 |
<property> |
|
279 |
<name>mapreduce.multipleoutputs.namedOutput.${out10}.value</name> |
|
280 |
<value>org.apache.hadoop.io.Text</value> |
|
281 |
</property> |
|
282 |
<property> |
|
283 |
<name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name> |
|
284 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
285 |
</property> |
|
286 |
|
|
287 |
<!-- resultClaim --> |
|
288 |
<property> |
|
289 |
<name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name> |
|
290 |
<value>org.apache.hadoop.io.Text</value> |
|
291 |
</property> |
|
292 |
<property> |
|
293 |
<name>mapreduce.multipleoutputs.namedOutput.${out11}.value</name> |
|
294 |
<value>org.apache.hadoop.io.Text</value> |
|
295 |
</property> |
|
296 |
<property> |
|
297 |
<name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name> |
|
298 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
299 |
</property> |
|
300 |
|
|
301 |
<!-- resultClassification --> |
|
302 |
<property> |
|
303 |
<name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name> |
|
304 |
<value>org.apache.hadoop.io.Text</value> |
|
305 |
</property> |
|
306 |
<property> |
|
307 |
<name>mapreduce.multipleoutputs.namedOutput.${out12}.value</name> |
|
308 |
<value>org.apache.hadoop.io.Text</value> |
|
309 |
</property> |
|
310 |
<property> |
|
311 |
<name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name> |
|
312 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
313 |
</property> |
|
314 |
|
|
315 |
<!-- resultLanguage --> |
|
316 |
<property> |
|
317 |
<name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name> |
|
318 |
<value>org.apache.hadoop.io.Text</value> |
|
319 |
</property> |
|
320 |
<property> |
|
321 |
<name>mapreduce.multipleoutputs.namedOutput.${out13}.value</name> |
|
322 |
<value>org.apache.hadoop.io.Text</value> |
|
323 |
</property> |
|
324 |
<property> |
|
325 |
<name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name> |
|
326 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
327 |
</property> |
|
328 |
|
|
329 |
<!-- resultProject --> |
|
330 |
<property> |
|
331 |
<name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name> |
|
332 |
<value>org.apache.hadoop.io.Text</value> |
|
333 |
</property> |
|
334 |
<property> |
|
335 |
<name>mapreduce.multipleoutputs.namedOutput.${out14}.value</name> |
|
336 |
<value>org.apache.hadoop.io.Text</value> |
|
337 |
</property> |
|
338 |
<property> |
|
339 |
<name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name> |
|
340 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
341 |
</property> |
|
342 |
<!-- resultResult --> |
|
343 |
<property> |
|
344 |
<name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name> |
|
345 |
<value>org.apache.hadoop.io.Text</value> |
|
346 |
</property> |
|
347 |
<property> |
|
348 |
<name>mapreduce.multipleoutputs.namedOutput.${out15}.value</name> |
|
349 |
<value>org.apache.hadoop.io.Text</value> |
|
350 |
</property> |
|
351 |
<property> |
|
352 |
<name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name> |
|
353 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
354 |
</property> |
|
355 |
<!-- resultTopic --> |
|
356 |
<property> |
|
357 |
<name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name> |
|
358 |
<value>org.apache.hadoop.io.Text</value> |
|
359 |
</property> |
|
360 |
<property> |
|
361 |
<name>mapreduce.multipleoutputs.namedOutput.${out16}.value</name> |
|
362 |
<value>org.apache.hadoop.io.Text</value> |
|
363 |
</property> |
|
364 |
<property> |
|
365 |
<name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name> |
|
366 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
367 |
</property> |
|
368 |
|
|
369 |
<!-- resultDatasource --> |
|
370 |
<property> |
|
371 |
<name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name> |
|
372 |
<value>org.apache.hadoop.io.Text</value> |
|
373 |
</property> |
|
374 |
<property> |
|
375 |
<name>mapreduce.multipleoutputs.namedOutput.${out17}.value</name> |
|
376 |
<value>org.apache.hadoop.io.Text</value> |
|
377 |
</property> |
|
378 |
<property> |
|
379 |
<name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name> |
|
380 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
381 |
</property> |
|
382 |
|
|
383 |
|
|
384 |
<!-- result --> |
|
385 |
<property> |
|
386 |
<name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name> |
|
387 |
<value>org.apache.hadoop.io.Text</value> |
|
388 |
</property> |
|
389 |
<property> |
|
390 |
<name>mapreduce.multipleoutputs.namedOutput.${out18}.value</name> |
|
391 |
<value>org.apache.hadoop.io.Text</value> |
|
392 |
</property> |
|
393 |
<property> |
|
394 |
<name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name> |
|
395 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
396 |
</property> |
|
397 |
|
|
398 |
<!-- claim --> |
|
399 |
<property> |
|
400 |
<name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name> |
|
401 |
<value>org.apache.hadoop.io.Text</value> |
|
402 |
</property> |
|
403 |
<property> |
|
404 |
<name>mapreduce.multipleoutputs.namedOutput.${out19}.value</name> |
|
405 |
<value>org.apache.hadoop.io.Text</value> |
|
406 |
</property> |
|
407 |
<property> |
|
408 |
<name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name> |
|
409 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
410 |
</property> |
|
411 |
|
|
410 | 412 |
<!-- concept --> |
411 | 413 |
|
412 | 414 |
<property> |
413 |
<name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
|
|
414 |
<value>org.apache.hadoop.io.Text</value>
|
|
415 |
</property>
|
|
416 |
<property>
|
|
417 |
<name>mapreduce.multipleoutputs.namedOutput.${out20}.value</name>
|
|
418 |
<value>org.apache.hadoop.io.Text</value>
|
|
419 |
</property>
|
|
420 |
<property>
|
|
421 |
<name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
|
|
422 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
|
|
423 |
</property>
|
|
415 |
<name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
|
|
416 |
<value>org.apache.hadoop.io.Text</value>
|
|
417 |
</property>
|
|
418 |
<property>
|
|
419 |
<name>mapreduce.multipleoutputs.namedOutput.${out20}.value</name>
|
|
420 |
<value>org.apache.hadoop.io.Text</value>
|
|
421 |
</property>
|
|
422 |
<property>
|
|
423 |
<name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
|
|
424 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
|
|
425 |
</property>
|
|
424 | 426 |
|
425 | 427 |
|
426 | 428 |
<!-- persons --> |
427 | 429 |
|
428 |
<property>
|
|
430 |
<property> |
|
429 | 431 |
<name>mapreduce.multipleoutputs.namedOutput.${out21}.key</name> |
430 | 432 |
<value>org.apache.hadoop.io.Text</value> |
431 | 433 |
</property> |
... | ... | |
437 | 439 |
<name>mapreduce.multipleoutputs.namedOutput.${out21}.format</name> |
438 | 440 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
439 | 441 |
</property> |
440 |
|
|
441 | 442 |
|
443 |
|
|
442 | 444 |
<!-- person results--> |
443 |
<property> |
|
445 |
<property>
|
|
444 | 446 |
<name>mapreduce.multipleoutputs.namedOutput.${out22}.key</name> |
445 | 447 |
<value>org.apache.hadoop.io.Text</value> |
446 | 448 |
</property> |
... | ... | |
452 | 454 |
<name>mapreduce.multipleoutputs.namedOutput.${out22}.format</name> |
453 | 455 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
454 | 456 |
</property> |
455 |
|
|
456 |
<property> |
|
457 |
|
|
458 |
<property>
|
|
457 | 459 |
<name>mapreduce.multipleoutputs.namedOutput.${out23}.key</name> |
458 | 460 |
<value>org.apache.hadoop.io.Text</value> |
459 | 461 |
</property> |
... | ... | |
465 | 467 |
<name>mapreduce.multipleoutputs.namedOutput.${out23}.format</name> |
466 | 468 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
467 | 469 |
</property> |
468 |
|
|
469 |
<property> |
|
470 |
|
|
471 |
<property>
|
|
470 | 472 |
<name>mapreduce.multipleoutputs.namedOutput.${out24}.key</name> |
471 | 473 |
<value>org.apache.hadoop.io.Text</value> |
472 | 474 |
</property> |
... | ... | |
478 | 480 |
<name>mapreduce.multipleoutputs.namedOutput.${out24}.format</name> |
479 | 481 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
480 | 482 |
</property> |
481 |
|
|
482 |
<property> |
|
483 |
|
|
484 |
<property>
|
|
483 | 485 |
<name>mapreduce.multipleoutputs.namedOutput.${out25}.key</name> |
484 | 486 |
<value>org.apache.hadoop.io.Text</value> |
485 | 487 |
</property> |
... | ... | |
491 | 493 |
<name>mapreduce.multipleoutputs.namedOutput.${out25}.format</name> |
492 | 494 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
493 | 495 |
</property> |
494 |
|
|
495 | 496 |
|
496 |
<property> |
|
497 |
|
|
498 |
<property> |
|
497 | 499 |
<name>mapreduce.multipleoutputs.namedOutput.${out26}.key</name> |
498 | 500 |
<value>org.apache.hadoop.io.Text</value> |
499 | 501 |
</property> |
... | ... | |
505 | 507 |
<name>mapreduce.multipleoutputs.namedOutput.${out26}.format</name> |
506 | 508 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
507 | 509 |
</property> |
508 |
|
|
509 |
<property> |
|
510 |
|
|
511 |
<property>
|
|
510 | 512 |
<name>mapreduce.multipleoutputs.namedOutput.${out27}.key</name> |
511 | 513 |
<value>org.apache.hadoop.io.Text</value> |
512 | 514 |
</property> |
... | ... | |
518 | 520 |
<name>mapreduce.multipleoutputs.namedOutput.${out27}.format</name> |
519 | 521 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
520 | 522 |
</property> |
521 |
<property> |
|
523 |
<property>
|
|
522 | 524 |
<name>mapreduce.multipleoutputs.namedOutput.${out28}.key</name> |
523 | 525 |
<value>org.apache.hadoop.io.Text</value> |
524 | 526 |
</property> |
... | ... | |
530 | 532 |
<name>mapreduce.multipleoutputs.namedOutput.${out28}.format</name> |
531 | 533 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
532 | 534 |
</property> |
533 |
|
|
534 |
<property> |
|
535 |
|
|
536 |
<property>
|
|
535 | 537 |
<name>mapreduce.multipleoutputs.namedOutput.${out29}.key</name> |
536 | 538 |
<value>org.apache.hadoop.io.Text</value> |
537 | 539 |
</property> |
... | ... | |
543 | 545 |
<name>mapreduce.multipleoutputs.namedOutput.${out29}.format</name> |
544 | 546 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
545 | 547 |
</property> |
546 |
|
|
547 |
<property> |
|
548 |
|
|
549 |
<property>
|
|
548 | 550 |
<name>mapreduce.multipleoutputs.namedOutput.${out30}.key</name> |
549 | 551 |
<value>org.apache.hadoop.io.Text</value> |
550 | 552 |
</property> |
... | ... | |
556 | 558 |
<name>mapreduce.multipleoutputs.namedOutput.${out30}.format</name> |
557 | 559 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
558 | 560 |
</property> |
559 |
|
|
560 |
<property> |
|
561 |
|
|
562 |
<property>
|
|
561 | 563 |
<name>mapreduce.multipleoutputs.namedOutput.${out31}.key</name> |
562 | 564 |
<value>org.apache.hadoop.io.Text</value> |
563 | 565 |
</property> |
... | ... | |
569 | 571 |
<name>mapreduce.multipleoutputs.namedOutput.${out31}.format</name> |
570 | 572 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
571 | 573 |
</property> |
572 |
|
|
573 |
<property> |
|
574 |
|
|
575 |
<property>
|
|
574 | 576 |
<name>mapreduce.multipleoutputs.namedOutput.${out32}.key</name> |
575 | 577 |
<value>org.apache.hadoop.io.Text</value> |
576 | 578 |
</property> |
... | ... | |
582 | 584 |
<name>mapreduce.multipleoutputs.namedOutput.${out32}.format</name> |
583 | 585 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
584 | 586 |
</property> |
585 |
|
|
586 | 587 |
|
587 |
<property> |
|
588 |
|
|
589 |
<property> |
|
588 | 590 |
<name>mapreduce.multipleoutputs.namedOutput.${out33}.key</name> |
589 | 591 |
<value>org.apache.hadoop.io.Text</value> |
590 | 592 |
</property> |
... | ... | |
597 | 599 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
598 | 600 |
</property> |
599 | 601 |
|
602 |
|
|
603 |
<!--usage statistics start here--> |
|
604 |
|
|
605 |
|
|
606 |
<!--datasourceStats--> |
|
607 |
<property> |
|
608 |
<name>mapreduce.multipleoutputs.namedOutput.${out34}.key</name> |
|
609 |
<value>org.apache.hadoop.io.Text</value> |
|
610 |
</property> |
|
611 |
<property> |
|
612 |
<name>mapreduce.multipleoutputs.namedOutput.${out34}.value</name> |
|
613 |
<value>org.apache.hadoop.io.Text</value> |
|
614 |
</property> |
|
615 |
<property> |
|
616 |
<name>mapreduce.multipleoutputs.namedOutput.${out34}.format</name> |
|
617 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
618 |
</property> |
|
619 |
<!--projectStats--> |
|
620 |
|
|
621 |
|
|
622 |
<property> |
|
623 |
<name>mapreduce.multipleoutputs.namedOutput.${out35}.key</name> |
|
624 |
<value>org.apache.hadoop.io.Text</value> |
|
625 |
</property> |
|
626 |
<property> |
|
627 |
<name>mapreduce.multipleoutputs.namedOutput.${out35}.value</name> |
|
628 |
<value>org.apache.hadoop.io.Text</value> |
|
629 |
</property> |
|
630 |
<property> |
|
631 |
<name>mapreduce.multipleoutputs.namedOutput.${out35}.format</name> |
|
632 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
633 |
</property> |
|
634 |
|
|
635 |
|
|
636 |
<!--resultStats--> |
|
637 |
|
|
638 |
<property> |
|
639 |
<name>mapreduce.multipleoutputs.namedOutput.${out36}.key</name> |
|
640 |
<value>org.apache.hadoop.io.Text</value> |
|
641 |
</property> |
|
642 |
<property> |
|
643 |
<name>mapreduce.multipleoutputs.namedOutput.${out36}.value</name> |
|
644 |
<value>org.apache.hadoop.io.Text</value> |
|
645 |
</property> |
|
646 |
<property> |
|
647 |
<name>mapreduce.multipleoutputs.namedOutput.${out36}.format</name> |
|
648 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
649 |
</property> |
|
650 |
|
|
651 |
|
|
652 |
<!--organizationStats--> |
|
653 |
|
|
654 |
<property> |
|
655 |
<name>mapreduce.multipleoutputs.namedOutput.${out37}.key</name> |
|
656 |
<value>org.apache.hadoop.io.Text</value> |
|
657 |
</property> |
|
658 |
<property> |
|
659 |
<name>mapreduce.multipleoutputs.namedOutput.${out37}.value</name> |
|
660 |
<value>org.apache.hadoop.io.Text</value> |
|
661 |
</property> |
|
662 |
<property> |
|
663 |
<name>mapreduce.multipleoutputs.namedOutput.${out37}.format</name> |
|
664 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
665 |
</property> |
|
666 |
|
|
667 |
|
|
668 |
|
|
600 | 669 |
<!-- ## Classes of mapper and reducer --> |
601 |
|
|
602 |
<property> |
|
603 |
<name>mapreduce.map.class</name> |
|
604 |
<value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsMapper</value> |
|
605 |
</property> |
|
606 |
<property> |
|
607 |
<name>mapreduce.reduce.class</name> |
|
608 |
<value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsReducer</value> |
|
609 |
</property> |
|
610 |
<property> |
|
611 |
<name>io.serializations</name> |
|
612 |
<value>org.apache.hadoop.io.serializer.WritableSerialization</value> |
|
613 |
</property> |
|
614 | 670 |
|
615 |
<!-- ## Custom config --> |
|
616 |
|
|
617 |
<!--delim character used to separate fields in hdfs dump files <property> --> |
|
618 |
<property> |
|
619 |
<name>stats.delim</name> |
|
620 |
<value>${Stats_delim_Character}</value> |
|
621 |
</property> |
|
622 |
<!--default string for Null String Values --> |
|
623 |
<property> |
|
624 |
<name>stats.nullString</name> |
|
625 |
<value>${Stats_null_String_Field}</value> |
|
626 |
</property> |
|
627 |
<!--default string for Null Numeric Values --> |
|
628 |
<property> |
|
629 |
<name>stats.nullNum</name> |
|
630 |
<value>${Stats_null_Numeric_Field}</value> |
|
631 |
</property> |
|
632 |
<property> |
|
633 |
<name>stats.enclChar</name> |
|
634 |
<value>${Stats_enclosing_Character}</value> |
|
635 |
</property> |
|
636 |
|
|
637 |
|
|
638 |
<!--source hbase table --> |
|
639 |
<property> |
|
640 |
<name>hbase.mapreduce.inputtable</name> |
|
641 |
<value>${Stats_Hbase_Source_Table}</value> |
|
642 |
</property> |
|
643 |
<property> |
|
644 |
<name>hbase.mapred.inputtable</name> |
|
645 |
<value>${Stats_Hbase_Source_Table}</value> |
|
646 |
</property> |
|
647 |
|
|
648 |
<property> |
|
649 |
<!-- mapping of protos entities to tables in the relDB --> |
|
650 |
<name>stats.dbTablesMap</name> |
|
651 |
<value>${Stats_db_table_map}</value> |
|
652 |
</property> |
|
653 |
|
|
654 |
<!-- This directory does not correspond to a data store. In fact, this directory only |
|
655 |
contains multiple data stores. It has to be set to the name of the workflow node. |
|
656 |
--> |
|
657 |
<property> |
|
658 |
<name>mapred.output.dir</name> |
|
659 |
<value>${Stats_output_Path}</value> |
|
660 |
</property> |
|
661 |
<property> |
|
662 |
<name>stats.indexConf</name> |
|
663 |
<value>${Stats_indexConf}</value> |
|
664 |
</property> |
|
665 |
<!-- ## Workflow node parameters --> |
|
666 |
<property> |
|
667 |
<name>mapred.reduce.tasks</name> |
|
668 |
<value>${numReducers}</value> |
|
669 |
</property> |
|
670 |
|
|
671 |
</configuration> |
|
672 |
</map-reduce> |
|
673 |
<ok to="end" /> |
|
671 |
<property> |
|
672 |
<name>mapreduce.map.class</name> |
|
673 |
<value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsMapper</value> |
|
674 |
</property> |
|
675 |
<property> |
|
676 |
<name>mapreduce.reduce.class</name> |
|
677 |
<value>eu.dnetlib.data.mapreduce.hbase.statsExport.mapreduce.StatsReducer</value> |
|
678 |
</property> |
|
679 |
<property> |
|
680 |
<name>io.serializations</name> |
|
681 |
<value>org.apache.hadoop.io.serializer.WritableSerialization</value> |
|
682 |
</property> |
|
674 | 683 |
|
675 |
<error to="fail" /> |
|
676 |
</action> |
|
677 |
|
|
678 |
<action name='exportContext'> |
|
679 |
<java> |
|
680 |
<prepare> |
|
681 |
</prepare> |
|
682 |
<configuration> |
|
683 |
<property> |
|
684 |
<name>mapred.job.queue.name</name> |
|
685 |
<value>${queueName}</value> |
|
686 |
</property> |
|
687 |
</configuration> |
|
688 |
<main-class>eu.dnetlib.iis.core.workflows.stats.ContextExportWrapper</main-class> |
|
689 |
<arg>${Stats_output_Path}</arg> |
|
690 |
<arg>${isLookupEndpoint}</arg> |
|
691 |
</java> |
|
692 |
<ok to="prepareDatabase" /> |
|
684 |
<!-- ## Custom config --> |
|
693 | 685 |
|
694 |
<error to="fail" /> |
|
695 |
</action> |
|
696 |
<action name="prepareDatabase"> |
|
697 |
<java> |
|
698 |
<prepare> |
|
699 |
</prepare> |
|
700 |
<configuration> |
|
701 |
<property> |
|
702 |
<name>mapred.job.queue.name</name> |
|
703 |
<value>${queueName}</value> |
|
704 |
</property> |
|
705 |
</configuration> |
|
706 |
|
|
707 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
708 |
<arg>-SworkingDir=${workingDir}</arg> |
|
709 |
<arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg> |
|
710 |
<arg>-PStats_db_Url=${Stats_db_Url}</arg> |
|
711 |
<arg>-PStats_db_User=${Stats_db_User}</arg> |
|
712 |
<arg>-PStats_db_Pass=${Stats_db_Pass}</arg> |
|
713 |
<arg>-PStats_db_Driver=${Stats_db_Driver}</arg> |
|
714 |
|
|
715 |
</java> |
|
686 |
<!--delim character used to separate fields in hdfs dump files <property> --> |
|
687 |
<property> |
|
688 |
<name>stats.delim</name> |
|
689 |
<value>${Stats_delim_Character}</value> |
|
690 |
</property> |
|
691 |
<!--default string for Null String Values --> |
|
692 |
<property> |
|
693 |
<name>stats.nullString</name> |
|
694 |
<value>${Stats_null_String_Field}</value> |
|
695 |
</property> |
|
696 |
<!--default string for Null Numeric Values --> |
|
697 |
<property> |
|
698 |
<name>stats.nullNum</name> |
|
699 |
<value>${Stats_null_Numeric_Field}</value> |
|
700 |
</property> |
|
701 |
<property> |
|
702 |
<name>stats.enclChar</name> |
|
703 |
<value>${Stats_enclosing_Character}</value> |
|
704 |
</property> |
|
716 | 705 |
|
717 |
<ok to="sqoopImport" /> |
|
718 |
<error to="fail" /> |
|
719 |
</action> |
|
720 |
|
|
721 |
<action name="sqoopImport"> |
|
722 |
<java> |
|
723 |
<prepare> |
|
724 |
</prepare> |
|
725 |
<configuration> |
|
726 |
<property> |
|
727 |
<name>mapred.job.queue.name</name> |
|
728 |
<value>${queueName}</value> |
|
729 |
</property> |
|
730 |
|
|
731 |
<property> |
|
732 |
<name>oozie.sqoop.log.level</name> |
|
733 |
<value>DEBUG</value> |
|
734 |
</property> |
|
735 |
|
|
736 |
</configuration> |
|
737 |
|
|
738 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
739 |
<arg>-SworkingDir=${workingDir}</arg> |
|
740 |
<arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg> |
|
741 |
<arg>-PStats_db_Url=${Stats_db_Url}</arg> |
|
742 |
<arg>-PStats_db_User=${Stats_db_User}</arg> |
|
743 |
<arg>-PStats_db_Pass=${Stats_db_Pass}</arg> |
|
744 |
|
|
745 |
<arg>-PStats_output_Path=${Stats_output_Path}</arg> |
|
746 |
<arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg> |
|
747 |
<arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg> |
|
748 |
<arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg> |
|
749 |
<arg>-PStats_delim_Character=${Stats_delim_Character}</arg> |
|
750 |
<arg>-PStats_db_table_map=${Stats_db_table_map}</arg> |
|
751 |
<arg>-PStats_enclosing_Character=${Stats_enclosing_Character}</arg> |
|
752 |
|
|
753 |
</java> |
|
754 |
<ok to="end" /> |
|
755 |
|
|
756 | 706 |
|
757 |
<error to="fail" /> |
|
758 |
</action> |
|
759 |
|
|
760 |
<action name="finalizeDatabase"> |
|
761 |
<java> |
|
762 |
<prepare> |
|
763 |
</prepare> |
|
764 |
<configuration> |
|
765 |
<property> |
|
766 |
<name>mapred.job.queue.name</name> |
|
767 |
<value>${queueName}</value> |
|
768 |
</property> |
|
769 |
</configuration> |
|
770 |
|
|
771 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
772 |
<arg>-SworkingDir=${workingDir}</arg> |
|
773 |
<arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg> |
|
774 |
<arg>-PStats_db_Url=${Stats_db_Url}</arg> |
|
775 |
<arg>-PStats_db_User=${Stats_db_User}</arg> |
|
776 |
<arg>-PStats_db_Pass=${Stats_db_Pass}</arg> |
|
777 |
<arg>-PStats_db_Driver=${Stats_db_Driver}</arg> |
|
778 |
</java> |
|
779 |
<ok to="cleanUpHDFS" /> |
|
780 |
<error to="fail" /> |
|
781 |
</action> |
|
707 |
<!--source hbase table --> |
|
708 |
<property> |
|
709 |
<name>hbase.mapreduce.inputtable</name> |
|
710 |
<value>${Stats_Hbase_Source_Table}</value> |
|
711 |
</property> |
|
712 |
<property> |
|
713 |
<name>hbase.mapred.inputtable</name> |
|
714 |
<value>${Stats_Hbase_Source_Table}</value> |
|
715 |
</property> |
|
782 | 716 |
|
783 |
<action name="generateReports"> |
|
717 |
<property> |
|
718 |
<!-- mapping of protos entities to tables in the relDB --> |
|
719 |
<name>stats.dbTablesMap</name> |
|
720 |
<value>${Stats_db_table_map}</value> |
|
721 |
</property> |
|
722 |
|
|
723 |
<!-- This directory does not correspond to a data store. In fact, this directory only |
|
724 |
contains multiple data stores. It has to be set to the name of the workflow node. |
|
725 |
--> |
|
726 |
<property> |
|
727 |
<name>mapred.output.dir</name> |
|
728 |
<value>${Stats_output_Path}</value> |
|
729 |
</property> |
|
730 |
<property> |
|
731 |
<name>stats.indexConf</name> |
|
732 |
<value>${Stats_indexConf}</value> |
|
733 |
</property> |
|
734 |
<!-- ## Workflow node parameters --> |
|
735 |
<property> |
|
736 |
<name>mapred.reduce.tasks</name> |
|
737 |
<value>${numReducers}</value> |
|
738 |
</property> |
|
739 |
|
|
740 |
</configuration> |
|
741 |
</map-reduce> |
|
742 |
<ok to="exportContext"/> |
|
743 |
|
|
744 |
<error to="fail"/> |
|
745 |
</action> |
|
746 |
|
|
747 |
<action name='exportUsageStats'> |
|
784 | 748 |
<java> |
785 | 749 |
<prepare> |
786 | 750 |
</prepare> |
... | ... | |
790 | 754 |
<value>${queueName}</value> |
791 | 755 |
</property> |
792 | 756 |
</configuration> |
757 |
<main-class>eu.dnetlib.iis.core.workflows.stats.ContextExportWrapper</main-class> |
|
758 |
<arg>${Stats_output_Path}</arg> |
|
759 |
<arg>${isLookupEndpoint}</arg> |
|
760 |
</java> |
|
761 |
<ok to="exportUsageStats"/> |
|
793 | 762 |
|
763 |
<error to="fail"/> |
|
764 |
</action> |
|
765 |
|
|
766 |
<action name='exportUsageStats'> |
|
767 |
<java> |
|
768 |
<prepare> |
|
769 |
</prepare> |
|
770 |
<configuration> |
|
771 |
<property> |
|
772 |
<name>mapred.job.queue.name</name> |
|
773 |
<value>${queueName}</value> |
|
774 |
</property> |
|
775 |
</configuration> |
|
776 |
<main-class>eu.dnetlib.iis.core.workflows.stats.UsageStatsExportWrapper</main-class> |
|
777 |
<arg>${Stats_usageDB_url}</arg> |
|
778 |
<arg>${Stats_usageDB_Driver}</arg> |
|
779 |
<arg>${Stats_delim_Character}</arg> |
|
780 |
<arg>${Stats_output_Path}</arg> |
|
781 |
<arg>${Stats_usageDB_entities}</arg> |
|
782 |
</java> |
|
783 |
|
|
784 |
<ok to="sqoopImport"/> |
|
785 |
<error to="fail"/> |
|
786 |
</action> |
|
787 |
|
|
788 |
<action name="prepareDatabase"> |
|
789 |
<java> |
|
790 |
<prepare> |
|
791 |
</prepare> |
|
792 |
<configuration> |
|
793 |
<property> |
|
794 |
<name>mapred.job.queue.name</name> |
|
795 |
<value>${queueName}</value> |
|
796 |
</property> |
|
797 |
</configuration> |
|
798 |
|
|
794 | 799 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
795 | 800 |
<arg>-SworkingDir=${workingDir}</arg> |
796 |
<arg>eu.dnetlib.iis.core.workflows.stats.ReportWrapper</arg>
|
|
801 |
<arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
|
|
797 | 802 |
<arg>-PStats_db_Url=${Stats_db_Url}</arg> |
798 | 803 |
<arg>-PStats_db_User=${Stats_db_User}</arg> |
799 | 804 |
<arg>-PStats_db_Pass=${Stats_db_Pass}</arg> |
800 | 805 |
<arg>-PStats_db_Driver=${Stats_db_Driver}</arg> |
801 |
<arg>-PReport_Path=/tmp/</arg> |
|
806 |
|
|
802 | 807 |
</java> |
803 |
<ok to="end" /> |
|
804 |
<error to="fail" /> |
|
808 |
|
|
809 |
<ok to="sqoopImport"/> |
|
810 |
<error to="fail"/> |
|
805 | 811 |
</action> |
806 |
|
|
807 |
<action name="cleanUpHDFS"> |
|
808 |
<fs> |
|
809 |
|
|
810 |
<delete path="${nameNode}${Stats_output_Path}" /> |
|
811 |
|
|
812 |
</fs> |
|
813 |
<ok to="end" /> |
|
814 |
<error to="fail" /> |
|
815 |
</action> |
|
816 |
<kill name="fail"> |
|
817 |
<message> |
|
818 |
Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}] |
|
819 |
</message> |
|
820 |
</kill> |
|
821 |
<end name="end" /> |
|
812 |
|
|
813 |
<action name="sqoopImport"> |
|
814 |
<java> |
|
815 |
<prepare> |
|
816 |
</prepare> |
|
817 |
<configuration> |
|
818 |
<property> |
|
819 |
<name>mapred.job.queue.name</name> |
|
820 |
<value>${queueName}</value> |
|
821 |
</property> |
|
822 |
|
|
823 |
<property> |
|
824 |
<name>oozie.sqoop.log.level</name> |
|
825 |
<value>DEBUG</value> |
|
826 |
</property> |
|
827 |
|
|
828 |
</configuration> |
|
829 |
|
|
830 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
831 |
<arg>-SworkingDir=${workingDir}</arg> |
|
832 |
<arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg> |
|
833 |
<arg>-PStats_db_Url=${Stats_db_Url}</arg> |
|
834 |
<arg>-PStats_db_User=${Stats_db_User}</arg> |
|
835 |
<arg>-PStats_db_Pass=${Stats_db_Pass}</arg> |
|
836 |
|
|
837 |
<arg>-PStats_output_Path=${Stats_output_Path}</arg> |
|
838 |
<arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg> |
|
839 |
<arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg> |
|
840 |
<arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg> |
|
841 |
<arg>-PStats_delim_Character=${Stats_delim_Character}</arg> |
|
842 |
<arg>-PStats_db_table_map=${Stats_db_table_map}</arg> |
|
843 |
<arg>-PStats_enclosing_Character=${Stats_enclosing_Character}</arg> |
|
844 |
|
|
845 |
</java> |
|
846 |
<ok to="end"/> |
|
847 |
|
|
848 |
|
|
849 |
<error to="fail"/> |
|
850 |
</action> |
|
851 |
|
|
852 |
<action name="finalizeDatabase"> |
|
853 |
<java> |
|
854 |
<prepare> |
|
855 |
</prepare> |
|
856 |
<configuration> |
|
857 |
<property> |
|
858 |
<name>mapred.job.queue.name</name> |
|
859 |
<value>${queueName}</value> |
|
860 |
</property> |
|
861 |
</configuration> |
|
862 |
|
|
863 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
864 |
<arg>-SworkingDir=${workingDir}</arg> |
|
865 |
<arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg> |
|
866 |
<arg>-PStats_db_Url=${Stats_db_Url}</arg> |
|
867 |
<arg>-PStats_db_User=${Stats_db_User}</arg> |
|
868 |
<arg>-PStats_db_Pass=${Stats_db_Pass}</arg> |
|
869 |
<arg>-PStats_db_Driver=${Stats_db_Driver}</arg> |
|
870 |
</java> |
|
871 |
<ok to="cleanUpHDFS"/> |
|
872 |
<error to="fail"/> |
|
873 |
</action> |
|
874 |
|
|
875 |
<action name="cleanUpHDFS"> |
|
876 |
<fs> |
|
877 |
|
|
878 |
<delete path="${nameNode}${Stats_output_Path}"/> |
|
879 |
|
|
880 |
</fs> |
|
881 |
<ok to="end"/> |
|
882 |
<error to="fail"/> |
|
883 |
</action> |
|
884 |
<kill name="fail"> |
|
885 |
<message> |
|
886 |
Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}] |
|
887 |
</message> |
|
888 |
</kill> |
|
889 |
<end name="end"/> |
|
822 | 890 |
</workflow-app> |
Also available in: Unified diff
updates for usage statistics