<workflow-app name="test-core_examples_javamapreduce_stats"
    xmlns="uri:oozie:workflow:0.4">
    <!-- map reduce job that exports hbase data and prepares them for import
        into the relational database used for statistics generation -->

    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        ...
            </property>
        </configuration>
    </global>
-   <start to='sqoopImport' />
+   <start to='get-scanner' />
    <action name='get-scanner'>
        <java>
            <main-class>eu.dnetlib.iis.core.workflows.stats.HbaseScannerGenerator</main-class>
-           <!-- index.conf {
-               result { dups = true, links = [
-                   { relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] },
-                   { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] },
-                   { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] },
-                   { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] },
-                   { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = false, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] },
-                   { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = false, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }
-               ]},
-               person { dups = false, links = [
-                   { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] },
-                   { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] }
-               ]},
-               datasource { dups = false, links = [
-                   { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] }
-               ]},
-               organization { dups = false, links = [
-                   { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] },
-                   { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }
-               ]},
-               project { dups = false, links = [
-                   { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] },
-                   { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] },
-                   { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }
-               ]}} -->
            <!-- column families: -->
-           <arg>-f
-               datasource ,datasourceOrganization_provision_provides ,organization,
-               <!-- projectOrganization_participation_isParticipant, -->
-               project
-               <!-- ,projectOrganization_participation_hasParticipant -->
-               , result, resultProject_outcome_produces,
-               personResult_authorship_hasAuthor,resultResult_publicationDataset_isRelatedTo
-           </arg>
+           <arg>
+               -f
+               datasource
+               <!-- , datasourceOrganization_provision_provides ,organization, -->
+               <!-- projectOrganization_participation_isParticipant, -->
+               <!-- project -->
+               <!-- ,projectOrganization_participation_hasParticipant -->
+               <!-- , -->
+               <!-- result -->
+               <!-- , resultProject_outcome_produces, -->
+               <!-- personResult_authorship_hasAuthor,resultResult_publicationDataset_isRelatedTo -->
+           </arg>

            <capture-output />
        </java>
        <ok to="mr_export" />
        <error to="fail" />
    </action>
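A note on the <capture-output /> element closing the java action above: Oozie's java action hands values back to the workflow by writing a Java properties file to the path named by the "oozie.action.output.properties" system property; later nodes can then read them with wf:actionData('get-scanner'). A minimal sketch of that contract, assuming only documented Oozie behaviour — the property name "scan" and its value are illustrative, not the actual output of HbaseScannerGenerator:

    import java.io.FileOutputStream;
    import java.io.OutputStream;
    import java.util.Properties;

    /**
     * Sketch of a java-action main class that returns values via
     * <capture-output/>. Oozie exposes the target file through the
     * "oozie.action.output.properties" system property.
     */
    public class CaptureOutputSketch {
        public static void main(String[] args) throws Exception {
            String outputFile = System.getProperty("oozie.action.output.properties");
            if (outputFile == null) {
                throw new IllegalStateException("not running inside an Oozie java action");
            }
            Properties props = new Properties();
            // Illustrative payload: some serialized scan definition for the
            // downstream map-reduce job (hypothetical key and value).
            props.setProperty("scan", "serialized-scan-goes-here");
            try (OutputStream os = new FileOutputStream(outputFile)) {
                props.store(os, "visible to later nodes as wf:actionData('get-scanner')");
            }
        }
    }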
    <action name="mr_export">
        <map-reduce>

            <prepare>
                <delete path="${nameNode}${Stats_output_Path}" />
            </prepare>
            <configuration>
                <property>
                ...
                    <name>hbase.rootdir</name>
                    <value>hdfs://nmis-hadoop-cluster/hbase</value>
                </property>

                <property>
                    <name>hbase.security.authentication</name>
                    <value>simple</value>
                </property>
                <!-- ZOOKEEPER -->

                <property>
                    <name>hbase.zookeeper.quorum</name>
                    <value>
                ...
                    <name>zookeeper.znode.rootserver</name>
                    <value>root-region-server</value>
                </property>

                <property>
                    <name>hbase.zookeeper.property.clientPort</name>
                    <value>2182</value>
                </property>

                <!-- MR IO -->

                <property>
                    <name>mapreduce.inputformat.class</name>
                    <value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
                </property>

                <property>
                    <name>mapred.mapoutput.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapred.output.value.class</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
                </property>

                <!-- ## This is required for new MapReduce API usage -->
                <property>
                    <name>mapred.mapper.new-api</name>
                ...
                    <name>mapred.reducer.new-api</name>
                    <value>true</value>
                </property>

                <!-- # Job-specific options -->
                <property>
                    <name>dfs.blocksize</name>
                ...
                    <name>mapreduce.map.speculative</name>
                    <value>false</value>
                </property>

                <!-- I/O FORMAT -->
                <!-- IMPORTANT: sets the default delimiter used by the text output
                    writer. Required to fix an issue with a trailing tab added
                    between id and value in multiple outputs -->
                <property>
                    <name>mapred.textoutputformat.separator</name>
                    <value>${Stats_delim_Character}</value>
                </property>
                <!-- ## Names of all output ports -->

                <property>
                    <name>mapreduce.multipleoutputs</name>
                    <value>${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21}</value>
                </property>
                <!-- datasource -->
                <property>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- datasourceLanguage -->
                <property>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- datasourceOrganization -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- datasourceTopic -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- resultDatasource -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- organization -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- projectOrganization -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultProject -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- project -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- resultConcept -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- resultClaim -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- resultClassification -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- resultLanguage -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- resultProject -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultResult -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- resultTopic -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- resultDatasource -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- result -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- claim -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- concept -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>

                <!-- category -->
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.key</name>
                    <value>org.apache.hadoop.io.Text</value>
                ...
                </property>
                <property>
                    <name>mapreduce.multipleoutputs.namedOutput.${out21}.format</name>
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
                </property>
                <!-- ## Classes of mapper and reducer -->

                <property>
                    <name>mapreduce.map.class</name>
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsMapper</value>
                </property>
                <property>
                    <name>mapreduce.reduce.class</name>
                    <value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsReducer</value>
                </property>
                <property>
                    <name>io.serializations</name>
                    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
                </property>
                <!-- ## Custom config -->

                <!-- delim character used to separate fields in hdfs dump files <property> -->
                <property>
                    <name>stats.delim</name>
                ...
                    <name>hbase.mapreduce.inputtable</name>
                    <value>${Stats_Hbase_Source_Table}</value>
                </property>
-               <property>
-                   <name>indexConf</name>
-                   <value>${indexConf}</value>
+               <property>
+                   <!-- mapping of protos entities to tables in the relDB -->
+                   <name>stats.dbTablesMap</name>
+                   <value>${Stats_db_table_map}</value>
                </property>

                <!-- This directory does not correspond to a data store. In fact,
                    this directory only contains multiple data stores. It has to be
                    set to the name of the workflow node. -->
                <property>
                    <name>mapred.output.dir</name>
                    <value>${Stats_output_Path}</value>
                </property>
+               <property>
+                   <name>stats.indexConf</name>
+                   <value>${Stats_indexConf}</value>
+               </property>
                <!-- ## Workflow node parameters -->
                <property>
                    <name>mapred.reduce.tasks</name>
-                   <value>10</value>
+                   <value>${numReducers}</value>
                </property>
            </configuration>
        </map-reduce>
-       <ok to="prepareDatabase" />
+       <ok to="sqoopImport" />
        <error to="fail" />
    </action>
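The mr_export action reads HBase through TableInputFormat (mapreduce.inputformat.class plus hbase.mapreduce.inputtable) and fans records out to the 21 named outputs above. The mapreduce.multipleoutputs.* properties it sets are exactly the configuration keys that Hadoop's MultipleOutputs.addNamedOutput would write programmatically, and the reducer consumes them through the same API. A minimal sketch of that pattern — a stand-in for illustration, not the actual StatsReducer source, with "datasource" standing for a resolved ${out1} name:

    import java.io.IOException;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

    /**
     * Sketch of a reducer writing to the named outputs declared by the
     * workflow's mapreduce.multipleoutputs.* properties.
     */
    public class StatsReducerSketch extends Reducer<Text, Text, Text, Text> {
        private MultipleOutputs<Text, Text> mos;

        @Override
        protected void setup(Context context) {
            // Picks up the named-output declarations from the job configuration.
            mos = new MultipleOutputs<>(context);
        }

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text value : values) {
                // "datasource" must match one of the resolved ${outN} names;
                // records land in files named datasource-r-NNNNN.
                mos.write("datasource", key, value);
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            mos.close();
        }
    }

With TextOutputFormat and mapred.textoutputformat.separator set to ${Stats_delim_Character}, each named output becomes a set of delimiter-separated files under ${Stats_output_Path} (e.g. datasource-r-00000), which is the path pattern the hard-coded sqoop export at the bottom of this workflow expects.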
    <action name="prepareDatabase">
        <java>

            <prepare>
            </prepare>
            <configuration>
            ...
                    <value>${queueName}</value>
                </property>
            </configuration>

            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>

            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>

        </java>
        <ok to="sqoopImport" />
        <error to="fail" />
    </action>

    <action name="sqoopImport">
        <java>
            <prepare>
            ...
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>

                <property>
                    <name>oozie.sqoop.log.level</name>
                    <value>DEBUG</value>
                </property>

            </configuration>

            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg>

            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>

            <arg>-PStats_output_Path=${Stats_output_Path}</arg>
            <arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg>
            <arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg>
            <arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg>
            <arg>-PStats_delim_Character=${Stats_delim_Character}</arg>
+           <arg>-PStats_newline_Character=${Stats_newline_Character}</arg>
+           <arg>-PStats_db_table_map=${Stats_db_table_map}</arg>
        </java>
        <ok to="end" />
        <error to="fail" />
    </action>
    <action name="finalizeDatabase">
        <java>

            <prepare>
            </prepare>
            <configuration>
            ...
                    <value>${queueName}</value>
                </property>
            </configuration>

            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
            <arg>-PStats_output_Path=${Stats_output_Path}</arg>
+           <arg>-PStats_ContextResourceXML=${ContextResourceXML}</arg>
        </java>
        <ok to="end" />
        <error to="fail" />
    ...
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <prepare>
        </prepare>
        <configuration>
            <property>
                <name>mapred.job.queue.name</name>
                <value>${queueName}</value>
            </property>
            <property>
                <name>oozie.sqoop.log.level</name>
                <value>DEBUG</value>
            </property>
        </configuration>
        <command>export -Dsqoop.export.records.per.statement=1000
            -Dsqoop.statements.per.transaction=1000 --connect
            jdbc:postgresql://duffy.di.uoa.gr:5432/test_stats --export-dir
            /tmp/test_stats/datasource-r-00000 --table datasource --username
            sqoop --password sqoop --input-fields-terminated-by ! -m 4
        </command>
    </sqoop>
    <ok to="end" />
    <error to="fail" />
    </action>

    <kill name="fail">
        <message>
            Unfortunately, the process failed -- error message:
            [${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>
    <end name="end" />
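For reference, every ${...} placeholder above is resolved from the job.properties supplied at submission time. A hypothetical fragment covering the names this workflow uses — all values are illustrative guesses except those echoed from the hard-coded sqoop command above, and angle-bracketed entries are deliberately left unspecified:

    nameNode=hdfs://nmis-hadoop-cluster
    jobTracker=<jobtracker-host:port>
    queueName=default
    workingDir=<working directory>

    numReducers=10
    Stats_Hbase_Source_Table=<source hbase table>
    Stats_output_Path=/tmp/test_stats/
    Stats_delim_Character=!
    Stats_newline_Character=<record terminator>
    Stats_db_Url=jdbc:postgresql://duffy.di.uoa.gr:5432/test_stats
    Stats_db_User=sqoop
    Stats_db_Pass=sqoop
    Stats_db_Driver=org.postgresql.Driver
    Stats_db_table_map=<protos-entity-to-table mapping>
    Stats_indexConf=<index configuration>
    Stats_sqoop_RecsPerStatement=1000
    Stats_sqoop_StatementPerTrans=1000
    Stats_sqoop_ReducersCount=4

    # named output ports; labels follow the inline comments in the workflow
    # (the duplicated comments suggest some labels are approximate)
    out1=datasource
    out2=datasourceLanguage
    out3=datasourceOrganization
    ...
    out21=category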