
Revision 29327

Added by Eri Katsari over 10 years ago


modules/dnet-openaire-stats-workflow/src/main/resources/eu/dnetlib/iis/core/examples/javamapreduce/stats/job.properties
jobTracker=nmis-hadoop-jt
queueName=default
user.name=eri.katsari
-Stats_db_Url = jdbc:postgresql://duffy.di.uoa.gr:5432
+numReducers=1
+Stats_db_Url = jdbc:postgresql://duffy.di.uoa.gr:5432/test_stats
Stats_db_User = sqoop
Stats_db_Pass = sqoop
-Stats_db_Name =test_stats
Stats_db_Driver = org.postgresql.Driver
Stats_sqoop_RecsPerStatement = 1000
Stats_sqoop_StatementPerTrans = 1000
Stats_sqoop_ReducersCount=4
Stats_Hbase_Source_Table=db_openaireplus_node6_t
-Stats_output_Path=/tmp/test_stats/
+Stats_output_Path=/tmp/test_stats3/
Stats_null_String_Field=NULL
Stats_null_Numeric_Field=-1
Stats_delim_Character=!
+Stats_db_table_map=datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultResult=result_results,resultTopic=result_topics,category=category,claim=claim,concept=concept,datasourceLanguage=datasource_languages,resultLanguage=result_languages,resultDatasource=result_datasources
out1=datasource
out2=project
out3=organization
......
out19=claim
out20=concept
out21=category
+
+Stats_indexConf=index.conf { result { dups = true, links = [{ relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] },{ relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = false, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] },{ relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = false, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } ]},organization { dups = false, links = [{ relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
+ContextResourceXML=<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="context.xsl"?><RESOURCE_PROFILE><HEADER><RESOURCE_IDENTIFIER value="1284f828-6c12-4905-a9c9-43f143b36e08_Q29udGV4dERTUmVzb3VyY2VzL0NvbnRleHREU1Jlc291cmNlVHlwZQ=="/><RESOURCE_TYPE value="ContextDSResourceType"/><RESOURCE_KIND value="ContextDSResources"/><RESOURCE_URI value=""/><DATE_OF_CREATION value=""/></HEADER><BODY><CONFIGURATION><context id="egi" type="community" label="EGI"><category claim="true" label="Scientific Disciplines" id="egi::classification"><concept label="Natural Sciences" claim="true" id="egi::classification::natsc"><param name="originalID">1</param><concept label="Mathematics" claim="true" id="egi::classification::natsc::math"><param name="originalID">1.1</param><concept label="Applied Mathematics" id="egi::classification::natsc::math::applied" claim="true"><param name="originalID">1.1.1</param></concept></concept></category></context></CONFIGURATION><STATUS/><SECURITY_PARAMETERS/></BODY></RESOURCE_PROFILE>
modules/dnet-openaire-stats-workflow/src/main/resources/eu/dnetlib/iis/core/examples/javamapreduce/stats/oozie_app/workflow.xml
<workflow-app name="test-core_examples_javamapreduce_stats"
	xmlns="uri:oozie:workflow:0.4">
-	<!-- map reduce job that exports hbase data and prepares them for import to the relation
-		database used for statistics generation -->
-
+	<!-- map reduce job that exports hbase data and prepares them for import 
+		to the relation database used for statistics generation -->
+
	<global>
		<job-tracker>${jobTracker}</job-tracker>
		<name-node>${nameNode}</name-node>
......
			</property>
		</configuration>
	</global>
-	<start to='sqoopImport' />
+	<start to='get-scanner' />
	<action name='get-scanner'>
		<java>
-			 <main-class>eu.dnetlib.iis.core.workflows.stats.HbaseScannerGenerator</main-class>
-
-		<!--  index.conf {  
-			result { dups = true, links = [
-			{ relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, 
-			{ relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, 
-			{ relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, 
-			{ relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] },
-			{ relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = false, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] },
-			{ relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = false, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }
-			]}, 
-			person { dups = false, links = [
-			{ relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, 
-			{ relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } 
-			]}, 
-			datasource { dups = false, links = [
-			{ relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } 
-			]}, 
-			organization { dups = false, links = [
-			{ relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, 
-			{ relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } 
-			]}, 
-			project { dups = false, links = [
-			{ relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, 
-			{ relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, 
-			{ relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } 
-			]}} -->
+			<main-class>eu.dnetlib.iis.core.workflows.stats.HbaseScannerGenerator
+			</main-class>
			<!-- column families: -->
-
-			<arg>-f 
-			    datasource ,datasourceOrganization_provision_provides ,organization,  
-<!-- 			    projectOrganization_participation_isParticipant,  -->
- 			project
-<!--  			,projectOrganization_participation_hasParticipant -->
-, result, resultProject_outcome_produces,  
-				personResult_authorship_hasAuthor,resultResult_publicationDataset_isRelatedTo 
-				</arg>
-
-			 <capture-output />
+
+			<arg>
+				-f 
+				datasource
+				<!-- , datasourceOrganization_provision_provides ,organization, -->
+				<!-- projectOrganization_participation_isParticipant, -->
+				<!-- project -->
+				<!-- ,projectOrganization_participation_hasParticipant -->
+				<!-- , -->
+<!-- 				result -->
+				<!-- , resultProject_outcome_produces, -->
+				<!-- personResult_authorship_hasAuthor,resultResult_publicationDataset_isRelatedTo -->
+			</arg>
+
+			<capture-output />
		</java>
		<ok to="mr_export" />
		<error to="fail" />
	</action>
	<action name="mr_export">
		<map-reduce>
-
+
			<prepare>
				<delete path="${nameNode}${Stats_output_Path}" />
-
+
			</prepare>
			<configuration>
				<property>
......
					<name>hbase.rootdir</name>
					<value>hdfs://nmis-hadoop-cluster/hbase</value>
				</property>
-
+
				<property>
					<name>hbase.security.authentication</name>
					<value>simple</value>
				</property>
				<!-- ZOOKEEPER -->
-
+
				<property>
					<name>hbase.zookeeper.quorum</name>
					<value>
......
					<name>zookeeper.znode.rootserver</name>
					<value>root-region-server</value>
				</property>
-
+
				<property>
					<name>hbase.zookeeper.property.clientPort</name>
					<value>2182</value>
				</property>
-
-
+
+
				<!-- MR IO -->
-
-
+
+
				<property>
					<name>mapreduce.inputformat.class</name>
					<value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
				</property>
-
+
				<property>
					<name>mapred.mapoutput.key.class</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapred.output.value.class</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
+					</value>
				</property>
-
+
				<!-- ## This is required for new MapReduce API usage -->
				<property>
					<name>mapred.mapper.new-api</name>
......
					<name>mapred.reducer.new-api</name>
					<value>true</value>
				</property>
-
+
				<!-- # Job-specific options -->
				<property>
					<name>dfs.blocksize</name>
......
					<name>mapreduce.map.speculative</name>
					<value>false</value>
				</property>
-
+
				<!-- I/O FORMAT -->
-<!-- 				IMPORTANT: sets default delimeter used by text output writer. 
-                   Required to fix issue with traling tab added between id and value in multiple outputs -->
-					<property>
+				<!-- IMPORTANT: sets default delimeter used by text output writer. Required 
+					to fix issue with traling tab added between id and value in multiple outputs -->
+				<property>
					<name>mapred.textoutputformat.separator</name>
					<value>${Stats_delim_Character}</value>
				</property>
				<!-- ## Names of all output ports -->
-
+
				<property>
					<name>mapreduce.multipleoutputs</name>
-
-				 <value>${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21}</value>  
-
+
+					<value>${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7}
+						${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14}
+						${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21}
+					</value>
+
				</property>
				<!-- datasource -->
				<property>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
				<!-- datasourceLanguage -->
				<property>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
-
-
+
+
+
				<!-- datasourceOrganization -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
-					<!-- datasourceTopic -->
+
+				<!-- datasourceTopic -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
-					<!-- resultDatasource -->
+
+				<!-- resultDatasource -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-					<!-- organization -->
+				<!-- organization -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
-				 <!-- projectOrganization -->
+
+				<!-- projectOrganization -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out7}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out7}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-									<!-- resultProject -->
+				<!-- resultProject -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out8}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out8}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
-				 <!-- project -->
+
+				<!-- project -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out9}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out9}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
-		    <!-- resultConcept -->
+
+				<!-- resultConcept -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out10}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out10}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
+
				<!-- resultClaim -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out11}.key</name>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out11}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
+
				<!-- resultClassification -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out12}.key</name>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out12}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
+
				<!-- resultLanguage -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out13}.key</name>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out13}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
-				  <!-- resultProject -->
+
+				<!-- resultProject -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out14}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out14}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-  			  <!-- resultResult -->
+				<!-- resultResult -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out15}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out15}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-					  <!-- resultTopic -->
+				<!-- resultTopic -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out16}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out16}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
-				  <!-- resultDatasource -->
+
+				<!-- resultDatasource -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out17}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out17}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
-
-
-					  <!-- result -->
+
+
+
+				<!-- result -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out18}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out18}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
-				  <!-- claim -->
+
+				<!-- claim -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out19}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out19}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-			  <!-- cncept -->
+				<!-- cncept -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out20}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out20}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
-
-				  <!-- category -->
+
+				<!-- category -->
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out21}.key</name>
					<value>org.apache.hadoop.io.Text</value>
......
				</property>
				<property>
					<name>mapreduce.multipleoutputs.namedOutput.${out21}.format</name>
-					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
+					<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+					</value>
				</property>
				<!-- ## Classes of mapper and reducer -->
-
+
				<property>
					<name>mapreduce.map.class</name>
-					<value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsMapper</value>
+					<value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsMapper
+					</value>
				</property>
				<property>
					<name>mapreduce.reduce.class</name>
-					<value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsReducer</value>
+					<value>eu.dnetlib.data.mapreduce.hbase.statsExport.StatsReducer
+					</value>
				</property>
				<property>
					<name>io.serializations</name>
-					<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
+					<value>org.apache.hadoop.io.serializer.WritableSerialization
+					</value>
				</property>
				<!-- ## Custom config -->
-
+
				<!--delim character used to seperate fields in hdfs dump files <property> -->
				<property>
					<name>stats.delim</name>
......
					<name>hbase.mapreduce.inputtable</name>
					<value>${Stats_Hbase_Source_Table}</value>
				</property>
-				 <property>
-					<name>indexConf</name>
-					<value>${indexConf}</value>
+				<property>
+					<!-- mapping of protos entities to tables in the relDB -->
+					<name>stats.dbTablesMap</name>
+					<value>${Stats_db_table_map}</value>
				</property>
-
-
-				<!-- This directory does not correspond to a data store. In fact, this directory only
-					contains multiple data stores. It has to be set to the name of the workflow node.
-					-->
+
+				<!-- This directory does not correspond to a data store. In fact, this 
+					directory only contains multiple data stores. It has to be set to the name 
+					of the workflow node. -->
				<property>
					<name>mapred.output.dir</name>
					<value>${Stats_output_Path}</value>
				</property>
+				<property>
+					<name>stats.indexConf</name>
+					<value>${Stats_indexConf}</value>
+				</property>
				<!-- ## Workflow node parameters -->
				<property>
					<name>mapred.reduce.tasks</name>
-					<value>10</value>
+					<value>${numReducers}</value>
				</property>
-
-
-
-
-
			</configuration>
		</map-reduce>
-		<ok to="prepareDatabase" />
+		<ok to="sqoopImport" />
		<error to="fail" />
	</action>
-
+
	<action name="prepareDatabase">
		<java>
-
+
			<prepare>
			</prepare>
			<configuration>
......
					<value>${queueName}</value>
				</property>
			</configuration>
-
+
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
			<arg>-SworkingDir=${workingDir}</arg>
			<arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
-
+
			<arg>-PStats_db_Url=${Stats_db_Url}</arg>
			<arg>-PStats_db_User=${Stats_db_User}</arg>
			<arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
			<arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
-
+
		</java>
		<ok to="sqoopImport" />
		<error to="fail" />
	</action>
-
+
	<action name="sqoopImport">
		<java>
			<prepare>
......
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
-
+
				<property>
					<name>oozie.sqoop.log.level</name>
					<value>DEBUG</value>
				</property>
-
+
			</configuration>
-
+
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
			<arg>-SworkingDir=${workingDir}</arg>
			<arg>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</arg>
-
+
			<arg>-PStats_db_Url=${Stats_db_Url}</arg>
			<arg>-PStats_db_User=${Stats_db_User}</arg>
			<arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
-
+
			<arg>-PStats_output_Path=${Stats_output_Path}</arg>
-			<arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}</arg>
+			<arg>-PStats_sqoop_RecsPerStatement=${Stats_sqoop_RecsPerStatement}
+			</arg>
			<arg>-PStats_sqoop_ReducersCount=${Stats_sqoop_ReducersCount}</arg>
-			<arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}</arg>
+			<arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans}
+			</arg>
			<arg>-PStats_delim_Character=${Stats_delim_Character}</arg>
-		</java>
+			<arg>-PStats_newline_Character=${Stats_newline_Character}</arg>
+			<arg>-PStats_db_table_map=${Stats_db_table_map}</arg>
+		</java>
		<ok to="end" />
		<error to="fail" />
	</action>
-
+
	<action name="finalizeDatabase">
		<java>
-
+
			<prepare>
			</prepare>
			<configuration>
......
					<value>${queueName}</value>
				</property>
			</configuration>
-
+
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
			<arg>-SworkingDir=${workingDir}</arg>
			<arg>eu.dnetlib.iis.core.workflows.stats.DBFinalizeWrapper</arg>
			<arg>-PStats_db_Url=${Stats_db_Url}</arg>
			<arg>-PStats_db_User=${Stats_db_User}</arg>
			<arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
-			 <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
-			 <arg>-PStats_output_Path=${Stats_output_Path}</arg>
+			<arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
+			<arg>-PStats_output_Path=${Stats_output_Path}</arg>
+			<arg>-PStats_ContextResourceXML=${ContextResourceXML}</arg>
+
		</java>
		<ok to="end" />
		<error to="fail" />
......
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<prepare>
-			 </prepare>
+			</prepare>
			<configuration>
-				 <property>
-                     <name>mapred.job.queue.name</name>
-                     <value>${queueName}</value>
-                 </property>
				<property>
+					<name>mapred.job.queue.name</name>
+					<value>${queueName}</value>
+				</property>
+				<property>
					<name>oozie.sqoop.log.level</name>
					<value>DEBUG</value>
				</property>
			</configuration>
-		  <command> export -Dsqoop.export.records.per.statement=1000 -Dsqoop.statements.per.transaction==1000 --connect jdbc:postgresql://duffy.di.uoa.gr:5432/test_stats --export-dir /tmp/test_stats/datasource-r-00000  --table datasource --username sqoop --password sqoop  --input-fields-terminated-by ! -m 4
-	 	</command>
-
+			<command> export -Dsqoop.export.records.per.statement=1000
+				-Dsqoop.statements.per.transaction==1000 --connect
+				jdbc:postgresql://duffy.di.uoa.gr:5432/test_stats --export-dir
+				/tmp/test_stats/datasource-r-00000 --table datasource --username
+				sqoop --password sqoop --input-fields-terminated-by ! -m 4
+			</command>
+
		</sqoop>
		<ok to="end" />
		<error to="fail" />
	</action>
-
+
	<kill name="fail">
		<message>
-			Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}]
+			Unfortunately, the process failed -- error message:
+			[${wf:errorMessage(wf:lastErrorNode())}]
		</message>
	</kill>
	<end name="end" />
