Project

General

Profile

« Previous | Next » 

Revision 28161

Added by Eri Katsari over 10 years ago

u

View differences:

modules/dnet-openaire-stats/trunk/src/main/resources/eu/dnetlib/data/mapreduce/hbase/statsExport/statsJob.xml
1 1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="ed8c0a4e-7cf2-49df-bfed-fcfab0699ade_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="StatsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that exports hbase data and prepares it for import into the relational database used for statistics generation</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
 
14
	 
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="ed8c0a4e-7cf2-49df-bfed-fcfab0699ade_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="StatsJob" type="mapreduce">
11
			<DESCRIPTION>
12
				map reduce job that exports hbase data and prepares it for import into the relational
13
				database used for statistics generation
14
			</DESCRIPTION>
15
			<STATIC_CONFIGURATION>
16
			 
15 17
				<!-- I/O FORMAT -->
16 18
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />				
18
        	
19
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />
20
				
21
				<!-- MAPPER -->
20 22
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.statsExport.StatsMapper" />
21 23
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
22 24
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
23 25
				
24
			 <!--MultipleOutputs.addNamedOutput(job, type.toString(), TextOutputFormat.class, Text.class, Text.class);-->
25
				
26
			 <!-- REDUCER -->
26
				<!-- REDUCER -->
27 27
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.statsExport.StatsReducer" />
28
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />				
29
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />				
28
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
29
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" /> <!-- NOTE(review): this value is the same OutputFormat class configured for mapreduce.outputformat.class, whereas the key class is a data type (Text); confirm the intended reducer output value class -->
30 30
				
31 31
				<!-- MISC -->
32
				<PROPERTY key="mapred.output.compress" value="false" />						
33
				<PROPERTY key="mapred.compress.map.output" value="false" />	
34
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
35
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
36
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapred.output.compress" value="false" />
33
				<PROPERTY key="mapred.compress.map.output" value="false" />
34
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />
35
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
36
				<PROPERTY key="mapreduce.map.speculative" value="false" />
37 37
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
38
							
39
				<PROPERTY key="mapred.reduce.tasks" value="500" />
38
				
39
				<PROPERTY key="mapred.reduce.tasks" value="10" />
40 40
				<PROPERTY key="dfs.blocksize" value="32M" />
41
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
42 41
				
43
		<!--  	Uncomment to override the default lib path -->			
44
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
45
        	</STATIC_CONFIGURATION>
46
        	<JOB_INTERFACE>
47
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
48
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
49
        		<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> 
50
        		
51
        		<PARAM name="index.entity.links" required="true" description="entity joiner configuration" /> 
52
        		<PARAM name="contextmap" required="true" description="context map (ContextDSResources)" />        		
53
         
54
        			
55
         </JOB_INTERFACE>
56
        	<SCAN>
57
        		<FILTERS />
58
        		<FAMILIES />
59
        	</SCAN>
60
        </HADOOP_JOB>
61
        <STATUS>
62
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
63
            <RUNNING_INSTANCES value="0"/>
64
            <CUMULATIVE_RUN value="0" />
65
        </STATUS>
66
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
67
    </BODY>
42
				<!-- MULTIPLE OUTPUTS FOR REDUCER: registers the named output "datasource"; format/key/value are given as fully-qualified class names because they are resolved by class name -->
43
				
44
				<PROPERTY key="mapreduce.multipleoutputs" value=" datasource" />
45
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.datasource.format" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
46
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.datasource.key" value="org.apache.hadoop.io.Text" />
47
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.datasource.value" value="org.apache.hadoop.io.Text" />
48
				<!-- Equivalent Java: conf.setClass("mapreduce.multipleoutputs.namedOutput.datasource.format", outputFormatClass,
49
					OutputFormat.class); conf.set("mapreduce.multipleoutputs", conf.get("mapreduce.multipleoutputs",
50
					"") + " " + namedOutput); i.e. MultipleOutputs.addNamedOutput(job, type.toString(),
51
					TextOutputFormat.class, Text.class, Text.class); -->
52
			</STATIC_CONFIGURATION>
53
			<JOB_INTERFACE>
54
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
55
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
56
				<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" />
57
				
58
				<PARAM name="mapred.output.delim" required="true" description="delim character used to separate fields in hdfs dump files" />
59
				<PARAM name="mapred.output.nullNum" required="true" description=" default string for Null Numeric Values" />
60
				<PARAM name="mapred.output.nullString" required="true" description=" default string for Null String Values" />
61
				
62
				<PARAM name="index.entity.links" required="true" description="entity joiner configuration" />
63
				<PARAM name="contextmap" required="true" description="context map (ContextDSResources)" />
64
				
65
				
66
			</JOB_INTERFACE>
67
			<SCAN>
68
				
69
				<FILTERS  >
70
					
71
				</FILTERS>
72
				<FAMILIES>
73
					<FAMILY param="entityType" value="datasource"/>
74
				</FAMILIES>
75
			</SCAN>
76
		</HADOOP_JOB>
77
		<STATUS>
78
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
79
			<RUNNING_INSTANCES value="0"/>
80
			<CUMULATIVE_RUN value="0" />
81
		</STATUS>
82
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
83
	</BODY>
68 84
</RESOURCE_PROFILE>

Also available in: Unified diff