Project

General

Profile

« Previous | Next » 

Revision 36290

Added by Marek Horst about 9 years ago

#1257: dropping schema-generation-related hacks in all map-reduce modules, switching to literal schema parameters

View differences:

modules/icm-iis-documentssimilarity/trunk/src/main/resources/eu/dnetlib/iis/documentssimilarity/converter/tsv_to_avro/oozie_app/workflow.xml
12 12
		</property>
13 13
	</parameters>
14 14
              
15
    <start to="converter" />
15
    <start to="generate-schema" />
16 16

  
17
	<action name="generate-schema">
18
	    <java>
19
    		<job-tracker>${jobTracker}</job-tracker>
20
        	<name-node>${nameNode}</name-node>
21
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
22
	        <arg>eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity</arg>
23
	        <arg>org.apache.avro.Schema.Type.NULL</arg>
24
	        <capture-output />
25
	    </java>
26
	    <ok to="converter" />
27
	    <error to="fail" />
28
	</action>
29

  
17 30
    <action name="converter">
18 31
        <map-reduce>
19 32
            <job-tracker>${jobTracker}</job-tracker>
......
29 42
                </property>
30 43
                <property>
31 44
                    <name>mapreduce.outputformat.class</name>
32
                    <value>eu.dnetlib.iis.core.javamapreduce.hack.KeyOutputFormat</value>
45
                    <value>org.apache.avro.mapreduce.AvroKeyOutputFormat</value>
33 46
                </property>
34 47
                <property>
35 48
                    <name>mapred.mapoutput.key.class</name>
......
58 71
                </property>
59 72
                <property>
60 73
                    <name>mapred.output.value.groupfn.class</name>
61
                    <value>eu.dnetlib.iis.core.javamapreduce.hack.KeyComparator</value>
74
                    <value>org.apache.avro.hadoop.io.AvroKeyComparator</value>
62 75
                </property>
63 76
                <property>
64 77
                    <name>rpc.engine.org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB
......
87 100
                <!-- ## Schemas -->
88 101
                <!-- ### Schemas of the data produced by the mapper -->
89 102
                <property>
90
                    <name>eu.dnetlib.iis.avro.map.output.key.class</name>
91
                    <value>eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity</value>
103
                    <name>avro.serialization.key.reader.schema</name>
104
                    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity']}</value>
92 105
                </property>
93 106
                <property>
94
                    <name>eu.dnetlib.iis.avro.map.output.value.class</name>
95
                    <value>org.apache.avro.Schema.Type.NULL</value>
107
                    <name>avro.serialization.key.writer.schema</name>
108
                    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity']}</value>
96 109
                </property>
97

  
110
                
111
				<property>
112
                    <name>avro.serialization.value.reader.schema</name>
113
                    <value>${wf:actionData('generate-schema')['org.apache.avro.Schema.Type.NULL']}</value>
114
                </property>
115
                <property>
116
                    <name>avro.serialization.value.writer.schema</name>
117
                    <value>${wf:actionData('generate-schema')['org.apache.avro.Schema.Type.NULL']}</value>
118
                </property>
98 119
                <!-- ### Schema of the data produced by the reducer. -->
99 120
                <property>
100
                    <name>eu.dnetlib.iis.avro.output.class</name>
101
                    <value>eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity</value>
121
                    <name>avro.schema.output.key</name>
122
                    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity']}</value>
102 123
                </property>
103 124

  
104 125
                <!-- ## Specification of the input and output data store -->

Also available in: Unified diff