<?xml version="1.0"?>
<!-- Note that documentation placed in comments in this file uses the
"markdown" syntax (along with its way of dividing text into sections). -->
<workflow-app xmlns="uri:oozie:workflow:0.3" name="test-core_examples_hadoopstreaming_cloner_without_reducer_with_explicit_schema_file">
    <start to="data_producer" />
    <action name="data_producer">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- Any data left over from a previous run of this node is
            removed here before the node is executed. -->
            <prepare>
                <delete path="${nameNode}${workingDir}/data_producer" />
                <mkdir path="${nameNode}${workingDir}/data_producer" />
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <!-- This is a simple wrapper for the Java code -->
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <!-- The business Java class that actually gets executed -->
            <arg>eu.dnetlib.iis.core.examples.java.SampleDataProducer</arg>
            <!-- All input and output ports have to be bound to paths in HDFS.
            One way to inspect the records written to such a path is sketched
            in the comment after this action. -->
            <arg>-Operson=${workingDir}/data_producer/person</arg>
            <arg>-Odocument=${workingDir}/data_producer/document</arg>
        </java>
        <ok to="python_cloner" />
        <error to="fail" />
    </action>
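    <!-- For illustration only: one way to peek at the Avro records written
    to an output port (e.g. the "person" port above) is to copy a part file
    from HDFS to the local machine and read it with the standard "avro"
    Python package. This is a minimal sketch, not part of the workflow;
    the file name is hypothetical:

        from avro.datafile import DataFileReader
        from avro.io import DatumReader

        # Hypothetical local copy of one output file of the "person" port.
        reader = DataFileReader(open("part-00000.avro", "rb"), DatumReader())
        for record in reader:
            print(record)  # each record is a Python dict
        reader.close()
    -->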
    <action name="python_cloner">
        <map-reduce>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}${workingDir}/python_cloner"/>
                <mkdir path="${nameNode}${workingDir}/python_cloner"/>
            </prepare>
            <streaming>
                <!-- Here we give the relative path to the script and pass it
                the parameters of this workflow node. The script is kept in a
                directory with the same name as the workflow node.

                The parameters should be passed as **named** arguments; this
                convention makes the code more readable and maintainable.
                A sketch of such a script is given after this element. -->
                <mapper>scripts/python_cloner/cloner.py --copies 3</mapper>
            </streaming>
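            <!-- For illustration only, here is a minimal sketch of what a
            streaming mapper such as cloner.py might look like. It is an
            assumption, not the actual script shipped with this workflow: it
            assumes that the AvroAsJSON input format feeds the script one
            JSON-encoded record per line on standard input, and that records
            written to standard output in the same form are picked up by the
            output format.

                #!/usr/bin/env python
                import sys

                def parse_named_args(argv):
                    # Collect "name value" pairs; on the command line each
                    # name is prefixed with two dashes, as in the <mapper>
                    # element above.
                    args = {}
                    key = None
                    for token in argv:
                        if token.startswith("-"):
                            key = token.lstrip("-")
                        elif key is not None:
                            args[key] = token
                            key = None
                    return args

                def main():
                    args = parse_named_args(sys.argv[1:])
                    copies = int(args.get("copies", "1"))
                    for line in sys.stdin:
                        record = line.rstrip("\n")
                        # Emit each input record the requested number of times.
                        for _ in range(copies):
                            sys.stdout.write(record + "\n")

                if __name__ == "__main__":
                    main()
            -->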
            <configuration>
                <!-- # Standard settings for our framework -->
                <property>
                    <name>mapred.output.format.class</name>
                    <value>com.cloudera.science.avro.streaming.AvroAsJSONOutputFormat</value>
                </property>
                <property>
                    <name>mapred.input.format.class</name>
                    <value>com.cloudera.science.avro.streaming.AvroAsJSONInputFormat</value>
                </property>
                <!-- # Custom settings for this workflow node -->
                <!-- We do not use any reducers, so we set their number to 0 -->
                <property>
                    <name>mapred.reduce.tasks</name>
                    <value>0</value>
                </property>
                <property>
                    <name>mapred.input.dir</name>
                    <value>${workingDir}/data_producer/person</value>
                </property>
                <!-- Path to the input schema. This is held in the same
                directory as the script. -->
                <property>
                    <name>input.schema.url</name>
                    <value>${wf:appPath()}/lib/scripts/python_cloner/Person.avsc</value>
                </property>
                <property>
                    <name>mapred.output.dir</name>
                    <value>${workingDir}/python_cloner/output</value>
                </property>
                <!-- Path to the output schema. This is held in the same
                directory as the script. -->
                <property>
                    <name>output.schema.url</name>
                    <value>${wf:appPath()}/lib/scripts/python_cloner/Person.avsc</value>
                </property>
            </configuration>
        </map-reduce>
        <ok to="cloner"/>
        <error to="fail"/>
    </action>
    <action name="cloner">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- Any data left over from a previous run of this node is
            removed here before the node is executed. -->
            <prepare>
                <delete path="${nameNode}${workingDir}/cloner" />
                <mkdir path="${nameNode}${workingDir}/cloner" />
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <!-- This is a simple wrapper for the Java code -->
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <!-- The business Java class that actually gets executed -->
            <arg>eu.dnetlib.iis.core.examples.java.PersonCloner</arg>
            <!-- All input and output ports have to be bound to paths in HDFS -->
            <arg>-Iperson=${workingDir}/python_cloner/output</arg>
            <arg>-Operson=${workingDir}/cloner/person</arg>
        </java>
        <ok to="end" />
        <error to="fail" />
    </action>
    <kill name="fail">
        <message>Unfortunately, the process failed -- error message:
        [${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>
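<!-- For illustration only: the properties referenced above (jobTracker,
nameNode, queueName, workingDir) are supplied at submission time, e.g. in a
job.properties file passed to the Oozie command-line client. A hypothetical
invocation, assuming a local Oozie server:

    oozie job -oozie http://localhost:11000/oozie -config job.properties -run
-->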