Project

General

Profile

1
<?xml version="1.0"?>
2
<!-- Note that documentation placed in comments in this file uses the
"markdown" syntax (along with its way of dividing text into sections). -->
4
<workflow-app xmlns="uri:oozie:workflow:0.3" name="test-core_examples_hadoopstreaming_cloner_without_reducer_with_subworkflow">
5
	<start to="data_producer" />
6
	<!-- ## data_producer
	Java action that starts `ProcessWrapper` and hands it the name of the
	producer class plus the bindings of the `person` and `document` ports.
	On success the workflow continues with `python_cloner`; on error it
	goes to `fail`. -->
	<action name="data_producer">
		<java>
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<!-- The directory holding the data generated by this node is
			deleted and re-created here, before the node runs -->
			<prepare>
				<delete path="${nameNode}${workingDir}/data_producer" />
				<mkdir path="${nameNode}${workingDir}/data_producer" />
			</prepare>
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<!-- This is a simple wrapper for the Java code -->
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
			<!-- The business Java code that gets to be executed -->
			<arg>eu.dnetlib.iis.core.examples.java.SampleDataProducer</arg>
			<!-- All input and output ports have to be bound to paths in HDFS.
			NOTE(review): the `-O` prefix presumably marks an output port;
			confirm against ProcessWrapper. -->
			<arg>-Operson=${workingDir}/data_producer/person</arg>
			<arg>-Odocument=${workingDir}/data_producer/document</arg>
		</java>
		<ok to="python_cloner" />
		<error to="fail" />
	</action>
32
    <!-- ## python_cloner
    Map-only Hadoop streaming action. It reads the `person` data written
    under `data_producer`, pipes it through the `cloner.py` mapper and
    writes the result to `python_cloner/output`.
    On success the workflow continues with `my_subworkflow`; on error it
    goes to `fail`. -->
    <action name="python_cloner">
        <map-reduce>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- The directory holding the data generated by this node is
            deleted and re-created here, before the node runs -->
            <prepare>
                <delete path="${nameNode}${workingDir}/python_cloner"/>
                <mkdir path="${nameNode}${workingDir}/python_cloner"/>
            </prepare>
            <streaming>
                <!-- Here, we give the relative path to the script and pass it
                the parameters of the workflow node. The script is held
                in a directory having the same name as the workflow node.

                The parameters should be passed as **named** arguments. This
                convention of passing them as named arguments makes the code
                more readable/maintainable.
                -->
                <mapper>scripts/python_cloner/cloner.py --copies 2</mapper>
            </streaming>
            <configuration>
                <!-- # Standard settings for our framework -->
                <!-- Submit the streaming job to the same queue that the
                `data_producer` node uses -->
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>mapred.output.format.class</name>
                    <value>com.cloudera.science.avro.streaming.AvroAsJSONOutputFormat</value>
                </property>
                <property>
                    <name>mapred.input.format.class</name>
                    <value>com.cloudera.science.avro.streaming.AvroAsJSONInputFormat</value>
                </property>
                <!-- # Custom settings for this workflow node -->
                <!-- We do not use any reducers, so we set their number to 0 -->
                <property>
                    <name>mapred.reduce.tasks</name>
                    <value>0</value>
                </property>
                <property>
                    <name>mapred.input.dir</name>
                    <value>${workingDir}/data_producer/person</value>
                </property>
                <!-- Name of the input schema. -->
                <property>
                    <name>eu.dnetlib.iis.avro.input.class</name>
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</value>
                </property>
                <property>
                    <name>mapred.output.dir</name>
                    <value>${workingDir}/python_cloner/output</value>
                </property>
                <!-- Name of the output schema. -->
                <property>
                    <name>eu.dnetlib.iis.avro.output.class</name>
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</value>
                </property>
            </configuration>
        </map-reduce>
        <ok to="my_subworkflow"/>
        <error to="fail"/>
    </action>
90
    <!-- ## my_subworkflow
    Runs the workflow stored in the `my_subworkflow` directory of this
    application as a sub-workflow. The cluster settings are handed down
    unchanged, the sub-workflow gets its own working directory, and its
    `person` input port is bound to the output of `python_cloner`.
    On success the workflow ends; on error it goes to `fail`. -->
    <action name="my_subworkflow">
        <sub-workflow>
            <app-path>${wf:appPath()}/my_subworkflow</app-path>
            <configuration>
                <!-- Cluster settings passed through from this workflow -->
                <property>
                    <name>jobTracker</name>
                    <value>${jobTracker}</value>
                </property>
                <property>
                    <name>nameNode</name>
                    <value>${nameNode}</value>
                </property>
                <property>
                    <name>queueName</name>
                    <value>${queueName}</value>
                </property>
                <!-- Working directory of the subworkflow -->
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/my_subworkflow/working_dir</value>
                </property>
                <!-- Input port "person" bound to given path -->
                <property>
                    <name>input_person</name>
                    <value>${workingDir}/python_cloner/output</value>
                </property>
                <!-- Output port "person" bound to given path -->
                <property>
                    <name>output_person</name>
                    <value>${workingDir}/my_subworkflow/person</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="end"/>
        <error to="fail"/>
    </action>
126
    <!-- ## fail
    Kill node that every action in this workflow targets with its error
    transition. The message reports the error of the last node that failed.
    The message is kept on a single line because whitespace inside the
    element becomes part of the reported text. -->
    <kill name="fail">
        <message>Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
131
    <end name="end"/>
132
</workflow-app>
133

    
134

    
    (1-1/1)