<?xml version="1.0" encoding="UTF-8"?>
<workflow-app xmlns="uri:oozie:workflow:0.4" name="union">
	<!-- Formal parameters of the "union" workflow.
	     Ports 1 and 2 (origin_N / input_N), the schemas and the output declare
	     no default value here. Port 3 is optional: origin_3 and input_3 default
	     to the $UNDEFINED$ sentinel, and origin_3 is compared against that
	     sentinel by the decision nodes of this workflow. -->
	<parameters>
		<property>
			<name>origin_1</name>
			<description>origin of the first port</description>
		</property>
		<property>
			<name>input_1</name>
			<description>first port</description>
		</property>
		<property>
			<name>origin_2</name>
			<description>origin of the second port</description>
		</property>
		<property>
			<name>input_2</name>
			<description>second port</description>
		</property>
		<property>
			<name>origin_3</name>
			<value>$UNDEFINED$</value>
			<description>origin of the third port; optional</description>
		</property>
		<property>
			<name>input_3</name>
			<value>$UNDEFINED$</value>
			<description>third port; optional</description>
		</property>
		<property>
			<name>schema_input</name>
			<description>input schema</description>
		</property>
		<property>
			<name>output</name>
			<description>output port</description>
		</property>
		<property>
			<name>schema_output</name>
			<description>output schema</description>
		</property>
	</parameters>

    <!-- Settings shared by the actions of this workflow: the job tracker and
         name node endpoints plus the MapReduce queue, all resolved from the
         jobTracker, nameNode and queueName job properties. -->
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapred.job.queue.name</name>
                <value>${queueName}</value>
            </property>
        </configuration>
    </global>

    <!-- Entry point: the schema generation step runs first. -->
    <start to="generate-schema"/>

    <!-- Runs AvroSchemaGenerator with the values of schema_input and
         schema_output as arguments. Its output is captured so that the Pig
         actions below can read it through wf:actionData('generate-schema'),
         keyed by those same parameter values. -->
    <action name="generate-schema">
        <java>
            <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
            <arg>${schema_input}</arg>
            <arg>${schema_output}</arg>
            <capture-output />
        </java>
        <ok to="union_origin_fork" />
        <error to="fail" />
    </action>

    <!-- Starts three parallel branches: ports 1 and 2 go straight to their Pig
         action, while port 3 first passes through a decision node because it
         is optional. All branches meet again at union_origin_join. -->
    <fork name="union_origin_fork">
        <path start="union_origin_1"/>
        <path start="union_origin_2"/>
        <path start="decision_union_origin_3"/>
    </fork>

    <!-- Fork branch for port 1: runs origin_add.pig on input_1 with origin_1
         passed as origin_value, writing to the branch's working directory. -->
    <action name="union_origin_1">
        <pig>
            <!-- Before the script runs: remove this branch's working directory
                 and the final output path, then recreate the working directory. -->
            <prepare>
                <delete path="${nameNode}${workingDir}/union_origin_1" />
                <delete path="${nameNode}${output}" />
                <mkdir path="${nameNode}${workingDir}/union_origin_1" />
            </prepare>
            <!-- Pig script executed by this action. -->
            <script>lib/scripts/origin_add.pig</script>

            <!-- Input data and its schema as produced by generate-schema. -->
            <param>input=${input_1}</param>
            <param>schema_input=${wf:actionData('generate-schema')[wf:conf('schema_input')]}</param>

            <!-- Intermediate output and its schema as produced by generate-schema. -->
            <param>output=${workingDir}/union_origin_1/output</param>
            <param>schema_output=${wf:actionData('generate-schema')[wf:conf('schema_output')]}</param>

            <!-- Origin value handed to the script for this port. -->
            <param>origin_value=${origin_1}</param>
        </pig>
        <ok to="union_origin_join"/>
        <error to="fail"/>
    </action>

    <!-- Fork branch for port 2: runs origin_add.pig on input_2 with origin_2
         passed as origin_value, writing to the branch's working directory. -->
    <action name="union_origin_2">
        <pig>
            <!-- Before the script runs: remove this branch's working directory
                 and the final output path, then recreate the working directory. -->
            <prepare>
                <delete path="${nameNode}${workingDir}/union_origin_2" />
                <delete path="${nameNode}${output}" />
                <mkdir path="${nameNode}${workingDir}/union_origin_2" />
            </prepare>
            <!-- Pig script executed by this action. -->
            <script>lib/scripts/origin_add.pig</script>

            <!-- Input data and its schema as produced by generate-schema. -->
            <param>input=${input_2}</param>
            <param>schema_input=${wf:actionData('generate-schema')[wf:conf('schema_input')]}</param>

            <!-- Intermediate output and its schema as produced by generate-schema. -->
            <param>output=${workingDir}/union_origin_2/output</param>
            <param>schema_output=${wf:actionData('generate-schema')[wf:conf('schema_output')]}</param>

            <!-- Origin value handed to the script for this port. -->
            <param>origin_value=${origin_2}</param>
        </pig>
        <ok to="union_origin_join"/>
        <error to="fail"/>
    </action>

    <!-- Optional third port: when origin_3 still holds the $UNDEFINED$
         sentinel the branch goes directly to the join; otherwise the third
         origin action runs. Only origin_3 is inspected here, not input_3. -->
    <decision name="decision_union_origin_3">
        <switch>
            <case to="union_origin_join">${origin_3 eq "$UNDEFINED$"}</case>
            <default to="union_origin_3"/>
        </switch>
    </decision>

    <!-- Fork branch for the optional port 3, reached only through the default
         transition of decision_union_origin_3: runs origin_add.pig on input_3
         with origin_3 passed as origin_value. -->
    <action name="union_origin_3">
        <pig>
            <!-- Before the script runs: remove this branch's working directory
                 and the final output path, then recreate the working directory. -->
            <prepare>
                <delete path="${nameNode}${workingDir}/union_origin_3" />
                <delete path="${nameNode}${output}" />
                <mkdir path="${nameNode}${workingDir}/union_origin_3" />
            </prepare>
            <!-- Pig script executed by this action. -->
            <script>lib/scripts/origin_add.pig</script>

            <!-- Input data and its schema as produced by generate-schema. -->
            <param>input=${input_3}</param>
            <param>schema_input=${wf:actionData('generate-schema')[wf:conf('schema_input')]}</param>

            <!-- Intermediate output and its schema as produced by generate-schema. -->
            <param>output=${workingDir}/union_origin_3/output</param>
            <param>schema_output=${wf:actionData('generate-schema')[wf:conf('schema_output')]}</param>

            <!-- Origin value handed to the script for this port. -->
            <param>origin_value=${origin_3}</param>
        </pig>
        <ok to="union_origin_join"/>
        <error to="fail"/>
    </action>

    <!-- Meeting point of the fork branches; continues with the choice of merge script. -->
    <join name="union_origin_join" to="decision_union_merge"/>

    <!-- Picks the merge action with the same test used for the third fork
         branch: two-way merge when origin_3 is the $UNDEFINED$ sentinel,
         three-way merge otherwise. -->
    <decision name="decision_union_merge">
        <switch>
            <case to="union_merge_2">${origin_3 eq "$UNDEFINED$"}</case>
            <default to="union_merge_3"/>
        </switch>
    </decision>

    <!-- Two-port merge: runs union2.pig over the intermediate outputs of
         union_origin_1 and union_origin_2 and writes the final output. -->
    <action name="union_merge_2">
        <pig>
            <!-- Remove any existing final output before the script runs. -->
            <prepare>
                <delete path="${nameNode}${output}" />
            </prepare>
            <!-- Pig script executed by this action. -->
            <script>lib/scripts/union2.pig</script>

            <!-- Inputs are the origin actions' outputs, so the schema passed as
                 schema_input is the one looked up under schema_output. -->
            <param>input_1=${workingDir}/union_origin_1/output</param>
            <param>input_2=${workingDir}/union_origin_2/output</param>
            <param>schema_input=${wf:actionData('generate-schema')[wf:conf('schema_output')]}</param>

            <!-- Final output location and schema. -->
            <param>output=${output}</param>
            <param>schema_output=${wf:actionData('generate-schema')[wf:conf('schema_output')]}</param>
        </pig>
        <ok to="end"/>
        <error to="fail"/>
    </action>

    <!-- Three-port merge: runs union3.pig over the intermediate outputs of all
         three origin actions and writes the final output. -->
    <action name="union_merge_3">
        <pig>
            <!-- Remove any existing final output before the script runs. -->
            <prepare>
                <delete path="${nameNode}${output}" />
            </prepare>
            <!-- Pig script executed by this action. -->
            <script>lib/scripts/union3.pig</script>

            <!-- Inputs are the origin actions' outputs, so the schema passed as
                 schema_input is the one looked up under schema_output. -->
            <param>input_1=${workingDir}/union_origin_1/output</param>
            <param>input_2=${workingDir}/union_origin_2/output</param>
            <param>input_3=${workingDir}/union_origin_3/output</param>
            <param>schema_input=${wf:actionData('generate-schema')[wf:conf('schema_output')]}</param>

            <!-- Final output location and schema. -->
            <param>output=${output}</param>
            <param>schema_output=${wf:actionData('generate-schema')[wf:conf('schema_output')]}</param>
        </pig>
        <ok to="end"/>
        <error to="fail"/>
    </action>

    <!-- Terminal failure node targeted by every action's error transition.
         The line break and leading tabs inside <message> are part of the
         message text and are kept exactly as originally written. -->
    <kill name="fail">
        <message>Unfortunately, the workflow failed -- error message:
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!-- Successful completion, reached from either merge action. -->
    <end name="end"/>
</workflow-app>