<?xml version="1.0" encoding="UTF-8"?>
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-origins_collapser_default">
    <!-- Entry point: execution begins at the "producer" action. -->
    <start to="producer"/>
    <!--
        Step 1: runs the Producer node through ProcessWrapper.
        It declares a single output port named "input" and binds it to
        ${workingDir}/producer/input, which is the same path the
        "origins_collapser" action receives as its "input" property.
        On success control moves to "origins_collapser"; on error to "fail".
    -->
    <action name="producer">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- The data generated by this node is deleted in this section:
                 the node's directory is removed and recreated before the run. -->
            <prepare>
                <delete path="${nameNode}${workingDir}/producer" />
                <mkdir path="${nameNode}${workingDir}/producer" />
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <!-- This is a simple wrapper for the Java code -->
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <!-- The business Java code that gets executed -->
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
            <!-- Specification of the output ports, given as
                 {port name, record schema class, JSON data file}.
                 NOTE(review): field meanings are inferred from the values; confirm
                 against Producer. The text between the braces is passed to the
                 program verbatim, so it must contain nothing but these three fields. -->
            <arg>-C{input,
				eu.dnetlib.iis.collapsers.schemas.DocumentMetadataEnvelope,
				eu/dnetlib/iis/collapsers/origins_collapser/default/data/metadata.json}</arg>
            <!-- All input and output ports have to be bound to paths in HDFS -->
            <arg>-Oinput=${workingDir}/producer/input</arg>
        </java>
        <ok to="origins_collapser"/>
        <error to="fail"/>
    </action>
    <!--
        Step 2: invokes the sub-workflow located in the "origins_collapser"
        directory under this application's path. It reads the path written by
        the "producer" action and writes to ${workingDir}/origins_collapser/output,
        which the "consumer" action then reads.
        On success control moves to "consumer"; on error to "fail".
    -->
    <action name="origins_collapser">
        <sub-workflow>
            <app-path>${wf:appPath()}/origins_collapser</app-path>
            <configuration>
                <!-- Cluster settings forwarded unchanged from this workflow. -->
                <property>
                    <name>jobTracker</name>
                    <value>${jobTracker}</value>
                </property>
                <property>
                    <name>nameNode</name>
                    <value>${nameNode}</value>
                </property>
                <property>
                    <name>queueName</name>
                    <value>${queueName}</value>
                </property>
                <!-- Working directory of the subworkflow -->
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/origins_collapser/working_dir</value>
                </property>
                <!-- Parameters of the collapsing itself. Their exact semantics are
                     defined by the sub-workflow, which is not visible from this file. -->
                <property>
                    <name>blocking_field</name>
                    <value>data.id</value>
                </property>
                <property>
                    <name>origins</name>
                    <value>orig,cermine</value>
                </property>
                <!-- Record schema classes for the input and output ports; they match
                     the schemas declared by the "producer" and "consumer" actions. -->
                <property>
                    <name>schema_input</name>
                    <value>eu.dnetlib.iis.collapsers.schemas.DocumentMetadataEnvelope</value>
                </property>
                <property>
                    <name>schema_output</name>
                    <value>eu.dnetlib.iis.importer.schemas.DocumentMetadata</value>
                </property>
                <property>
                    <name>record_collapser</name>
                    <value>eu.dnetlib.iis.collapsers.origins.OriginConfidenceCollapser</value>
                </property>
                <!-- Input ports. -->
                <property>
                    <name>input</name>
                    <value>${workingDir}/producer/input</value>
                </property>
                <!-- Output port bound to given path -->
                <property>
                    <name>output</name>
                    <value>${workingDir}/origins_collapser/output</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="consumer"/>
        <error to="fail"/>
    </action>
    <!--
        Step 3: runs the TestingConsumer node through ProcessWrapper.
        It declares a single input port named "output" and binds it to
        ${workingDir}/origins_collapser/output, the path the sub-workflow
        was given as its "output" property.
        On success control moves to "end"; on error to "fail".
    -->
    <action name="consumer">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <!-- This is a simple wrapper for the Java code -->
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <!-- The business Java code that gets executed -->
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg>
            <!-- Specification of the input ports, given as
                 {port name, record schema class, JSON data file}.
                 NOTE(review): the JSON file presumably holds the expected records;
                 confirm against TestingConsumer. The text between the braces is
                 passed to the program verbatim, so it must contain nothing but
                 these three fields. -->
            <arg>-C{output,
				eu.dnetlib.iis.importer.schemas.DocumentMetadata,
				eu/dnetlib/iis/collapsers/origins_collapser/default/data/output.json}</arg>
            <!-- All input and output ports have to be bound to paths in HDFS -->
            <arg>-Ioutput=${workingDir}/origins_collapser/output</arg>
        </java>
        <ok to="end" />
        <error to="fail" />
    </action>
    <!-- Failure terminal: the target of every action's <error> transition.
         The message embeds the error message of the last node that failed. -->
    <kill name="fail">
        <message>Unfortunately, the workflow failed -- error message:
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <!-- Success terminal: reached from the "consumer" action's <ok> transition. -->
    <end name="end"/>
</workflow-app>