Project

General

Profile

1
<?xml version="1.0"?>
2
<!-- Note that documentation placed in comments in this file uses the
"markdown" syntax (along with its way of dividing text into sections). -->
4
<workflow-app xmlns="uri:oozie:workflow:0.3" name="test-referenceextraction_project_main_sampletest">
5
	<!-- Entry point: execution begins at the `producer` action -->
	<start to="producer" />
6

    
7
	<action name="producer">
		<!-- Test fixture node: creates the `document_text` and `project` data
		stores under ${workingDir}/producer. The next action
		(`referenceextraction_project`) reads them as its inputs. -->
		<java>
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<!-- The data generated by this node is deleted in this section and
			the output directory is created anew -->
			<prepare>
				<delete path="${nameNode}${workingDir}/producer" />
				<mkdir path="${nameNode}${workingDir}/producer" />
			</prepare>
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<!-- This is a simple wrapper for the Java code -->
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
			<!-- The business Java code that gets executed -->
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
			<!-- Specification of the output ports: port name, record schema
			class, and a JSON resource (presumably the records to be written;
			confirm against the Producer implementation) -->
			<arg>-C{document_text, 
				eu.dnetlib.iis.metadataextraction.schemas.DocumentText,
				eu/dnetlib/iis/referenceextraction/project/data/document_text_only_nulls.json}</arg>
			<arg>-C{project, 
				eu.dnetlib.iis.importer.schemas.Project,
				eu/dnetlib/iis/referenceextraction/project/data/empty.json}</arg>
			<!-- All input and output ports have to be bound to paths in HDFS -->
			<arg>-Odocument_text=${workingDir}/producer/document_text</arg>
			<arg>-Oproject=${workingDir}/producer/project</arg>
		</java>
		<ok to="referenceextraction_project" />
		<error to="fail" />
	</action>
40

    
41
    <action name="referenceextraction_project">
        <!-- Runs the workflow located in the `referenceextraction_project`
        subdirectory of this application's path as a sub-workflow, feeding it
        the data stores written by the `producer` action. -->
        <sub-workflow>
            <app-path>${wf:appPath()}/referenceextraction_project</app-path>
            <configuration>
                <!-- Cluster settings passed through unchanged from this workflow -->
                <property>
                    <name>jobTracker</name>
                    <value>${jobTracker}</value>
                </property>
                <property>
                    <name>nameNode</name>
                    <value>${nameNode}</value>
                </property>
                <property>
                    <name>queueName</name>
                    <value>${queueName}</value>
                </property>
                <!-- Working directory of the subworkflow -->
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/referenceextraction_project/working_dir</value>
                </property>
                <!-- Inputs: the paths the `producer` action binds its output ports to -->
                <property>
                    <name>input_document_text</name>
                    <value>${workingDir}/producer/document_text</value>
                </property>
                <property>
                    <name>input_project</name>
                    <value>${workingDir}/producer/project</value>
                </property>
                <!-- Output: the path the `consumer` action binds its input port to -->
                <property>
                    <name>output_document_to_project</name>
                    <value>${workingDir}/referenceextraction_project/document_to_project</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="consumer"/>
        <error to="fail"/>
    </action>
79

    
80
	<action name="consumer">
		<!-- Test verification node: reads the `document_to_project` data store
		written by the `referenceextraction_project` action.
		NOTE(review): presumably TestingConsumer compares the data store with
		the JSON resource listed for the port (empty.json here) and fails on a
		mismatch; confirm against the TestingConsumer implementation. -->
		<java>
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<!-- This is a simple wrapper for the Java code -->
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
			<!-- The business Java code that gets executed -->
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg>
			<!-- Specification of the input ports: port name, record schema
			class, and a JSON resource -->
			<arg>-C{document_to_project,
				eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject,
				eu/dnetlib/iis/referenceextraction/project/data/empty.json}</arg>
			<!-- All input and output ports have to be bound to paths in HDFS -->
			<arg>-Idocument_to_project=${workingDir}/referenceextraction_project/document_to_project</arg>
		</java>
		<ok to="end" />
		<error to="fail" />
	</action>
104

    
105
    <kill name="fail">
        <!-- Failure terminal node: the `error` transition of every action in
        this workflow leads here. The message embeds the error message of the
        last node that failed. -->
        <message>Unfortunately, the process failed -- error message:
        			[${wf:errorMessage(wf:lastErrorNode())}]
        		</message>
    </kill>
110

    
111
    <!-- Successful termination; reached through the `ok` transition of the `consumer` action -->
    <end name="end"/>
112
</workflow-app>
(2-2/2)