<?xml version="1.0" encoding="UTF-8"?>
<workflow-app xmlns="uri:oozie:workflow:0.4" name="mainworkflows_importer_plaintext_cached_by_checksum">

	<!--
		Runs three sub-workflows in sequence:
		preprocessing, then mainworkflows_importer_plaintext_cached, then postprocessing-text.
		Intermediate data is handed over through directories below ${workingDir};
		an error in any action transitions to the "fail" kill node.

		Besides the parameters declared below, this definition references
		${workingDir}, ${jobTracker}, ${nameNode} and ${queueName}. None of them is
		declared in the parameters section, so they have to be supplied by the
		job configuration.
	-->

	<!--
		Declared parameters. Entries without a value element (input, output,
		default_cache_location) have no default and must be provided at submission.
	-->
	<parameters>
		<property>
			<name>input</name>
			<description>input document content directory</description>
		</property>
		<property>
			<name>output</name>
			<description>plaintext importer output directory</description>
		</property>
		<property>
			<name>content_connection_timeout</name>
			<value>60000</value>
			<description>streaming content connection timeout</description>
		</property>
		<property>
			<name>content_read_timeout</name>
			<value>60000</value>
			<description>streaming content read timeout</description>
		</property>
		<property>
			<name>zk_session_timeout</name>
			<value>60000</value>
			<description>zookeeper session timeout when handling locks</description>
		</property>
		<property>
			<name>default_cache_location</name>
			<description>default cache location stored in HDFS</description>
		</property>
		<property>
			<name>mapred_max_split_size</name>
			<value>50000</value>
			<description>maximum input data split size, required by streaming version reading DocumentContentUrl to split input data into more chunks</description>
		</property>
	</parameters>

	<!-- Cluster endpoints and the job queue, declared once at workflow level. -->
	<global>
		<job-tracker>${jobTracker}</job-tracker>
		<name-node>${nameNode}</name-node>
		<configuration>
			<property>
				<name>mapred.job.queue.name</name>
				<value>${queueName}</value>
			</property>
		</configuration>
	</global>

	<start to="preprocessing" />

	<!--
		Step 1: runs the transformers_metadataextraction_checksum_preprocessing
		sub-workflow on ${input}; its result is written to
		${workingDir}/preprocessing/output.
	-->
	<action name="preprocessing">
		<sub-workflow>
			<app-path>${wf:appPath()}/transformers_metadataextraction_checksum_preprocessing</app-path>
			<propagate-configuration/>
			<configuration>
				<property>
					<name>workingDir</name>
					<value>${workingDir}/preprocessing/working_dir</value>
				</property>
				<property>
					<name>input</name>
					<value>${input}</value>
				</property>
				<property>
					<name>output</name>
					<value>${workingDir}/preprocessing/output</value>
				</property>
			</configuration>
		</sub-workflow>
		<ok to="mainworkflows_importer_plaintext_cached"/>
		<error to="fail" />
	</action>

	<!--
		Step 2: runs the mainworkflows_importer_plaintext_cached sub-workflow on the
		output of step 1 and writes to
		${workingDir}/mainworkflows_importer_plaintext_cached/out.
	-->
	<action name="mainworkflows_importer_plaintext_cached">
		<sub-workflow>
			<app-path>${wf:appPath()}/mainworkflows_importer_plaintext_cached</app-path>
			<propagate-configuration/>
			<configuration>
				<property>
					<name>workingDir</name>
					<value>${workingDir}/mainworkflows_importer_plaintext_cached/working_dir</value>
				</property>
				<property>
					<!-- checksum identified input -->
					<name>input</name>
					<value>${workingDir}/preprocessing/output</value>
				</property>
				<property>
					<name>output</name>
					<value>${workingDir}/mainworkflows_importer_plaintext_cached/out</value>
				</property>
				<!-- all the other properties are automatically propagated -->
			</configuration>
		</sub-workflow>
		<ok to="postprocessing-text"/>
		<error to="fail" />
	</action>

	<!--
		Step 3: runs the transformers_metadataextraction_checksum_postprocessing_text
		sub-workflow. It receives the original ${input} as input_document_content_url
		and the output of step 2 as input_document_text, and writes the final
		result to ${output}.
	-->
	<action name="postprocessing-text">
		<sub-workflow>
			<app-path>${wf:appPath()}/transformers_metadataextraction_checksum_postprocessing_text</app-path>
			<propagate-configuration/>
			<configuration>
				<property>
					<name>workingDir</name>
					<value>${workingDir}/postprocessing_text/working_dir</value>
				</property>
				<property>
					<name>input_document_content_url</name>
					<value>${input}</value>
				</property>
				<property>
					<name>input_document_text</name>
					<value>${workingDir}/mainworkflows_importer_plaintext_cached/out</value>
				</property>
				<property>
					<name>output</name>
					<value>${output}</value>
				</property>
			</configuration>
		</sub-workflow>
		<ok to="end"/>
		<error to="fail" />
	</action>

	<!-- Terminal failure node: reports the error message of the last failed node. -->
	<kill name="fail">
		<message>Unfortunately, the process failed -- error message:
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
	</kill>
	<end name="end" />
</workflow-app>