Project

General

Profile

1
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Workflow resource profile "Offline Deduplication" (type: Deduplication,
    priority 30, started manually).

    Node graph as declared by the ARC elements below:

        hadoopConfig (isStart)  and  setPath (isStart)
            both arc to cleanupMap (isJoin)
            cleanupMap arcs to buildAnchorMap
            buildAnchorMap arcs to "success"

    Notes:
    * The part of RESOURCE_IDENTIFIER after the underscore is the Base64
      encoding of "WorkflowDSResources/WorkflowDSResourceType", i.e. the
      RESOURCE_KIND and RESOURCE_TYPE values joined by a slash.
    * No NODE named "success" is defined in this profile; presumably it is a
      terminal state provided by the workflow engine. TODO confirm.
    * The values inside the envParams maps ('mapPath', 'tableName', 'cluster')
      match the names published by the pathParam / tableParam / cluster
      parameters of the two start nodes, so they appear to be references to
      workflow environment attributes rather than literal values. Verify
      against the node implementations before changing any of them.
-->
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <HEADER>
        <RESOURCE_IDENTIFIER value="ae261797-c373-48ce-8581-f1b66f702323_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
        <RESOURCE_KIND value="WorkflowDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
    </HEADER>
    <BODY>
        <WORKFLOW_NAME>Offline Deduplication</WORKFLOW_NAME>
        <WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
        <CONFIGURATION start="manual">

			<!--
				Start node 1 of 2. Carries the cluster value "DM" and, through
				tableParam, the name "tableName" that later envParams maps refer to.
				NOTE(review): the DESCRIPTION mentions only the table, while the
				node type and the cluster parameter suggest it sets both. Confirm.
			-->
			<NODE name="hadoopConfig" type="SetClusterAndTable" isStart="true">
				<DESCRIPTION>Set table name</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
					<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="cleanupMap" />
				</ARCS>
			</NODE>

			<!--
				Start node 2 of 2. "path" is the only user-managed parameter in this
				workflow (default /tmp/anchorMap); pathParam gives the name "mapPath"
				that the cleanupMap and buildAnchorMap envParams maps refer to.
			-->
			<NODE name="setPath" type="SetHdfsPathJob" isStart="true">
				<DESCRIPTION>Set map path name</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="path" managedBy="user">/tmp/anchorMap</PARAM>
					<PARAM required="true" type="string" name="pathParam" managedBy="system">mapPath</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="cleanupMap" />
				</ARCS>
			</NODE>

			<!-- postprocess -->

			<!--
				Join node: both start nodes arc here (isJoin="true"). Its envParams
				map binds 'path' to 'mapPath', the same name buildAnchorMap uses for
				'mapred.output.dir', so the path is deleted before the job that
				writes to it is submitted.
			-->
			<NODE name="cleanupMap" type="DeleteHdfsPathJob" isJoin="true">
				<DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{
							'path' : 'mapPath'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="buildAnchorMap" />
				</ARCS>
			</NODE>

			<!--
				Submits the Hadoop job "buildMergedToAnchorMapJob". The HBase input
				and output table keys (both the mapred and the mapreduce spellings)
				all refer to 'tableName'; the job output directory refers to 'mapPath'.
				NOTE(review): the DESCRIPTION reads "redirect rels", which does not
				match the job name. It may be a copy/paste leftover. Confirm before
				relying on it.
			-->
			<NODE name="buildAnchorMap" type="SubmitHadoopJob">
				<DESCRIPTION>redirect rels</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">buildMergedToAnchorMapJob</PARAM>
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{
							'cluster' : 'cluster',
							'hbase.mapred.inputtable' : 'tableName',
							'hbase.mapreduce.inputtable' : 'tableName',
							'hbase.mapred.outputtable' : 'tableName',
							'hbase.mapreduce.outputtable' : 'tableName',
							'mapred.output.dir' : 'mapPath'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="success" />
				</ARCS>
			</NODE>

        </CONFIGURATION>
        <STATUS />
    </BODY>
</RESOURCE_PROFILE>
(4-4/25)