Project

General

Profile

<?xml version="1.0" encoding="UTF-8"?>
<!--
	Workflow profile "Deduplication export" (WORKFLOW_TYPE Deduplication, priority 30,
	CONFIGURATION start="manual").

	Graph declared below:
	  - setCsvPath, setDedupConfigs and hadoopConfig are all marked isStart="true"
	    and each has a single arc to cleanupCsv;
	  - cleanupCsv has a single arc to roots2CSV;
	  - roots2CSV has a single arc to "success".

	PARAM values flagged managedBy="system" are fixed in this profile; those flagged
	managedBy="user" are either empty or carry a pre-filled value here.
-->
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
	<HEADER>
		<RESOURCE_IDENTIFIER value="374d07a1-2a76-470a-ab54-a7d6c35eab9b_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
		<RESOURCE_KIND value="WorkflowDSResources" />
		<RESOURCE_URI value="" />
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" />
	</HEADER>
	<BODY>
		<WORKFLOW_NAME>Deduplication export</WORKFLOW_NAME>
		<WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
		<CONFIGURATION start="manual">
			<!-- Start node (type SetEnvParameter). parameterName is fixed to "csvPath";
			     parameterValue is user-managed, required, and empty in this profile.
			     The name "csvPath" is referenced again by cleanupCsv and roots2CSV. -->
			<NODE name="setCsvPath" type="SetEnvParameter" isStart="true">
				<DESCRIPTION>Set the CSV file path on HDFS</DESCRIPTION>
				<PARAMETERS>
					<PARAM managedBy="system" name="parameterName" required="true" type="string">csvPath</PARAM>
					<PARAM managedBy="user" name="parameterValue" required="true" type="string"></PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="cleanupCsv" />
				</ARCS>
			</NODE>

			<!-- Start node (type SetDedupConfiguration). entityType is user-managed and
			     constrained by validValues to result / organization / person;
			     dedupConfigSequence is user-managed and empty here;
			     dedupConfigSequenceParam is fixed to "dedup.conf.queue". -->
			<NODE name="setDedupConfigs" type="SetDedupConfiguration" isStart="true">
				<DESCRIPTION>Set Dedup conf</DESCRIPTION>
				<PARAMETERS>
					<PARAM function="validValues(['result', 'organization', 'person'])" required="true" type="string" name="entityType" managedBy="user"></PARAM>
					<PARAM required="true" type="string" name="dedupConfigSequence" managedBy="user"></PARAM>
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="cleanupCsv" />
				</ARCS>
			</NODE>

			<!-- Start node (type SetClusterAndTable). cluster is fixed to "DM"; table is
			     user-managed and pre-filled with "db_stdl"; tableParam is fixed to
			     "tableName", the name roots2CSV uses in its envParams map. -->
			<NODE name="hadoopConfig" type="SetClusterAndTable" isStart="true">
				<DESCRIPTION>Set table name</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
					<PARAM required="true" type="string" name="table" managedBy="user">db_stdl</PARAM>
					<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="cleanupCsv" />
				</ARCS>
			</NODE>

			<!-- Cleanup step (type DeleteHdfsPathJob). envParams maps 'path' to 'csvPath'
			     and 'cluster' to 'cluster'.
			     NOTE(review): all three start nodes arc to this node; confirm how the
			     workflow engine joins converging arcs so this step runs only once. -->
			<NODE name="cleanupCsv" type="DeleteHdfsPathJob">
				<DESCRIPTION>CSV files cleanup</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{
							'path' : 'csvPath',
							'cluster' : 'cluster'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="roots2CSV" />
				</ARCS>
			</NODE>

			<!-- Export step (type SubmitHadoopJob) submitting "dedupRootsToCSVJob".
			     Three map-valued parameters are passed: sysParams, envParams and params.
			     envParams maps both HBase input-table keys to 'tableName' and
			     'mapred.output.dir' to 'csvPath'.
			     NOTE(review): params hard-codes entityType 'result' / entityTypeId '50',
			     while envParams also maps keys with the same names and setDedupConfigs
			     offers organization and person as entityType values; confirm which
			     source takes precedence in the job configuration. -->
			<NODE name="roots2CSV" type="SubmitHadoopJob">
				<DESCRIPTION>export the representative entities as CSV files</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupRootsToCSVJob</PARAM>
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
						{
							'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
							'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
						}
					</PARAM>
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{
							'cluster' : 'cluster',
							'dedup.wf.conf' : 'dedup.wf.conf',
							'relClasses' : 'relClasses',
							'entityTypeId' : 'entityTypeId',
							'entityType' : 'entityType',
							'hbase.mapred.inputtable' : 'tableName',
							'hbase.mapreduce.inputtable' : 'tableName',
							'mapred.output.dir' : 'csvPath'
						}
					</PARAM>
					<PARAM required="true" type="string" name="params" managedBy="system">
						{
							'entityTypeId' : '50',
							'entityType' : 'result'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="success" />
				</ARCS>
			</NODE>
		</CONFIGURATION>
		<STATUS />
	</BODY>
</RESOURCE_PROFILE>
(3-3/10)