Project

General

Profile

1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
	<!--
		Profile header: resource identifier, resource type, resource kind,
		resource URI (empty in this profile) and creation timestamp.
		The part of RESOURCE_IDENTIFIER after the underscore is the Base64
		encoding of "WorkflowDSResources/WorkflowDSResourceType", i.e. the
		RESOURCE_KIND and RESOURCE_TYPE values below joined by a slash.
	-->
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="551902ae-7f01-401a-a714-9b16ae5ae37e_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
5
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
6
		<RESOURCE_KIND value="WorkflowDSResources" />
7
		<RESOURCE_URI value="" />
8
		<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z" />
9
	</HEADER>
10
	<BODY>
11
		<!--
			Workflow descriptor: display name, workflow type and priority.
			NOTE(review): the scale and ordering of WORKFLOW_PRIORITY are not
			defined in this file; confirm against the workflow engine.
		-->
		<WORKFLOW_NAME>Update dedup Index</WORKFLOW_NAME>
12
		<WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
13
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
		<CONFIGURATION start="manual">
15

    
16
			<!--
				Start node of the workflow (isStart="true"), node type FindIndex.
				Parameters: mdFormat is user-managed and carries no value in this
				profile; layout ("index") and interpretation ("dedup") are
				system-managed.
				Outgoing arcs: "found" leads to the prepare node, "notFound" leads
				to the createIndex node.
			-->
			<NODE name="findIndex" type="FindIndex" isStart="true">
17
				<DESCRIPTION />
18
				<PARAMETERS>
19
					<PARAM name="mdFormat" type="string" managedBy="user" required="true"></PARAM>
20
					<PARAM name="layout" type="string" managedBy="system" required="true">index</PARAM>
21
					<PARAM name="interpretation" type="string" managedBy="system" required="true">dedup</PARAM>
22
				</PARAMETERS>
23
				<ARCS>
24
					<ARC name="found" to="prepare" />
25
					<ARC name="notFound" to="createIndex" />
26
				</ARCS>
27
			</NODE>
28

    
29
			<!--
				Node of type CreateIndex, targeted by the "notFound" arc of the
				findIndex node. It declares no parameters and has a single unnamed
				arc to the prepare node.
			-->
			<NODE name="createIndex" type="CreateIndex">
30
				<DESCRIPTION />
31
				<PARAMETERS />
32
				<ARCS>
33
					<ARC to="prepare" />
34
				</ARCS>
35
			</NODE>
36

    
37
			<!--
				Node of type PrepareDedupIndexJob ("Prepare indexing").
				Parameters: dedupConfig is user-managed, has no value in this
				profile and declares the function
				obtainValues('dedupOrchestrations', {}); rottenRecordsPathParam is
				system-managed with the value "rottenRecordsPath".
				NOTE(review): "rottenRecordsPath" is also the name referenced by the
				envParams of the cleanupRotten and updateIndex nodes; presumably this
				node stores the path under that name. Confirm against the node
				implementation.
				Single unnamed arc to the cleanupRotten node.
			-->
			<NODE name="prepare" type="PrepareDedupIndexJob">
38
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
39
				<PARAMETERS>
40
					<PARAM name="dedupConfig" function="obtainValues('dedupOrchestrations', {})" type="string" required="true" managedBy="user"></PARAM>
41
					<PARAM name="rottenRecordsPathParam"  type="string" required="true" managedBy="system">rottenRecordsPath</PARAM>
42
				</PARAMETERS>
43
				<ARCS>
44
					<ARC to="cleanupRotten" />
45
				</ARCS>
46
			</NODE>
47

    
48
			<!--
				Node of type DeleteHdfsPathJob ("hdfs cleanup (rotten)").
				Parameters (all system-managed): cluster is "DM"; envParams is a
				brace-delimited map with one entry pairing 'path' with
				'rottenRecordsPath'.
				NOTE(review): presumably the right-hand name is looked up in the
				workflow environment to obtain the actual path. Confirm.
				Single unnamed arc to the updateIndex node.
			-->
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
49
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
50
				<PARAMETERS>
51
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
52
					<PARAM required="true" type="string" name="envParams" managedBy="system">
53
						{ 	
54
							'path' : 'rottenRecordsPath'
55
						}
56
					</PARAM>					
57
				</PARAMETERS>
58
				<ARCS>
59
					<ARC to="updateIndex" />
60
				</ARCS>
61
			</NODE>
62
		
63
			<!--
				Node of type SubmitHadoopJob ("M/O index records").
				Parameters (all system-managed):
				  cluster    is "DM".
				  hadoopJob  is "dedupIndexFeedJob".
				  envParams  is a brace-delimited map of eleven entries. Ten pair a
				             key with an identical name; 'mapred.output.dir' is the
				             only one paired with a different name,
				             'rottenRecordsPath'.
				  sysParams  is a brace-delimited map of two entries; both
				             'hbase.mapred.inputtable' and
				             'hbase.mapreduce.inputtable' are paired with
				             'hbase.mapred.datatable'.
				NOTE(review): presumably the left side of each entry is the job
				property to set and the right side is the environment or system
				attribute it is read from. Confirm against the node implementation.
				Single unnamed arc to the finalize node.
			-->
			<NODE name="updateIndex" type="SubmitHadoopJob">
64
				<DESCRIPTION>M/O index records</DESCRIPTION>
65
				<PARAMETERS>
66
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
67
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupIndexFeedJob</PARAM>					
68
					<PARAM required="true" type="string" name="envParams" managedBy="system">
69
						{ 	
70
							'mapred.output.dir' : 'rottenRecordsPath',
71
							'index.fields' : 'index.fields',
72
							'index.solr.url' : 'index.solr.url',
73
							'index.solr.collection' : 'index.solr.collection',
74
							'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
75
							'index.shutdown.wait.time' : 'index.shutdown.wait.time',
76
							'index.solr.sim.mode' : 'index.solr.sim.mode',
77
							'index.feed.timestamp' : 'index.feed.timestamp',
78
							'entityTypeId' : 'entityTypeId',
79
							'entityType' : 'entityType',
80
							'actionset' : 'actionset'
81
						}
82
					</PARAM>
83
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
84
						{ 	
85
							'hbase.mapred.inputtable' : 'hbase.mapred.datatable', 
86
							'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
87
						}
88
					</PARAM>
89
				</PARAMETERS>
90
				<ARCS>
91
					<ARC to="finalize" />
92
				</ARCS>
93
			</NODE>
94

    
95
			<!--
				Node of type FinalizeDedupIndexFeeding ("commit changes"), reached
				from the updateIndex node. It declares no parameters and has a
				single unnamed arc to the updateDs node.
			-->
			<NODE name="finalize" type="FinalizeDedupIndexFeeding">
96
				<DESCRIPTION>commit changes</DESCRIPTION>
97
				<PARAMETERS />
98
				<ARCS>
99
					<ARC to="updateDs" />
100
				</ARCS>
101
			</NODE>
102

    
103
			<!--
				Last node defined in this CONFIGURATION: type IndexDsUpdateJob
				("update DS"), reached from the finalize node, no parameters.
				Its single arc targets "success". No node with that name is defined
				in this CONFIGURATION.
				NOTE(review): presumably "success" is a terminal node supplied by the
				workflow engine. Confirm.
			-->
			<NODE name="updateDs" type="IndexDsUpdateJob">
104
				<DESCRIPTION>update DS</DESCRIPTION>
105
				<PARAMETERS />
106
				<ARCS>
107
					<ARC to="success" />
108
				</ARCS>
109
			</NODE>
110
	
111
		</CONFIGURATION>
112
		<STATUS />
113
	</BODY>
114
</RESOURCE_PROFILE>
115

    
116

    
(5-5/23)