<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
    <!-- Resource header. RESOURCE_IDENTIFIER is a UUID, an underscore, and the Base64
         encoding of "WorkflowDSResources/WorkflowDSResourceType", i.e. the
         RESOURCE_KIND and RESOURCE_TYPE values declared below joined by a slash. -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="07093922-e99e-41fc-b931-41019a82f736_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
        <RESOURCE_KIND value="WorkflowDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
    </HEADER>
10
    <BODY>
11
        <!-- Workflow name, type and numeric priority. -->
        <WORKFLOW_NAME>Reindex Deduplication</WORKFLOW_NAME>
        <WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
        <CONFIGURATION start="manual">
15
        
16
        
17
			<!-- Start node (isStart="true") of type SetDedupConfiguration.
			     dedupConfigSequence is user-managed; its value is declared through the
			     function obtainValues('dedupOrchestrations', {}).
			     dedupConfigSequenceParam is system-managed and fixed to dedup.conf.queue.
			     Single outgoing arc: checkConf. -->
			<NODE name="setDedupConfigs" type="SetDedupConfiguration" isStart="true">
				<DESCRIPTION>Set Dedup conf</DESCRIPTION>
				<PARAMETERS>
					<PARAM function="obtainValues('dedupOrchestrations', {})" required="true" type="string" name="dedupConfigSequence" managedBy="user"></PARAM>
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="checkConf" />
				</ARCS>
			</NODE>
27
			<!-- Second start node (isStart="true"), type SetClusterAndTable.
			     System-managed parameters: cluster = DM, tableParam = tableName.
			     Like setDedupConfigs, its single arc leads to checkConf. -->
			<NODE name="hadoopConfig" type="SetClusterAndTable" isStart="true">
				<DESCRIPTION>Set table name</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
					<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="checkConf" />
				</ARCS>
			</NODE>
37
        	<!-- Join node (isJoin="true"), type DedupCheckConfiguration: it is the arc
        	     target of both start nodes, setDedupConfigs and hadoopConfig.
        	     Carries the same dedupConfigSequenceParam value (dedup.conf.queue) as
        	     setDedupConfigs and continues to prepareActionSet. -->
        	<NODE name="checkConf" type="DedupCheckConfiguration" isJoin="true">
				<DESCRIPTION/>
				<PARAMETERS>
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="prepareActionSet" />
				</ARCS>
			</NODE>
46
        
47
			<!-- Type PrepareConfiguredActionSet. System-managed parameters:
			     dedupConfigSequenceParam = dedup.conf.queue, jobProperty = rawSetId.
			     Continues to findIndex. -->
			<NODE name="prepareActionSet" type="PrepareConfiguredActionSet">
				<DESCRIPTION>prepare action sets</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
					<PARAM required="true" type="string" name="jobProperty" managedBy="system">rawSetId</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="findIndex" />
				</ARCS>
			</NODE>
57
			
58
			<!-- Type FindIndex, parameterised with mdFormat = OPENAIRE, layout = index,
			     interpretation = dedup. This is the only branching node in the profile:
			     the "found" arc goes straight to prepareIndex, the "notFound" arc goes
			     to createIndex (which then leads to prepareIndex as well). -->
			<NODE name="findIndex" type="FindIndex">
				<DESCRIPTION />
				<PARAMETERS>
					<PARAM name="mdFormat" type="string" managedBy="system" required="true">OPENAIRE</PARAM>
					<PARAM name="layout" type="string" managedBy="system" required="true">index</PARAM>
					<PARAM name="interpretation" type="string" managedBy="system" required="true">dedup</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC name="found" to="prepareIndex" />
					<ARC name="notFound" to="createIndex" />
				</ARCS>
			</NODE>
70

    
71
			<!-- Type CreateIndex, no parameters. Within this profile it is targeted only
			     by the "notFound" arc of findIndex; it rejoins the main path at
			     prepareIndex. -->
			<NODE name="createIndex" type="CreateIndex">
				<DESCRIPTION />
				<PARAMETERS />
				<ARCS>
					<ARC to="prepareIndex" />
				</ARCS>
			</NODE>
78

    
79
			<!-- Type PrepareDedupIndexJob. rottenRecordsPathParam carries the name
			     rottenRecordsPath, the same name that cleanupRotten ('path') and
			     updateIndex ('mapred.output.dir') refer to in their envParams maps.
			     dedupConfig is a single-quoted key/value map with one entry.
			     Continues to cleanupRotten. -->
			<NODE name="prepareIndex" type="PrepareDedupIndexJob">
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="rottenRecordsPathParam"  type="string" required="true" managedBy="system">rottenRecordsPath</PARAM>
					<PARAM required="true" type="string" name="dedupConfig" managedBy="system">
						{
							'dedupConfig' : 'dedup.conf'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="cleanupRotten" />
				</ARCS>
			</NODE>
93

    
94
			<!-- Type DeleteHdfsPathJob on cluster DM. envParams maps 'path' to
			     rottenRecordsPath.
			     NOTE(review): presumably this clears the rotten-records location before
			     updateIndex uses it as mapred.output.dir; confirm against the node
			     implementation. Continues to updateIndex. -->
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{
							'path' : 'rottenRecordsPath'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="updateIndex" />
				</ARCS>
			</NODE>
108
		
109
			<!-- Type SubmitHadoopJob, hadoopJob = dedupIndexFeedJob.
			     envParams and sysParams are single-quoted key/value maps.
			     In envParams every entry maps a key to an identically named value,
			     except 'mapred.output.dir', which maps to rottenRecordsPath.
			     In sysParams both 'hbase.mapred.inputtable' and
			     'hbase.mapreduce.inputtable' map to hbase.mapred.datatable.
			     NOTE(review): the maps look like job-property to workflow/system
			     property lookups; confirm against the SubmitHadoopJob node.
			     Continues to finalize. -->
			<NODE name="updateIndex" type="SubmitHadoopJob">
				<DESCRIPTION>M/O index records</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupIndexFeedJob</PARAM>
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{
							'mapred.output.dir' : 'rottenRecordsPath',
							'index.fields' : 'index.fields',
							'index.solr.url' : 'index.solr.url',
							'index.solr.collection' : 'index.solr.collection',
							'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
							'index.shutdown.wait.time' : 'index.shutdown.wait.time',
							'index.solr.sim.mode' : 'index.solr.sim.mode',
							'index.feed.timestamp' : 'index.feed.timestamp',
							'entityTypeId' : 'entityTypeId',
							'entityType' : 'entityType',
							'actionset' : 'actionset',
							'cluster' : 'cluster'
						}
					</PARAM>
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
						{
							'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
							'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="finalize" />
				</ARCS>
			</NODE>
140

    
141
			<!-- Type FinalizeDedupIndexFeeding, no parameters.
			     Runs after updateIndex and continues to updateDs. -->
			<NODE name="finalize" type="FinalizeDedupIndexFeeding">
				<DESCRIPTION>commit changes</DESCRIPTION>
				<PARAMETERS />
				<ARCS>
					<ARC to="updateDs" />
				</ARCS>
			</NODE>
148

    
149
			<!-- Type IndexDsUpdateJob, no parameters. Last node declared in this profile.
			     Its arc targets "success", which is not declared as a NODE here.
			     NOTE(review): presumably a terminal state provided by the workflow
			     engine; confirm. -->
			<NODE name="updateDs" type="IndexDsUpdateJob">
				<DESCRIPTION>update DS</DESCRIPTION>
				<PARAMETERS />
				<ARCS>
					<ARC to="success" />
				</ARCS>
			</NODE>
156

    
157
						
158
        </CONFIGURATION>
159
        <STATUS />
160
    </BODY>
161
</RESOURCE_PROFILE>
(27-27/32)