<RESOURCE_PROFILE>
	<HEADER>
		<!-- Registry identity of this profile. NOTE(review): the part of RESOURCE_IDENTIFIER after "_"
			 appears to be the Base64 form of "WorkflowDSResources/WorkflowDSResourceType", i.e. the
			 RESOURCE_KIND/RESOURCE_TYPE pair below; confirm before editing any of the three. -->
		<RESOURCE_IDENTIFIER value="80f76a00-0eb3-4df0-a256-99f1df7b5fd8_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
		<RESOURCE_KIND value="WorkflowDSResources"/>
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
	</HEADER>
	<BODY>
		<!-- Display metadata: the same label is used for name, menu section, description and family. -->
		<WORKFLOW_NAME menuSection="InfoSpace Deduplication">InfoSpace Deduplication</WORKFLOW_NAME>
		<WORKFLOW_DESCRIPTION>InfoSpace Deduplication</WORKFLOW_DESCRIPTION>
		<WORKFLOW_INFO/>
		<WORKFLOW_FAMILY>InfoSpace Deduplication</WORKFLOW_FAMILY>
		<WORKFLOW_PRIORITY>35</WORKFLOW_PRIORITY>
		<!-- ADMIN_EMAIL is commented out, so no admin address is configured in this profile. -->
		<!-- <ADMIN_EMAIL>alessia.bardi@isti.cnr.it,claudio.atzori@isti.cnr.it</ADMIN_EMAIL> -->
		<CONFIGURATION status="EXECUTABLE" start="MANUAL">
			<PARAMETERS>
				<!-- Workflow-level parameters; nodes in WORKFLOW pick them up through ref="<name>".
					 Preset values are written directly between the tags with no surrounding whitespace,
					 because any indentation or line break there would become part of the value.
					 NOTE(review): nodes also reference "mappingSimilarities", "mappingDissimilarities"
					 (deduplicate* nodes) and "actionSets" (promoteActions), none of which is declared
					 here; confirm where those refs are resolved. -->

				<!-- Target HBase table and cluster -->
				<PARAM name="table" description="HBase table to be dropped" required="true" managedBy="user" type="string"/>
				<PARAM name="cluster" description="Hadoop cluster logical name" required="true" managedBy="user" type="string" function="validValues(['DM','IIS'])"/>
				<PARAM name="reuseRegionInfo" description="Reuse table splits" required="true" managedBy="user" type="boolean"/>

				<!-- Publications -->
				<PARAM name="mappingPublication" description="xslt mapping for publications" required="true" type="string" managedBy="user" function="listProfiles('TransformationRuleDSResourceType', '//TITLE', 'DLI2HBASE:')"/>
				<PARAM name="reusePublication" description="reuse publications on HDFS?" required="true" type="boolean" managedBy="user"/>
				<PARAM name="hdfsPathPublication" description="hdfs path for publications" required="true" type="string" managedBy="system">/tmp/pmf.dli.seq</PARAM>

				<!-- Resolved publications -->
				<PARAM name="reuseResolvedPublication" description="reuse resolved publications on HDFS?" required="true" type="boolean" managedBy="user"/>
				<PARAM name="hdfsPathResolvedPublication" description="hdfs path for resolved publications"
					required="true" type="string" managedBy="system">/user/sandro/dli/pmf.dli.resolved.seq</PARAM>

				<!-- Datasets -->
				<PARAM name="mappingDataset" description="xslt mapping for datasets" required="true" type="string" managedBy="user" function="listProfiles('TransformationRuleDSResourceType', '//TITLE', 'DLI2HBASE:')"/>
				<PARAM name="reuseDataset" description="reuse datasets on HDFS?" required="true" type="boolean" managedBy="user"/>
				<PARAM name="hdfsPathDataset" description="hdfs path for datasets" required="true" type="string" managedBy="system">/tmp/dmf.dli.seq</PARAM>

				<!-- Resolved datasets -->
				<PARAM name="reuseResolvedDataset" description="reuse resolved datasets on HDFS?" required="true" type="boolean" managedBy="user"/>
				<PARAM name="hdfsPathResolvedDataset" description="hdfs path for resolved datasets" required="true" type="string" managedBy="system">/tmp/dmf.dli.resolved.seq</PARAM>

				<!-- Unresolved objects -->
				<PARAM name="mappingUnresolved" description="xslt mapping for unresolved objects" required="true" type="string" managedBy="user" function="listProfiles('TransformationRuleDSResourceType', '//TITLE', 'DLI2HBASE:')"/>
				<PARAM name="reuseUnresolved" description="reuse unresolved objects on HDFS?" required="true" type="boolean" managedBy="user"/>
				<PARAM name="hdfsPathUnresolved" description="hdfs path for unresolved objects" required="true" type="string" managedBy="system">/tmp/scholix.dli.seq</PARAM>

				<!-- Deduplication: one orchestration per deduplicate* node, told apart by description -->
				<PARAM name="dedupConfigDataset" description="dedup configuration orchestration name for datasets"
					required="true" type="string" function="obtainValues('dedupOrchestrations', {})" managedBy="user"/>
				<PARAM name="dedupConfigPublication" description="dedup configuration orchestration name for publications"
					required="true" type="string" function="obtainValues('dedupOrchestrations', {})" managedBy="user"/>
				<PARAM name="dedupConfigUnknown" description="dedup configuration orchestration name for unknown entities"
					required="true" type="string" function="obtainValues('dedupOrchestrations', {})" managedBy="user"/>
				<PARAM name="minDistWorkDir" description="work directory for the minDist algorithm"
					required="true" type="string" managedBy="user">/user/dnet/dli/dedup/prod/mindist</PARAM>
			</PARAMETERS>
			<WORKFLOW>
				<!-- Start node (isStart="true"). Passes table, cluster and reuseRegionInfo to the template
					 named by wfTemplateId, then continues with pmf2hbase. -->
				<NODE name="resetHbase" type="LaunchWorkflowTemplate" isStart="true">
					<DESCRIPTION>Reset HBase table</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="wfTemplateId" value="75345aba-c069-43f4-90aa-e13688d9845e_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
						<PARAM name="wfTemplateParams">
							<MAP>
								<ENTRY key="table" ref="table"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="reuseRegionInfo" ref="reuseRegionInfo"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="pmf2hbase"/>
					</ARCS>
				</NODE>
				<!-- Feeds publications (mdFormat "PMF") to the shared *2hbase template using the
					 publication mapping, reuse flag and HDFS path; continues with dmf2hbase. -->
				<NODE name="pmf2hbase" type="LaunchWorkflowTemplate">
					<DESCRIPTION>PMF Publications to HBase</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="wfTemplateId" value="5b05a65a-4eeb-4862-bc55-b35c7ec3baf0_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
						<PARAM name="wfTemplateParams">
							<MAP>
								<ENTRY key="hbaseTable" ref="table"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="mdFormat" value="PMF"/>
								<ENTRY key="mapping" ref="mappingPublication"/>
								<ENTRY key="reuseMdRecords" ref="reusePublication"/>
								<ENTRY key="hdfsPath" ref="hdfsPathPublication"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="dmf2hbase"/>
					</ARCS>
				</NODE>
				<!-- Feeds datasets (mdFormat "DMF") to the shared *2hbase template using the dataset
					 mapping, reuse flag and HDFS path; continues with scholix2hbase. -->
				<NODE name="dmf2hbase" type="LaunchWorkflowTemplate">
					<DESCRIPTION>DMF Datasets to HBase</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="wfTemplateId" value="5b05a65a-4eeb-4862-bc55-b35c7ec3baf0_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
						<PARAM name="wfTemplateParams">
							<MAP>
								<ENTRY key="hbaseTable" ref="table"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="mdFormat" value="DMF"/>
								<ENTRY key="mapping" ref="mappingDataset"/>
								<ENTRY key="reuseMdRecords" ref="reuseDataset"/>
								<ENTRY key="hdfsPath" ref="hdfsPathDataset"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="scholix2hbase"/>
					</ARCS>
				</NODE>
				<!-- Feeds the unresolved objects (mdFormat "Scholix") to the shared *2hbase template using
					 mappingUnresolved, reuseUnresolved and hdfsPathUnresolved; continues with
					 pmfResolved2hbase. The description was aligned with these inputs (it previously read
					 "ODF Datasets to HBase", which matched neither the format nor the parameters). -->
				<NODE name="scholix2hbase" type="LaunchWorkflowTemplate">
					<DESCRIPTION>Scholix Unresolved Objects to HBase</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="wfTemplateId" value="5b05a65a-4eeb-4862-bc55-b35c7ec3baf0_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
						<PARAM name="wfTemplateParams">
							<MAP>
								<ENTRY key="hbaseTable" ref="table"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="mdFormat" value="Scholix"/>
								<ENTRY key="mapping" ref="mappingUnresolved"/>
								<ENTRY key="reuseMdRecords" ref="reuseUnresolved"/>
								<ENTRY key="hdfsPath" ref="hdfsPathUnresolved"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="pmfResolved2hbase"/>
					</ARCS>
				</NODE>
				<!-- Same template and publication mapping as pmf2hbase, but with interpretation="resolved"
					 and the resolved-publication reuse flag and HDFS path; continues with dmfResolved2hbase. -->
				<NODE name="pmfResolved2hbase" type="LaunchWorkflowTemplate">
					<DESCRIPTION>PMF Resolved Publications to HBase</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="wfTemplateId" value="5b05a65a-4eeb-4862-bc55-b35c7ec3baf0_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
						<PARAM name="wfTemplateParams">
							<MAP>
								<ENTRY key="hbaseTable" ref="table"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="mdFormat" value="PMF"/>
								<ENTRY key="interpretation" value="resolved"/>
								<ENTRY key="mapping" ref="mappingPublication"/>
								<ENTRY key="reuseMdRecords" ref="reuseResolvedPublication"/>
								<ENTRY key="hdfsPath" ref="hdfsPathResolvedPublication"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="dmfResolved2hbase"/>
					</ARCS>
				</NODE>
				<!-- Same template and dataset mapping as dmf2hbase, but with interpretation="resolved"
					 and the resolved-dataset reuse flag and HDFS path; continues with mergeCollectedFrom. -->
				<NODE name="dmfResolved2hbase" type="LaunchWorkflowTemplate">
					<DESCRIPTION>DMF Resolved Datasets to HBase</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="wfTemplateId" value="5b05a65a-4eeb-4862-bc55-b35c7ec3baf0_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
						<PARAM name="wfTemplateParams">
							<MAP>
								<ENTRY key="hbaseTable" ref="table"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="mdFormat" value="DMF"/>
								<ENTRY key="interpretation" value="resolved"/>
								<ENTRY key="mapping" ref="mappingDataset"/>
								<ENTRY key="reuseMdRecords" ref="reuseResolvedDataset"/>
								<ENTRY key="hdfsPath" ref="hdfsPathResolvedDataset"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="mergeCollectedFrom"/>
					</ARCS>
				</NODE>
				<!-- Passes table and cluster to its own template (a different wfTemplateId from the
					 deduplicate* nodes), so the description no longer reads "Deduplication".
					 The outgoing arc targets deduplicatePublication: that node had no incoming arc at all,
					 although dedupConfigPublication is a required input and closeMesh lists "publication"
					 in its entitySequence. deduplicatePublication itself continues with deduplicateDataset,
					 so the rest of the chain is unchanged.
					 NOTE(review): if publication dedup was bypassed on purpose, restore the previous
					 target (deduplicateDataset). -->
				<NODE name="mergeCollectedFrom" type="LaunchWorkflowTemplate">
					<DESCRIPTION>Merge collectedFrom</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="wfTemplateId" value="7748c68f-bb1a-4ef4-a95d-960cd0ea9ce8_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
						<PARAM name="wfTemplateParams">
							<MAP>
								<ENTRY key="hbaseTable" ref="table"/>
								<ENTRY key="cluster" ref="cluster"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="deduplicatePublication"/>
					</ARCS>
				</NODE>
                <!-- Runs the dedup template with the dedupConfigPublication orchestration and the shared
                     minDist work directory, then continues with deduplicateDataset.
                     NOTE(review): refs "mappingSimilarities" and "mappingDissimilarities" are not declared
                     in this profile's workflow PARAMETERS; confirm where they are resolved. -->
                <NODE name="deduplicatePublication" type="LaunchWorkflowTemplate">
                    <DESCRIPTION>Deduplication</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="wfTemplateId" value="01ed11e8-e874-4478-a8ac-83e63e9699e4_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
                        <PARAM name="wfTemplateParams">
                            <MAP>
                                <ENTRY key="hbaseTable" ref="table"/>
                                <ENTRY key="cluster" ref="cluster"/>
                                <ENTRY key="dedupConfigSequence" ref="dedupConfigPublication"/>
                                <ENTRY key="minDistWorkDir" ref="minDistWorkDir"/>
                                <ENTRY key="mappingSimilarities" ref="mappingSimilarities"/>
                                <ENTRY key="mappingDissimilarities" ref="mappingDissimilarities"/>
                            </MAP>
                        </PARAM>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="deduplicateDataset"/>
                    </ARCS>
                </NODE>
                <!-- Runs the dedup template with the dedupConfigDataset orchestration and the shared
                     minDist work directory, then continues with deduplicateUnknown.
                     NOTE(review): refs "mappingSimilarities" and "mappingDissimilarities" are not declared
                     in this profile's workflow PARAMETERS; confirm where they are resolved. -->
                <NODE name="deduplicateDataset" type="LaunchWorkflowTemplate">
                    <DESCRIPTION>Deduplication</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="wfTemplateId" value="01ed11e8-e874-4478-a8ac-83e63e9699e4_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
                        <PARAM name="wfTemplateParams">
                            <MAP>
                                <ENTRY key="hbaseTable" ref="table"/>
                                <ENTRY key="cluster" ref="cluster"/>
                                <ENTRY key="dedupConfigSequence" ref="dedupConfigDataset"/>
                                <ENTRY key="minDistWorkDir" ref="minDistWorkDir"/>
                                <ENTRY key="mappingSimilarities" ref="mappingSimilarities"/>
                                <ENTRY key="mappingDissimilarities" ref="mappingDissimilarities"/>
                            </MAP>
                        </PARAM>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="deduplicateUnknown"/>
                    </ARCS>
                </NODE>
                <!-- Runs the dedup template with the dedupConfigUnknown orchestration and the shared
                     minDist work directory, then continues with promoteActions.
                     NOTE(review): refs "mappingSimilarities" and "mappingDissimilarities" are not declared
                     in this profile's workflow PARAMETERS; confirm where they are resolved. -->
                <NODE name="deduplicateUnknown" type="LaunchWorkflowTemplate">
                    <DESCRIPTION>Deduplication</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="wfTemplateId" value="01ed11e8-e874-4478-a8ac-83e63e9699e4_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
                        <PARAM name="wfTemplateParams">
                            <MAP>
                                <ENTRY key="hbaseTable" ref="table"/>
                                <ENTRY key="cluster" ref="cluster"/>
                                <ENTRY key="dedupConfigSequence" ref="dedupConfigUnknown"/>
                                <ENTRY key="minDistWorkDir" ref="minDistWorkDir"/>
                                <ENTRY key="mappingSimilarities" ref="mappingSimilarities"/>
                                <ENTRY key="mappingDissimilarities" ref="mappingDissimilarities"/>
                            </MAP>
                        </PARAM>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="promoteActions"/>
                    </ARCS>
                </NODE>
                <!-- Node of type PromoteActionsHDFS: receives "set" and "tableName" by reference, then
                     continues with closeMesh.
                     NOTE(review): ref "actionSets" is not declared in this profile's workflow PARAMETERS;
                     confirm where it is resolved. -->
                <NODE name="promoteActions" type="PromoteActionsHDFS">
                    <DESCRIPTION>Promote Actions</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="set" ref="actionSets"/>
                        <PARAM name="tableName" ref="table"/>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="closeMesh"/>
                    </ARCS>
                </NODE>
                <!-- Passes table, cluster, the minDist work directory (as "workDir") and the fixed entity
                     sequence "publication,dataset,unknown" to its template, then continues with updateIndex. -->
                <NODE name="closeMesh" type="LaunchWorkflowTemplate">
                    <DESCRIPTION>close mesh</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="wfTemplateId" value="70274106-375d-4135-9de1-536a606b327b_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
                        <PARAM name="wfTemplateParams">
                            <MAP>
                                <ENTRY key="hbaseTable" ref="table"/>
                                <ENTRY key="cluster" ref="cluster"/>
                                <ENTRY key="workDir" ref="minDistWorkDir"/>
                                <ENTRY key="entitySequence" value="publication,dataset,unknown"/>
                            </MAP>
                        </PARAM>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="updateIndex"/>
                    </ARCS>
                </NODE>
                <!-- Last node of the chain: passes table and cluster to its template and then arcs to
                     "success". -->
                <NODE name="updateIndex" type="LaunchWorkflowTemplate">
                    <DESCRIPTION>update provision</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="wfTemplateId" value="bf449f4c-2da5-466f-aabc-e2269980e6b2_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
                        <PARAM name="wfTemplateParams">
                            <MAP>
                                <ENTRY key="hbaseTable" ref="table"/>
                                <ENTRY key="cluster" ref="cluster"/>
                            </MAP>
                        </PARAM>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="success"/>
                    </ARCS>
                </NODE>
			</WORKFLOW>
		</CONFIGURATION>
		<NOTIFICATIONS/>
		<!-- Scheduling carries enabled="false"; CONFIGURATION declares start="MANUAL".
			 NOTE(review): the six-field expression with "?" looks like Quartz cron syntax, under which
			 it would mean 22:05:29 every day; 10080 equals the number of minutes in one week, but the
			 unit of MININTERVAL is not stated in this file. Confirm both before enabling. -->
		<SCHEDULING enabled="false">
			<CRON>29 5 22 ? * *</CRON>
			<MININTERVAL>10080</MININTERVAL>
		</SCHEDULING>
		<STATUS/>
	</BODY>
</RESOURCE_PROFILE>