<!-- Project / General / Profile -->
<RESOURCE_PROFILE>
	<!-- Workflow template profile: HEADER identifies the resource, BODY/CONFIGURATION
	     declares the template parameters and the workflow graph (NODE + ARC elements). -->
	<HEADER>
		<RESOURCE_IDENTIFIER value="01ed11e8-e874-4478-a8ac-83e63e9699e4_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
		<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/>
		<RESOURCE_KIND value="WorkflowTemplateDSResources"/>
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2016-06-25T10:24:32+00:00"/>
	</HEADER>
	<BODY>
		<CONFIGURATION>

			<!-- Template parameters. Nodes in WORKFLOW refer to them by name through ref="..." attributes. -->
			<PARAMETERS>

				<!-- HBase table and Hadoop cluster -->
				<PARAM name="hbaseTable"    description="HBase table name" required="true" type="string" />
				<PARAM name="cluster"       description="Hadoop cluster name" required="true" type="string" />

				<!-- Dedup configuration sequence and minDist work directory -->
				<PARAM name="dedupConfigSequence" description="dedup configuration orchestration name" required="true" type="string" />
				<PARAM name="minDistWorkDir" description="work directory for the minDist algorithm" required="true" type="string" />

				<!-- XSLT mappings, referenced by storeSimilarities and storeDissimilarities -->
				<PARAM name="mappingSimilarities"       description="xslt mapping function for similarities" required="true" type="string" />
				<PARAM name="mappingDissimilarities"    description="xslt mapping function for dissimilarities" required="true" type="string" />

				<!-- Dedup index coordinates; each declares a default value.
				     NOTE(review): no node in this template references these three by ref; confirm they are still needed. -->
				<PARAM name="mdFormat"       description="dedup index mdFormat" required="true" type="string" default="OPENAIRE" />
				<PARAM name="layout"         description="dedup index layout" required="true" type="string" default="index" />
				<PARAM name="interpretation" description="dedup index interpretation" required="true" type="string" default="dedup" />

			</PARAMETERS>

			<WORKFLOW>

				<!-- Start node 1 of 2. The xquery comes from the property dnet.openaire.model.relclasses.xquery;
				     its single arc goes to the join node resetPath. -->
				<NODE isStart="true" name="fetchRelClasses" type="FetchRelClasses">
					<DESCRIPTION/>
					<PARAMETERS>
						<PARAM name="xquery" property="dnet.openaire.model.relclasses.xquery" />
						<PARAM name="relClassesName" value="relClasses" />
					</PARAMETERS>
					<ARCS>
						<ARC to="resetPath"/>
					</ARCS>
				</NODE>

				<!-- Start node 2 of 2. Names three entries (dedup.conf.queue, entityType, entityTypeId);
				     the same names are read back with env="..." by later nodes. Arc goes to resetPath. -->
				<NODE isStart="true" name="setDedupConfigs" type="SetDedupConfiguration">
					<DESCRIPTION>Set Dedup conf</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="dedupConfigSequence" ref="dedupConfigSequence" />
						<PARAM name="dedupConfigSequenceParam" value="dedup.conf.queue" />
						<PARAM name="entityTypeNameParam" value="entityType" />
						<PARAM name="entityTypeIdParam" value="entityTypeId" />
					</PARAMETERS>
					<ARCS>
						<ARC to="resetPath"/>
					</ARCS>
				</NODE>


				<!-- Join node (isJoin="true") targeted by both start nodes.
				     Runs CreateHdfsDirJob on the minDistWorkDir path with force="true". -->
				<NODE name="resetPath" type="CreateHdfsDirJob" isJoin="true">
					<DESCRIPTION>input files cleanup</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="force" value="true" />
						<PARAM name="path" ref="minDistWorkDir" />
						<PARAM name="cluster" ref="cluster" />
					</PARAMETERS>
					<ARCS>
						<ARC to="deduplicateScan"/>
					</ARCS>
				</NODE>


				<!-- Candidate scan job. Input and output table keys (both the hbase.mapred.* and
				     hbase.mapreduce.* spellings) all point at the hbaseTable parameter.
				     Arcs: one unnamed arc back to this same node, plus two arcs named "done" that lead
				     to prepareActionSet and queryUserSimilarities.
				     NOTE(review): presumably the unnamed arc repeats the scan for each entry in
				     dedup.conf.queue and "done" forks into the two branches; confirm against the workflow engine. -->
				<NODE name="deduplicateScan" type="DuplicateScanJob">
					<DESCRIPTION>Dup Scan</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="hadoopJob" value="dedupCandidateScanJob" />
						<PARAM name="dedupConfigurationOrchestration" env="dedup.conf.queue" />
						<PARAM name="jobParams">
							<MAP>
								<ENTRY key="entityTypeId" env="entityTypeId"/>
								<ENTRY key="entityType" env="entityType"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapred.outputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.outputtable" ref="hbaseTable"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="deduplicateScan"/>
						<ARC name="done" to="prepareActionSet"/>
						<ARC name="done" to="queryUserSimilarities"/>
					</ARCS>
				</NODE>

				<!-- Action-set branch, step 1. Names rawSetId, actionSetPath and sets;
				     rawSetId and actionSetPath are read with env="..." by similarity2actions. -->
				<NODE name="prepareActionSet" type="PrepareConfiguredActionSet">
					<DESCRIPTION>prepare action sets</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="dedupConfigSequence" ref="dedupConfigSequence" />
						<PARAM name="jobProperty" value="rawSetId" />
						<PARAM name="actionSetPathParam" value="actionSetPath" />
						<PARAM name="setsParam" value="sets" />
					</PARAMETERS>
					<ARCS>
						<ARC to="similarity2actions"/>
					</ARCS>
				</NODE>

				<!-- Action-set branch, step 2. Hadoop job dedupSimilarity2HdfsActionsJob;
				     mapred.output.dir is taken from the actionSetPath env entry, input table from hbaseTable. -->
				<NODE name="similarity2actions" type="DedupSimilarityToActions">
					<DESCRIPTION>export the similarity rels as Actions</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="hadoopJob" value="dedupSimilarity2HdfsActionsJob" />
						<PARAM name="dedupConfigSequence" ref="dedupConfigSequence" />
						<PARAM name="entityType" env="entityType" />
						<PARAM name="jobParams">
							<MAP>
								<ENTRY key="dedup.conf" env="dedup.conf"/>
								<ENTRY key="entityTypeId" env="entityTypeId"/>
								<ENTRY key="entityType" env="entityType"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="rawSetId" env="rawSetId"/>
								<ENTRY key="mapred.output.dir" env="actionSetPath"/>
								<ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="updateActionSets"/>
					</ARCS>
				</NODE>

				<!-- Action-set branch, step 3. Takes no parameters; arc goes to the join node doneActions. -->
				<NODE name="updateActionSets" type="UpdateActionSets">
					<DESCRIPTION>update action sets</DESCRIPTION>
					<PARAMETERS/>
					<ARCS>
						<ARC to="doneActions"/>
					</ARCS>
				</NODE>

				<!-- User-feedback branch, step 1. Runs the querySimilaritiesBySet SQL template against the
				     database named by property dnet.dedup.db.name; output is named simEpr. -->
				<NODE name="queryUserSimilarities" type="QueryUserActionDbJob">
					<DESCRIPTION>query user similarity</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="dedupConfigSequence" ref="dedupConfigSequence" />
						<PARAM name="db" property="dnet.dedup.db.name" />
						<PARAM name="sql" value="/eu/dnetlib/msro/workflows/dedup/querySimilaritiesBySet.sql.st" />
						<PARAM name="outputEprParam" value="simEpr" />
					</PARAMETERS>
					<ARCS>
						<ARC to="buildSimilarityMesh"/>
					</ARCS>
				</NODE>

				<!-- User-feedback branch, step 2. Input simEpr, output simMeshEpr. -->
				<NODE name="buildSimilarityMesh" type="BuildSimilarityMeshJob">
					<DESCRIPTION>build mesh</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="inputEprParam" value="simEpr" />
						<PARAM name="outputEprParam" value="simMeshEpr" />
					</PARAMETERS>
					<ARCS>
						<ARC to="storeSimilarities"/>
					</ARCS>
				</NODE>

				<!-- User-feedback branch, step 3. StoreHBase node fed by simMeshEpr, using the
				     mappingSimilarities parameter; simulation is set to false. -->
				<NODE name="storeSimilarities" type="StoreHBase">
					<DESCRIPTION>store similarity</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="inputEprParam" value="simMeshEpr" />
						<PARAM name="hbaseTable" ref="hbaseTable" />
						<PARAM name="cluster" ref="cluster" />
						<PARAM name="mapping" ref="mappingSimilarities" />
						<PARAM name="simulation" value="false" />
					</PARAMETERS>
					<ARCS>
						<ARC to="queryUserDissimilarities"/>
					</ARCS>
				</NODE>

				<!-- User-feedback branch, step 4. Same node type and database property as queryUserSimilarities,
				     with the queryDissimilaritiesBySet SQL template; output is named dissimEpr. -->
				<NODE name="queryUserDissimilarities" type="QueryUserActionDbJob">
					<DESCRIPTION>query user dissimilarity</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="dedupConfigSequence" ref="dedupConfigSequence" />
						<PARAM name="db" property="dnet.dedup.db.name" />
						<PARAM name="sql" value="/eu/dnetlib/msro/workflows/dedup/queryDissimilaritiesBySet.sql.st" />
						<PARAM name="outputEprParam" value="dissimEpr" />
					</PARAMETERS>
					<ARCS>
						<ARC to="storeDissimilarities"/>
					</ARCS>
				</NODE>

				<!-- User-feedback branch, step 5. Fed by dissimEpr, using the mappingDissimilarities parameter.
				     NOTE(review): the node type is DeleteFromHBase while the description says "store";
				     confirm which wording is intended. -->
				<NODE name="storeDissimilarities" type="DeleteFromHBase">
					<DESCRIPTION>store dissimilarity</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="inputEprParam" value="dissimEpr" />
						<PARAM name="hbaseTable" ref="hbaseTable" />
						<PARAM name="cluster" ref="cluster" />
						<PARAM name="mapping" ref="mappingDissimilarities" />
						<PARAM name="simulation" value="false" />
					</PARAMETERS>
					<ARCS>
						<ARC to="doneActions"/>
					</ARCS>
				</NODE>

				<!-- Join node (isJoin="true", no type attribute) targeted by updateActionSets and
				     storeDissimilarities, i.e. the end of both branches. -->
				<NODE name="doneActions" isJoin="true">
					<DESCRIPTION>done actions</DESCRIPTION>
					<PARAMETERS/>
					<ARCS>
						<ARC to="closeMesh"/>
					</ARCS>
				</NODE>

				<!-- Launches another workflow template (wfTemplateId) and passes it hbaseTable, cluster,
				     minDistWorkDir (as workDir), the entityType env entry (as entitySequence) and dedupConfigSequence.
				     NOTE(review): the arc target "success" is not declared as a NODE in this file;
				     presumably a built-in terminal node, confirm against the workflow engine. -->
				<NODE name="closeMesh" type="LaunchWorkflowTemplate">
					<DESCRIPTION>close mesh</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="wfTemplateId" value="9522876c-5885-4dd2-9d06-92e9a3eaa9f1_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==" />
						<PARAM name="wfTemplateParams">
							<MAP>
								<ENTRY key="hbaseTable" ref="hbaseTable"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="workDir" ref="minDistWorkDir"/>
								<ENTRY key="entitySequence" env="entityType"/>
								<ENTRY key="dedupConfigSequence" ref="dedupConfigSequence" />
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="success" />
					</ARCS>
				</NODE>

			</WORKFLOW>
		</CONFIGURATION>
	</BODY>
</RESOURCE_PROFILE>
<!-- (5-5/15) -->