Project

General

Profile

<RESOURCE_PROFILE>
	<!-- Profile metadata: identifier, resource type and kind, an empty resource URI,
	     and the creation timestamp (ISO 8601 with explicit +01:00 offset).
	     NOTE(review): the identifier suffix after "_" appears to be the Base64 form of
	     "kind/type" (WorkflowTemplateDSResources/WorkflowTemplateDSResourceType); confirm
	     before editing RESOURCE_TYPE or RESOURCE_KIND independently of the identifier. -->
	<HEADER>
		<RESOURCE_IDENTIFIER value="70274106-375d-4135-9de1-536a606b327b_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
		<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/>
		<RESOURCE_KIND value="WorkflowTemplateDSResources"/>
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2017-02-28T10:21:23+01:00"/>
	</HEADER>
	<BODY>
		<CONFIGURATION>
			<!-- Template-level parameters. Workflow nodes below pick these up through
			     ref="<name>" attributes (hbaseTable, cluster, workDir, actionSets, entitySequence).
			     All five are declared required; only entitySequence carries a default value. -->
			<PARAMETERS>
				<PARAM description="HBase table name" name="hbaseTable" required="true" type="string"/>
				<PARAM description="Hadoop cluster name" name="cluster" required="true" type="string"/>
				<PARAM description="minDist algorithm work directory" name="workDir" required="true" type="string"/>
				<PARAM description="csv list of action sets to be promoted" name="actionSets" required="true" type="string"/>
				<PARAM default="organization,result" description="list of entity names to be considered by the algorithm" name="entitySequence" required="true" type="string"/>
			</PARAMETERS>
			<WORKFLOW>
				<!-- Start node of the workflow (isStart="true"). Runs a FetchOntologies node with
				     ontologiesParamName="ontologies", then follows its single unnamed arc to promoteActions.
				     The buildRoots and mergeRels nodes later read an env entry named "ontologies";
				     presumably this node is what publishes it (confirm against the node implementation). -->
				<NODE isStart="true" name="fetchOntologies" type="FetchOntologies">
					<DESCRIPTION/>
					<PARAMETERS>
						<PARAM name="ontologiesParamName" value="ontologies"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="promoteActions"/>
					</ARCS>
				</NODE>
				<!-- Promotes action sets: a PromoteActionsHDFS node fed with the template parameters
				     actionSets (as "set") and hbaseTable (as "tableName").
				     Its single unnamed arc leads to entitySequence. -->
				<NODE name="promoteActions" type="PromoteActionsHDFS">
					<DESCRIPTION>Promote Actions</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="set" ref="actionSets"/>
						<PARAM name="tableName" ref="hbaseTable"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="entitySequence"/>
					</ARCS>
				</NODE>
				<!-- Loop head of the per-entity processing chain (CheckEntitySequenceJob).
				     Receives the entitySequence template parameter plus the names of four env keys:
				     dedup.conf.queue, dedup.entity.name, dedup.entity.id and actionset.
				     Arcs: the unnamed arc continues to resetPath; the arc named "done" goes to "success".
				     The mergeRels node at the end of the chain points back here, closing the loop.
				     NOTE(review): no node named "success" is defined in this template; presumably it is
				     a terminal provided by the workflow engine (confirm). -->
				<NODE name="entitySequence" type="CheckEntitySequenceJob">
					<DESCRIPTION>Check entity sequence</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="entitySequence" ref="entitySequence"/>
						<PARAM name="dedupConfigSequenceParam" value="dedup.conf.queue"/>
						<PARAM name="entityTypeNameParam" value="dedup.entity.name"/>
						<PARAM name="entityTypeIdParam" value="dedup.entity.id"/>
						<PARAM name="actionSetParam" value="actionset"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="resetPath"/>
						<ARC name="done" to="success"/>
					</ARCS>
				</NODE>
				<!-- Prepares the working directory before the minDist job: a CreateHdfsDirJob node
				     invoked with force="true" on the workDir path of the given cluster.
				     Its single unnamed arc leads to mindist. -->
				<NODE name="resetPath" type="CreateHdfsDirJob">
					<DESCRIPTION>input files cleanup</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="force" value="true"/>
						<PARAM name="path" ref="workDir"/>
						<PARAM name="cluster" ref="cluster"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="mindist"/>
					</ARCS>
				</NODE>
				<!-- MinDistSearchHadoopJob node. Arcs: the arc named "depth_n" points back to this same
				     node (self-loop); the unnamed arc continues to components.
				     outPathParam names the env key "outputPath", which the components node reads as its
				     mapred.input.dir entry.
				     The job map sets both hbase.mapred.inputtable and hbase.mapreduce.inputtable to the
				     same table, presumably to cover both property spellings (confirm).
				     NOTE(review): dedupConfigSequence is read from env "actionset", while entitySequence is
				     configured with dedupConfigSequenceParam="dedup.conf.queue" and
				     actionSetParam="actionset"; confirm that "actionset" is the intended env key here. -->
				<NODE name="mindist" type="MinDistSearchHadoopJob">
					<DESCRIPTION>find the minimum vertex in each adjacency lists</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="debug" value="false"/>
						<PARAM name="cluster" ref="cluster"/>
						<PARAM name="outPathParam" value="outputPath"/>
						<PARAM env="actionset" name="dedupConfigSequence"/>
						<PARAM name="workDir" ref="workDir"/>
						<PARAM name="jobParams">
							<MAP>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY env="dedup.entity.id" key="entityTypeId"/>
								<ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC name="depth_n" to="mindist"/>
						<ARC to="components"/>
					</ARCS>
				</NODE>
				<!-- DedupConfigurationLoaderJob node launching the Hadoop job "connectedComponentsJob".
				     Job map: writes to hbaseTable (both outputtable property spellings), takes the entity
				     name and id from env (dedup.entity.name, dedup.entity.id) and uses the env value
				     "outputPath" as mapred.input.dir ("outputPath" is the key the mindist node passes as
				     its outPathParam). Its single unnamed arc leads to markDeleted. -->
				<NODE name="components" type="DedupConfigurationLoaderJob">
					<DESCRIPTION>joins all the vertex ids to build the connected components in the graph</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="hadoopJob" value="connectedComponentsJob"/>
						<PARAM env="actionset" name="dedupConfigSequence"/>
						<PARAM name="cluster" ref="cluster"/>
						<PARAM name="jobParams">
							<MAP>
								<ENTRY key="hbase.mapred.outputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.outputtable" ref="hbaseTable"/>
								<ENTRY env="dedup.entity.name" key="entityType"/>
								<ENTRY env="dedup.entity.id" key="entityTypeId"/>
								<ENTRY env="outputPath" key="mapred.input.dir"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="markDeleted"/>
					</ARCS>
				</NODE>
				<!-- DedupConfigurationLoaderJob node launching the Hadoop job "dedupMarkDeletedEntityJob".
				     Job map: hbaseTable is both input and output table (each under two property
				     spellings); entity name and id come from env (dedup.entity.name, dedup.entity.id).
				     Its single unnamed arc leads to buildRoots. -->
				<NODE name="markDeleted" type="DedupConfigurationLoaderJob">
					<DESCRIPTION>mark duplicates as deleted by inference</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="hadoopJob" value="dedupMarkDeletedEntityJob"/>
						<PARAM env="actionset" name="dedupConfigSequence"/>
						<PARAM name="cluster" ref="cluster"/>
						<PARAM name="jobParams">
							<MAP>
								<ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapred.outputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.outputtable" ref="hbaseTable"/>
								<ENTRY env="dedup.entity.name" key="entityType"/>
								<ENTRY env="dedup.entity.id" key="entityTypeId"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="buildRoots"/>
					</ARCS>
				</NODE>
				<!-- DedupConfigurationLoaderJob node launching the Hadoop job "dedupBuildRootsJob".
				     Job map: hbaseTable is both input and output table (each under two property
				     spellings); entity name, entity id and "ontologies" are taken from env.
				     ("ontologies" is the name configured on the fetchOntologies start node.)
				     Its single unnamed arc leads to mergeRels. -->
				<NODE name="buildRoots" type="DedupConfigurationLoaderJob">
					<DESCRIPTION>redirect rels</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="hadoopJob" value="dedupBuildRootsJob"/>
						<PARAM env="actionset" name="dedupConfigSequence"/>
						<PARAM name="cluster" ref="cluster"/>
						<PARAM name="jobParams">
							<MAP>
								<ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapred.outputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.outputtable" ref="hbaseTable"/>
								<ENTRY env="dedup.entity.name" key="entityType"/>
								<ENTRY env="dedup.entity.id" key="entityTypeId"/>
								<ENTRY env="ontologies" key="ontologies"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="mergeRels"/>
					</ARCS>
				</NODE>
				<!-- Last step of the per-entity chain: DedupConfigurationLoaderJob node launching the
				     Hadoop job "mergeRelationVersionJob" with the same job map as buildRoots
				     (hbaseTable as input and output, entity name/id and "ontologies" from env).
				     Its single unnamed arc returns to entitySequence, closing the loop. -->
				<NODE name="mergeRels" type="DedupConfigurationLoaderJob">
					<DESCRIPTION>merge rels</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="hadoopJob" value="mergeRelationVersionJob"/>
						<PARAM env="actionset" name="dedupConfigSequence"/>
						<PARAM name="cluster" ref="cluster"/>
						<PARAM name="jobParams">
							<MAP>
								<ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapred.outputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.outputtable" ref="hbaseTable"/>
								<ENTRY env="dedup.entity.name" key="entityType"/>
								<ENTRY env="dedup.entity.id" key="entityTypeId"/>
								<ENTRY env="ontologies" key="ontologies"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="entitySequence"/>
					</ARCS>
				</NODE>
			</WORKFLOW>
		</CONFIGURATION>
	</BODY>
</RESOURCE_PROFILE>
(1-1/5)