Project

General

Profile

1
<RESOURCE_PROFILE>
2
	<HEADER>
		<!-- Identity of this resource profile. The part of the identifier after the
		     underscore is the Base64 form of
		     "WorkflowTemplateDSResources/WorkflowTemplateDSResourceType",
		     i.e. the RESOURCE_KIND and RESOURCE_TYPE values declared below. -->
		<RESOURCE_IDENTIFIER value="01ed11e8-e874-4478-a8ac-83e63e9699e4_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
		<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/>
		<RESOURCE_KIND value="WorkflowTemplateDSResources"/>
		<!-- No URI is recorded for this profile (the value is empty). -->
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2016-06-25T10:24:32+00:00"/>
	</HEADER>
9
	<BODY>
10
		<CONFIGURATION>
11

    
12
			<!-- Template-level parameters. Workflow nodes pick them up through
			     ref="<name>" attributes on their own PARAM / ENTRY elements. -->
			<PARAMETERS>
				<!-- Storage and cluster selection; referenced by the Hadoop job nodes. -->
				<PARAM name="hbaseTable" description="HBase table name" required="true" type="string"/>
				<PARAM name="cluster" description="Hadoop cluster name" required="true" type="string"/>

				<!-- Dedup configuration and the work directory handed to the resetPath node. -->
				<PARAM name="dedupConfigSequence" description="dedup configuration orchestration name" required="true" type="string"/>
				<PARAM name="minDistWorkDir" description="work directory for the minDist algorithm" required="true" type="string"/>

				<!-- NOTE(review): mappingSimilarities, mappingDissimilarities, mdFormat, layout and
				     interpretation are declared as required but no node in the WORKFLOW of this
				     profile references them; confirm whether they are consumed elsewhere. -->
				<PARAM name="mappingSimilarities" description="xslt mapping function for similarities" required="true" type="string"/>
				<PARAM name="mappingDissimilarities" description="xslt mapping function for dissimilarities" required="true" type="string"/>

				<!-- Index coordinates; each of these carries a default value. -->
				<PARAM name="mdFormat" description="dedup index mdFormat" required="true" type="string" default="OPENAIRE"/>
				<PARAM name="layout" description="dedup index layout" required="true" type="string" default="index"/>
				<PARAM name="interpretation" description="dedup index interpretation" required="true" type="string" default="dedup"/>
			</PARAMETERS>
28

    
29
			<WORKFLOW>
30

    
31
				<!-- Entry point of the workflow (isStart="true"). Receives the dedupConfigSequence
				     template parameter and three literal names; its single arc leads to resetPath. -->
				<NODE isStart="true" name="setDedupConfigs" type="SetDedupConfiguration">
					<DESCRIPTION>Set Dedup conf</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="dedupConfigSequence" ref="dedupConfigSequence"/>
						<!-- The three values below match env keys that later nodes read via
						     env="dedup.conf.queue", env="entityType" and env="entityTypeId";
						     presumably this node publishes them under those names (TODO confirm). -->
						<PARAM name="dedupConfigSequenceParam" value="dedup.conf.queue"/>
						<PARAM name="entityTypeNameParam" value="entityType"/>
						<PARAM name="entityTypeIdParam" value="entityTypeId"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="resetPath"/>
					</ARCS>
				</NODE>
43

    
44

    
45
				<!-- CreateHdfsDirJob node run on the path given by the minDistWorkDir template
				     parameter, on the selected cluster; continues to deduplicateScan. -->
				<NODE name="resetPath" type="CreateHdfsDirJob">
					<DESCRIPTION>input files cleanup</DESCRIPTION>
					<PARAMETERS>
						<!-- NOTE(review): force="true" together with the "cleanup" description suggests an
						     existing directory is replaced; verify against the CreateHdfsDirJob node. -->
						<PARAM name="force" value="true"/>
						<PARAM name="path" ref="minDistWorkDir"/>
						<PARAM name="cluster" ref="cluster"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="deduplicateScan"/>
					</ARCS>
				</NODE>
56

    
57

    
58
				<!-- Runs the dedupCandidateScanJob Hadoop job against the configured HBase table.
				     The unnamed arc points back to this same node, while the arc named "done"
				     moves on to prepareActionSet; presumably the node repeats once per entry of
				     the dedup.conf.queue env value (TODO confirm against DuplicateScanJob). -->
				<NODE name="deduplicateScan" type="DuplicateScanJob">
					<DESCRIPTION>Dup Scan</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="hadoopJob" value="dedupCandidateScanJob"/>
						<PARAM name="dedupConfigurationOrchestration" env="dedup.conf.queue"/>
						<PARAM name="jobParams">
							<MAP>
								<ENTRY key="entityTypeId" env="entityTypeId"/>
								<ENTRY key="entityType" env="entityType"/>
								<ENTRY key="cluster" ref="cluster"/>
								<!-- The same table is supplied as input and output, under both the
								     "mapred" and "mapreduce" spellings of the HBase property keys. -->
								<ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapred.outputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.outputtable" ref="hbaseTable"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="deduplicateScan"/>
						<ARC name="done" to="prepareActionSet"/>
					</ARCS>
				</NODE>
80

    
81
				<!-- PrepareConfiguredActionSet node: receives the dedupConfigSequence template
				     parameter plus three literal names, then continues to similarity2actions. -->
				<NODE name="prepareActionSet" type="PrepareConfiguredActionSet">
					<DESCRIPTION>prepare action sets</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="dedupConfigSequence" ref="dedupConfigSequence"/>
						<!-- "rawSetId" and "actionSetPath" are the env keys that similarity2actions
						     reads afterwards; presumably this node stores its results under the
						     names given here (TODO confirm). -->
						<PARAM name="jobProperty" value="rawSetId"/>
						<PARAM name="actionSetPathParam" value="actionSetPath"/>
						<PARAM name="setsParam" value="sets"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="similarity2actions"/>
					</ARCS>
				</NODE>
93

    
94
				<!-- Runs the dedupSimilarity2HdfsActionsJob Hadoop job, reading from the configured
				     HBase table and writing to the directory held in the actionSetPath env value;
				     continues to updateActionSets. -->
				<NODE name="similarity2actions" type="DedupSimilarityToActions">
					<DESCRIPTION>export the similarity rels as Actions</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="hadoopJob" value="dedupSimilarity2HdfsActionsJob"/>
						<PARAM name="dedupConfigSequence" ref="dedupConfigSequence"/>
						<PARAM name="entityType" env="entityType"/>
						<PARAM name="jobParams">
							<MAP>
								<!-- NOTE(review): no node in this profile names "dedup.conf" explicitly;
								     confirm which upstream step places it in the env. -->
								<ENTRY key="dedup.conf" env="dedup.conf"/>
								<ENTRY key="entityTypeId" env="entityTypeId"/>
								<ENTRY key="entityType" env="entityType"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="rawSetId" env="rawSetId"/>
								<ENTRY key="mapred.output.dir" env="actionSetPath"/>
								<!-- Input table only, given under both property-key spellings. -->
								<ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="updateActionSets"/>
					</ARCS>
				</NODE>
117

    
118
				<!-- UpdateActionSets node; takes no explicit parameters and continues to doneActions. -->
				<NODE name="updateActionSets" type="UpdateActionSets">
					<DESCRIPTION>update action sets</DESCRIPTION>
					<PARAMETERS/>
					<ARCS>
						<ARC to="doneActions"/>
					</ARCS>
				</NODE>
125

    
126
				<!-- Join node (isJoin="true") with no type attribute and no parameters. Its arc
				     targets "success", which is not declared as a NODE in this profile;
				     presumably it is a terminal node provided by the workflow engine (TODO confirm). -->
				<NODE name="doneActions" isJoin="true">
					<DESCRIPTION>done actions</DESCRIPTION>
					<PARAMETERS/>
					<ARCS>
						<ARC to="success"/>
					</ARCS>
				</NODE>
133

    
134
			</WORKFLOW>
135
		</CONFIGURATION>
136
	</BODY>
137
</RESOURCE_PROFILE>
(3-3/6)