<!-- Project / General / Profile -->
<RESOURCE_PROFILE>
2
	<!-- Profile header: identity and classification of this resource.
		The part of RESOURCE_IDENTIFIER after the underscore is the Base64 encoding of
		"WorkflowTemplateDSResources/WorkflowTemplateDSResourceType", i.e. RESOURCE_KIND/RESOURCE_TYPE.
		RESOURCE_URI is intentionally left as it was: an empty value. -->
	<HEADER>
		<RESOURCE_IDENTIFIER value="01ed11e8-e874-4478-a8ac-83e63e9699e4_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
		<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/>
		<RESOURCE_KIND value="WorkflowTemplateDSResources"/>
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2016-06-25T10:24:32+00:00"/>
	</HEADER>
9
	<BODY>
10
		<CONFIGURATION>
11

    
12
			<!-- Template-level parameters. NODE parameters in the WORKFLOW bind to these by name
				through ref="..." (hbaseTable, cluster, dedupConfigSequence, minDistWorkDir).
				All are declared required; the last three also carry a default value. -->
			<PARAMETERS>
				<!-- Storage and cluster selection -->
				<PARAM name="hbaseTable" description="HBase table name" required="true" type="string"/>
				<PARAM name="cluster" description="Hadoop cluster name" required="true" type="string"/>

				<!-- Dedup configuration and scratch directory -->
				<PARAM name="dedupConfigSequence" description="dedup configuration orchestration name" required="true" type="string"/>
				<PARAM name="minDistWorkDir" description="work directory for the minDist algorithm" required="true" type="string"/>

				<!-- Dedup index coordinates. NOTE(review): mdFormat, layout and interpretation are not
					referenced by any NODE in this WORKFLOW; confirm whether they are still needed. -->
				<PARAM name="mdFormat" description="dedup index mdFormat" required="true" type="string" default="OPENAIRE"/>
				<PARAM name="layout" description="dedup index layout" required="true" type="string" default="index"/>
				<PARAM name="interpretation" description="dedup index interpretation" required="true" type="string" default="dedup"/>
			</PARAMETERS>
25

    
26
			<WORKFLOW>
27

    
28
				<!-- Start node (isStart="true") of type SetDedupConfiguration.
					Receives the dedupConfigSequence template parameter and three literal names:
					"dedup.conf.queue", "entityType" and "entityTypeId". The same three names are read
					later with env="..." by deduplicateScan and similarity2actions, so presumably this
					node publishes values under those env keys; confirm against the node implementation.
					Single outgoing arc: resetPath. -->
				<NODE isStart="true" name="setDedupConfigs" type="SetDedupConfiguration">
					<DESCRIPTION>Set Dedup conf</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="dedupConfigSequence" ref="dedupConfigSequence"/>
						<PARAM name="dedupConfigSequenceParam" value="dedup.conf.queue"/>
						<PARAM name="entityTypeNameParam" value="entityType"/>
						<PARAM name="entityTypeIdParam" value="entityTypeId"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="resetPath"/>
					</ARCS>
				</NODE>
40

    
41

    
42
				<!-- CreateHdfsDirJob node operating on the minDistWorkDir path of the selected cluster,
					with force="true". The DESCRIPTION calls this an input files cleanup, so force
					presumably replaces an existing directory; confirm against the job implementation.
					Single outgoing arc: deduplicateScan. -->
				<NODE name="resetPath" type="CreateHdfsDirJob">
					<DESCRIPTION>input files cleanup</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="force" value="true"/>
						<PARAM name="path" ref="minDistWorkDir"/>
						<PARAM name="cluster" ref="cluster"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="deduplicateScan"/>
					</ARCS>
				</NODE>
53

    
54

    
55
				<!-- DuplicateScanJob node running the hadoop job "dedupCandidateScanJob".
					The dedup configuration comes from the env key "dedup.conf.queue"; the job parameters
					mix env values (entityTypeId, entityType) with template parameters (cluster, hbaseTable).
					The same hbaseTable is used as both input and output table, under both the
					hbase.mapred.* and hbase.mapreduce.* key spellings.
					Arcs: the unnamed arc points back to this node (presumably one pass per queued
					configuration; confirm), "done" goes to prepareActionSet, "no_dedup" goes straight
					to doneActions. -->
				<NODE name="deduplicateScan" type="DuplicateScanJob">
					<DESCRIPTION>Dup Scan</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="hadoopJob" value="dedupCandidateScanJob"/>
						<PARAM name="dedupConfigurationOrchestration" env="dedup.conf.queue"/>
						<PARAM name="jobParams">
							<MAP>
								<ENTRY key="entityTypeId" env="entityTypeId"/>
								<ENTRY key="entityType" env="entityType"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapred.outputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.outputtable" ref="hbaseTable"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="deduplicateScan"/>
						<ARC name="done" to="prepareActionSet"/>
						<ARC name="no_dedup" to="doneActions"/>
					</ARCS>
				</NODE>
78

    
79
				<!-- PrepareConfiguredActionSet node, reached through the "done" arc of deduplicateScan.
					Receives the dedupConfigSequence template parameter plus three literal names:
					"rawSetId", "actionSetPath" and "sets". similarity2actions later reads rawSetId and
					actionSetPath with env="...", so presumably this node publishes them as env keys;
					confirm against the node implementation.
					Single outgoing arc: similarity2actions. -->
				<NODE name="prepareActionSet" type="PrepareConfiguredActionSet">
					<DESCRIPTION>prepare action sets</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="dedupConfigSequence" ref="dedupConfigSequence"/>
						<PARAM name="jobProperty" value="rawSetId"/>
						<PARAM name="actionSetPathParam" value="actionSetPath"/>
						<PARAM name="setsParam" value="sets"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="similarity2actions"/>
					</ARCS>
				</NODE>
91

    
92
				<!-- DedupSimilarityToActions node running the hadoop job "dedupSimilarity2HdfsActionsJob".
					Job parameters: env values dedup.conf, entityTypeId, entityType and rawSetId; the job
					output directory (mapred.output.dir) is taken from the env key actionSetPath; the
					input table is hbaseTable under both the hbase.mapred.* and hbase.mapreduce.* spellings.
					NOTE(review): no node in this WORKFLOW names "dedup.conf" explicitly; presumably it is
					set by an earlier node at runtime; confirm.
					Single outgoing arc: updateActionSets. -->
				<NODE name="similarity2actions" type="DedupSimilarityToActions">
					<DESCRIPTION>export the similarity rels as Actions</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="hadoopJob" value="dedupSimilarity2HdfsActionsJob"/>
						<PARAM name="dedupConfigSequence" ref="dedupConfigSequence"/>
						<PARAM name="entityType" env="entityType"/>
						<PARAM name="jobParams">
							<MAP>
								<ENTRY key="dedup.conf" env="dedup.conf"/>
								<ENTRY key="entityTypeId" env="entityTypeId"/>
								<ENTRY key="entityType" env="entityType"/>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="rawSetId" env="rawSetId"/>
								<ENTRY key="mapred.output.dir" env="actionSetPath"/>
								<ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="updateActionSets"/>
					</ARCS>
				</NODE>
115

    
116
				<!-- UpdateActionSets node: takes no explicit parameters (empty PARAMETERS element).
					Single outgoing arc: doneActions. -->
				<NODE name="updateActionSets" type="UpdateActionSets">
					<DESCRIPTION>update action sets</DESCRIPTION>
					<PARAMETERS/>
					<ARCS>
						<ARC to="doneActions"/>
					</ARCS>
				</NODE>
123

    
124
                <!-- Join node with no type attribute and no parameters. It is reached from
                     updateActionSets and from the "no_dedup" arc of deduplicateScan.
                     Its only arc targets "success", which is not a NODE declared in this WORKFLOW;
                     presumably a built-in terminal state; confirm against the workflow engine. -->
                <NODE name="doneActions">
					<DESCRIPTION>done actions</DESCRIPTION>
					<PARAMETERS/>
					<ARCS>
						<ARC to="success"/>
					</ARCS>
				</NODE>
131

    
132
			</WORKFLOW>
133
		</CONFIGURATION>
134
	</BODY>
135
</RESOURCE_PROFILE>
<!-- (2-2/5) -->