<?xml version="1.0" encoding="UTF-8"?>
<!-- Headings carried over from the exported listing: Project / General / Profile -->
<RESOURCE_PROFILE>
	<!-- Resource header: identifier, type, kind, URI and creation timestamp of this profile. -->
	<HEADER>
		<!-- The part after "_" is the Base64 form of "WorkflowDSResources/WorkflowDSResourceType", i.e. RESOURCE_KIND/RESOURCE_TYPE. -->
		<RESOURCE_IDENTIFIER value="f6a4c2e5-a663-4700-844f-1b753484efee_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
		<RESOURCE_KIND value="WorkflowDSResources"/>
		<!-- Left empty in this profile. -->
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
	</HEADER>
	<BODY>
		<!-- Workflow name, type and priority. -->
		<WORKFLOW_NAME>Dedup organizations (Online)</WORKFLOW_NAME>
		<WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
		<!--
			Node graph of the workflow; start="manual".
			Two nodes (fetchRelClasses, configure) carry isStart="true" and both arc into the SELECT_MODE join node.
		-->
		<CONFIGURATION start="manual">
			<!--
				fetchRelClasses: one of the two start nodes (isStart="true").
				Both parameters are system-managed and required; the only arc leads to SELECT_MODE.
				NOTE(review): presumably the query referenced by relClassesProperty is evaluated and its result
				is published under the name given by relClassesName; confirm against the FetchRelClasses node type.
			-->
			<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true">
				<DESCRIPTION/>
				<PARAMETERS>
					<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM>
					<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="SELECT_MODE"/>
				</ARCS>
			</NODE>
			<!--
				configure: the second start node (isStart="true"), of type LoadDedupConfiguration.
				Takes a single system-managed parameter, entityType = organization, and arcs to SELECT_MODE.
			-->
			<NODE name="configure" type="LoadDedupConfiguration" isStart="true">
				<DESCRIPTION>Load Dedup conf</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="entityType" type="string" managedBy="system" required="true">organization</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="SELECT_MODE"/>
				</ARCS>
			</NODE>
			<!--
				SELECT_MODE: join node (isJoin="true") reached from both start nodes.
				The user-managed "selection" parameter is restricted to YES or NO and defaults to NO.
				Arc YES goes straight to findRoots, bypassing deduplicateScan and dedupGrouper;
				arc NO goes to deduplicateScan.
			-->
			<NODE name="SELECT_MODE" type="Selection" isJoin="true">
				<DESCRIPTION>Do we start from GROUND TRUTH?</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="selection" type="string" managedBy="user" required="true" function="validValues(['YES', 'NO'])">NO</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC name="YES" to="findRoots"/>
					<ARC name="NO" to="deduplicateScan"/>
				</ARCS>
			</NODE>
			<!--
				deduplicateScan: submits the Hadoop job "dedupCandidateScanJob" on cluster "DM", then arcs to dedupGrouper.
				sysParams, envParams and params each hold a brace-delimited map of quoted key/value pairs;
				the text content of those PARAM elements must contain nothing but that map.
				NOTE(review): the right-hand values in sysParams and envParams look like names to be resolved
				at run time instead of literal values; confirm against the SubmitHadoopJob node type.
			-->
			<NODE name="deduplicateScan" type="SubmitHadoopJob">
				<DESCRIPTION>Dup Scan</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="hadoopJob" type="string" managedBy="system" required="true">dedupCandidateScanJob</PARAM>
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
					<PARAM name="sysParams" type="string" managedBy="system" required="true">
						{
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
						'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
						'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
						}
					</PARAM>
					<PARAM name="envParams" type="string" managedBy="system" required="true">
						{
						'dedup.pace.conf' : 'dedup.pace.conf',
						'dedup.wf.conf' : 'dedup.wf.conf'
						}
					</PARAM>
					<PARAM name="params" type="string" managedBy="system" required="true">
						{
						'entityTypeId' : '20',
						'entityType' : 'organization'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="dedupGrouper"/>
				</ARCS>
			</NODE>
			<!--
				dedupGrouper: node of type DedupGrouperJob running the Hadoop job "dedupGrouperJob" on cluster "DM".
				The unnamed arc points back at this same node, while the arc named "done" leads to findRoots,
				so the node is re-entered until "done" is taken.
				NOTE(review): presumably the DedupGrouperJob node type decides which arc fires; confirm there.
				Unlike deduplicateScan and findRoots, params carries only entityTypeId (no entityType).
			-->
			<NODE name="dedupGrouper" type="DedupGrouperJob">
				<DESCRIPTION>dedup grouper</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="hadoopJob" type="string" managedBy="system" required="true">dedupGrouperJob</PARAM>
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
					<PARAM name="sysParams" type="string" managedBy="system" required="true">
						{
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
						'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
						'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
						}
					</PARAM>
					<PARAM name="envParams" type="string" managedBy="system" required="true">
						{
						'dedup.pace.conf' : 'dedup.pace.conf',
						'dedup.wf.conf' : 'dedup.wf.conf'
						}
					</PARAM>
					<PARAM name="params" type="string" managedBy="system" required="true">
						{
						'entityTypeId' : '20'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="dedupGrouper"/>
					<ARC name="done" to="findRoots"/>
				</ARCS>
			</NODE>
			<!--
				findRoots: submits the Hadoop job "dedupFindRootsJob" on cluster "DM", then arcs to buildRoots.
				Reached either from dedupGrouper (arc "done") or directly from SELECT_MODE (arc "YES").
				Its sysParams, envParams and params maps are the same as those of deduplicateScan.
			-->
			<NODE name="findRoots" type="SubmitHadoopJob">
				<DESCRIPTION>find roots</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="hadoopJob" type="string" managedBy="system" required="true">dedupFindRootsJob</PARAM>
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
					<PARAM name="sysParams" type="string" managedBy="system" required="true">
						{
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
						'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
						'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
						}
					</PARAM>
					<PARAM name="envParams" type="string" managedBy="system" required="true">
						{
						'dedup.pace.conf' : 'dedup.pace.conf',
						'dedup.wf.conf' : 'dedup.wf.conf'
						}
					</PARAM>
					<PARAM name="params" type="string" managedBy="system" required="true">
						{
						'entityTypeId' : '20',
						'entityType' : 'organization'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="buildRoots"/>
				</ARCS>
			</NODE>
			<!--
				buildRoots: last job of the chain; submits "dedupBuildRootsJob" on cluster "DM" and arcs to "success".
				envParams additionally maps 'relClasses', the same name fetchRelClasses uses for relClassesName.
				NOTE(review): the description reads "redirect rels" although node and job are named for building roots;
				confirm the label is intended before changing it.
			-->
			<NODE name="buildRoots" type="SubmitHadoopJob">
				<DESCRIPTION>redirect rels</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="hadoopJob" type="string" managedBy="system" required="true">dedupBuildRootsJob</PARAM>
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
					<PARAM name="sysParams" type="string" managedBy="system" required="true">
						{
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
						'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
						'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
						}
					</PARAM>
					<PARAM name="envParams" type="string" managedBy="system" required="true">
						{
						'dedup.pace.conf' : 'dedup.pace.conf',
						'dedup.wf.conf' : 'dedup.wf.conf',
						'relClasses' : 'relClasses'
						}
					</PARAM>
					<PARAM name="params" type="string" managedBy="system" required="true">
						{
						'entityTypeId' : '20'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="success"/>
				</ARCS>
			</NODE>
		</CONFIGURATION>
		<STATUS/>
	</BODY>
</RESOURCE_PROFILE>