Project

General

Profile

1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<!-- Profile header: identifies this resource and records its type, kind, URI and creation time.
	     The part of RESOURCE_IDENTIFIER after the underscore is the Base64 encoding of
	     "WorkflowDSResources/WorkflowDSResourceType", i.e. RESOURCE_KIND/RESOURCE_TYPE below.
	     RESOURCE_URI is left empty in this profile. -->
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="4cf9ec5f-4f0d-4806-a93e-4905c5a1da74_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
		<RESOURCE_KIND value="WorkflowDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
9
	</HEADER>
10
	<BODY>
11
		<!-- Workflow metadata: display name, workflow type and priority.
		     NOTE(review): the scale and ordering of WORKFLOW_PRIORITY are not visible in this file;
		     confirm against the workflow engine before changing the value. -->
		<WORKFLOW_NAME>Dedup results (Online)</WORKFLOW_NAME>
12
		<WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
13
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
		<CONFIGURATION start="manual">
15
			<!-- Start node (isStart="true") of type FetchRelClasses.
			     relClassesProperty names the property "dnet.openaire.model.relclasses.xquery" and
			     relClassesName is "relClasses", the same key that the buildRoots node lists in its envParams.
			     Presumably the fetched relation classes are published under that name; TODO confirm
			     against the FetchRelClasses node implementation.
			     Single outgoing arc: dedupPhase. -->
			<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true">
16
				<DESCRIPTION/>
17
				<PARAMETERS>
18
					<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM>
19
					<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM>
20
				</PARAMETERS>
21
				<ARCS>
22
					<ARC to="dedupPhase"/>
23
				</ARCS>
24
			</NODE>
25
			<!-- Second start node (isStart="true"), type LoadDedupConfiguration.
			     Its only parameter sets entityType to "result".
			     Like fetchRelClasses, its single outgoing arc targets dedupPhase, which is declared as a join node. -->
			<NODE name="configure" type="LoadDedupConfiguration" isStart="true">
26
				<DESCRIPTION>Load Dedup conf</DESCRIPTION>
27
				<PARAMETERS>
28
					<PARAM required="true" type="string" name="entityType" managedBy="system">result</PARAM>
29
				</PARAMETERS>
30
				<ARCS>
31
					<ARC to="dedupPhase"/>
32
				</ARCS>
33
			</NODE>
34
			<!-- Join node (isJoin="true"): it is the arc target of both start nodes, fetchRelClasses and configure.
			     Type SubmitHadoopJob; submits the job named "dedupCandidateScanJob" on cluster "DM".
			     sysParams, envParams and params carry single-quoted key/value maps as element text.
			     In sysParams all four hbase input/output table keys map to "hbase.mapred.datatable".
			     NOTE(review): whether the right-hand values are literals or names resolved elsewhere
			     (system properties, workflow env) is not visible here; confirm before editing them.
			     Single outgoing arc: dedupGrouper. -->
			<NODE name="dedupPhase" type="SubmitHadoopJob" isJoin="true">
35
				<DESCRIPTION>Dedup M/R job</DESCRIPTION>
36
				<PARAMETERS>
37
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupCandidateScanJob</PARAM>
38
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
39
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
40
						{
41
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
42
						'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
43
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
44
						'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
45
						}
46
					</PARAM>
47
					<PARAM required="true" type="string" name="envParams" managedBy="system">
48
						{
49
						'dedup.pace.conf' : 'dedup.pace.conf',
50
						'dedup.wf.conf' : 'dedup.wf.conf'
51
						}
52
					</PARAM>
53
					<PARAM required="true" type="string" name="params" managedBy="system">
54
						{
55
						'entityTypeId' : '50',
56
						'entityType' : 'result'
57
						}
58
					</PARAM>
59
				</PARAMETERS>
60
				<ARCS>
61
					<ARC to="dedupGrouper"/>
62
				</ARCS>
63
			</NODE>
64
			<!-- Node of type DedupGrouperJob running the job named "dedupGrouperJob" on cluster "DM".
			     It has two outgoing arcs: an unnamed arc that points back to this same node, and an arc
			     named "done" that leads to findRoots.
			     Presumably the node repeats itself until it signals "done"; TODO confirm against the
			     DedupGrouperJob implementation before touching the self arc.
			     Unlike dedupPhase and findRoots, params here carries only entityTypeId (no entityType). -->
			<NODE name="dedupGrouper" type="DedupGrouperJob">
65
				<DESCRIPTION>dedup grouper</DESCRIPTION>
66
				<PARAMETERS>
67
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupGrouperJob</PARAM>
68
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
69
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
70
						{
71
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
72
						'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
73
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
74
						'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
75
						}
76
					</PARAM>
77
					<PARAM required="true" type="string" name="envParams" managedBy="system">
78
						{
79
						'dedup.pace.conf' : 'dedup.pace.conf',
80
						'dedup.wf.conf' : 'dedup.wf.conf'
81
						}
82
					</PARAM>
83
					<PARAM required="true" type="string" name="params" managedBy="system">
84
						{
85
						'entityTypeId' : '50'
86
						}
87
					</PARAM>
88
				</PARAMETERS>
89
				<ARCS>
90
					<ARC to="dedupGrouper"/>
91
					<ARC name="done" to="findRoots"/>
92
				</ARCS>
93
			</NODE>
94

    
95
			<!-- Reached from dedupGrouper through its arc named "done".
			     Type SubmitHadoopJob; submits the job named "dedupFindRootsJob" on cluster "DM".
			     sysParams and envParams are identical to those of dedupPhase; params carries both
			     entityTypeId ("50") and entityType ("result").
			     Single outgoing arc: buildRoots. -->
			<NODE name="findRoots" type="SubmitHadoopJob">
96
				<DESCRIPTION>find roots</DESCRIPTION>
97
				<PARAMETERS>
98
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupFindRootsJob</PARAM>
99
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
100
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
101
						{
102
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
103
						'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
104
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
105
						'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
106
						}
107
					</PARAM>
108
					<PARAM required="true" type="string" name="envParams" managedBy="system">
109
						{
110
						'dedup.pace.conf' : 'dedup.pace.conf',
111
						'dedup.wf.conf' : 'dedup.wf.conf'
112
						}
113
					</PARAM>
114
					<PARAM required="true" type="string" name="params" managedBy="system">
115
						{
116
						'entityTypeId' : '50',
117
						'entityType' : 'result'
118
						}
119
					</PARAM>
120
				</PARAMETERS>
121
				<ARCS>
122
					<ARC to="buildRoots"/>
123
				</ARCS>
124
			</NODE>
125
			<!-- Last job node of the chain. Type SubmitHadoopJob; submits "dedupBuildRootsJob" on cluster "DM".
			     envParams has one entry more than the other job nodes: 'relClasses', which matches the
			     relClassesName value configured on the fetchRelClasses start node.
			     The outgoing arc targets "success", which is not declared as a NODE in this CONFIGURATION;
			     presumably it is a terminal node provided by the workflow engine (TODO confirm).
			     NOTE(review): DESCRIPTION reads "redirect rels" while the node and job are named
			     build roots; check whether the description was copied from another node. -->
			<NODE name="buildRoots" type="SubmitHadoopJob">
126
				<DESCRIPTION>redirect rels</DESCRIPTION>
127
				<PARAMETERS>
128
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupBuildRootsJob</PARAM>
129
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
130
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
131
						{
132
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
133
						'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
134
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
135
						'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
136
						}
137
					</PARAM>
138
					<PARAM required="true" type="string" name="envParams" managedBy="system">
139
						{
140
						'dedup.pace.conf' : 'dedup.pace.conf',
141
						'dedup.wf.conf' : 'dedup.wf.conf',
142
						'relClasses' : 'relClasses'
143
						}
144
					</PARAM>
145
					<PARAM required="true" type="string" name="params" managedBy="system">
146
						{
147
						'entityTypeId' : '50'
148
						}
149
					</PARAM>
150
				</PARAMETERS>
151
				<ARCS>
152
					<ARC to="success"/>
153
				</ARCS>
154
			</NODE>
155
		</CONFIGURATION>
156
		<STATUS/>
157
	</BODY>
158
</RESOURCE_PROFILE>
159

    
(3-3/6)