Project

General

Profile

« Previous | Next » 

Revision 38725

added dedup person workflows

View differences:

modules/dnet-deduplication/trunk/src/main/resources/eu/dnetlib/test/profiles/meta/offline.dedup.person.xml
20 20
 			       	</WORKFLOW>
21 21
            	</WORKFLOW>
22 22
            	<WORKFLOW id="31483043-7dd0-435f-b76e-bad9107aecc4_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="Dedup person full"/>
23
            	<WORKFLOW id="ae261797-c373-48ce-8581-f1b66f702323_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="Build Anchor Map"/>
24
            	<WORKFLOW id="1c782e6e-33d5-4986-b2f0-5d017415d348_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="Export Fullnames"/>        	
23 25
            </WORKFLOW>
24 26
		</CONFIGURATION>
25 27
		<SCHEDULING enabled="false">
modules/dnet-deduplication/trunk/src/main/resources/eu/dnetlib/test/profiles/meta/workflows/copyHbaseTable3.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
    <HEADER>
4
        <RESOURCE_IDENTIFIER value="3d12d23f-9f4e-48a4-99c9-d896358a3f7b_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
        <RESOURCE_KIND value="WorkflowDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
9
    </HEADER>
10
    <BODY>
11
        <WORKFLOW_NAME>Copy HBase Table</WORKFLOW_NAME>
12
        <WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
13
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
        <CONFIGURATION start="manual">
15
			<NODE name="setInfo" type="PrepareCopyTable" isStart="true">
16
				<DESCRIPTION>Set copy parameters</DESCRIPTION>
17
				<PARAMETERS>
18
					<PARAM required="true" type="string" name="sourceCluster" managedBy="user" function="obtainValues('hadoopClusters', {})">DM</PARAM>
19
					<PARAM required="true" type="string" name="sourceTable" managedBy="user"></PARAM>
20
					<PARAM required="true" type="string" name="targetCluster" managedBy="user" function="obtainValues('hadoopClusters', {})">IIS</PARAM>
21
					<PARAM required="true" type="string" name="targetTable" managedBy="user"></PARAM>
22
				</PARAMETERS>
23
				<ARCS>
24
					<ARC to="checkSource" />
25
				</ARCS>
26
			</NODE>
27
			<NODE name="checkSource" type="CheckHBaseTable">
28
				<DESCRIPTION>check hbase source table</DESCRIPTION>
29
				<PARAMETERS>
30
					<PARAM required="true" type="string" name="tableColumnsParamName" managedBy="system">hTableColumns</PARAM>
31
					<PARAM required="true" type="string" name="envParams" managedBy="system">
32
						{ 
33
							'hbaseTable' : 'sourceTable',
34
							'cluster' : 'sourceCluster'
35
						}
36
					</PARAM>
37
					<PARAM required="true" type="string" name="existOutNode" managedBy="system">yes</PARAM>
38
					<PARAM required="true" type="string" name="dontExistOutNode" managedBy="system">no</PARAM>					
39
				</PARAMETERS>
40
				<ARCS>
41
					<ARC to="sourceDesc" name="yes" />
42
					<ARC to="failure" name="no" />
43
				</ARCS>
44
			</NODE>
45
			<NODE name="sourceDesc" type="GetHBaseTableDescription">
46
				<DESCRIPTION>get source table description</DESCRIPTION>
47
				<PARAMETERS>
48
					<PARAM required="true" type="string" name="tableColumnsParamName" managedBy="system">hTableColumns</PARAM>
49
					<PARAM required="true" type="string" name="envParams" managedBy="system">
50
						{ 
51
							'hbaseTable' : 'sourceTable',
52
							'cluster' : 'sourceCluster'
53
						}
54
					</PARAM>
55
				</PARAMETERS>
56
				<ARCS>
57
					<ARC to="checkTarget" />
58
				</ARCS>
59
			</NODE>						
60
			<NODE name="checkTarget" type="CheckHBaseTable">
61
				<DESCRIPTION>check hbase target table</DESCRIPTION>
62
				<PARAMETERS>
63
					<PARAM required="true" type="string" name="tableColumnsParamName" managedBy="system">hTableColumns</PARAM>
64
					<PARAM required="true" type="string" name="envParams" managedBy="system">
65
						{ 
66
							'hbaseTable' : 'targetTable',
67
							'cluster' : 'targetCluster'
68
						}
69
					</PARAM>
70
					<PARAM required="true" type="string" name="existOutNode" managedBy="system">yes</PARAM>
71
					<PARAM required="true" type="string" name="dontExistOutNode" managedBy="system">no</PARAM>
72
				</PARAMETERS>
73
				<ARCS>
74
					<ARC to="setMode" name="yes" />
75
					<ARC to="create" name="no" />
76
				</ARCS>
77
			</NODE>
78
			<NODE name="setMode" type="Selection">
79
				<DESCRIPTION>select copy mode</DESCRIPTION>
80
				<PARAMETERS>
81
					<PARAM required="true" type="string" name="selection" managedBy="user" function="validValues(['REFRESH','INCREMENTAL'])">REFRESH</PARAM>					
82
				</PARAMETERS>
83
				<ARCS>
84
					<ARC to="drop" name="REFRESH" />
85
					<ARC to="copyTable" name="INCREMENTAL" />
86
				</ARCS>
87
			</NODE>			
88
			<NODE name="drop" type="DropHBaseTable">
89
				<DESCRIPTION>drop hbase target table</DESCRIPTION>
90
				<PARAMETERS>
91
					<PARAM required="true" type="string" name="envParams" managedBy="system">
92
						{ 
93
							'hbaseTable' : 'targetTable',
94
							'cluster' : 'targetCluster'
95
						}
96
					</PARAM>					
97
				</PARAMETERS>
98
				<ARCS>
99
					<ARC to="create" />
100
				</ARCS>
101
			</NODE>
102
			<NODE name="create" type="CreateHBaseTable">
103
				<DESCRIPTION>create hbase table</DESCRIPTION>
104
				<PARAMETERS>
105
					<PARAM name="tableColumnsParamName" type="string" managedBy="system" required="true">hTableColumns</PARAM>
106
					<PARAM required="true" type="string" name="envParams" managedBy="system">
107
						{ 
108
							'hbaseTable' : 'targetTable',
109
							'cluster' : 'targetCluster'
110
						}
111
					</PARAM>			
112
				</PARAMETERS>
113
				<ARCS>
114
					<ARC to="copyTable" />
115
				</ARCS>
116
			</NODE>
117
			<NODE name="copyTable" type="SubmitHadoopJob">
118
				<DESCRIPTION>Copy table Job</DESCRIPTION>
119
				<PARAMETERS>
120
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">copytable</PARAM>
121
					<PARAM required="true" type="string" name="envParams" managedBy="system">
122
						{
123
							'cluster' : 'sourceCluster',
124
							'targetCluster' : 'targetCluster', 
125
							'new.name' : 'targetTable', 
126
							'hbase.mapreduce.inputtable' : 'sourceTable',
127
							'peer.adr' : 'peer.adr'
128
						}
129
					</PARAM>
130
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
131
				</PARAMETERS>
132
				<ARCS>
133
					<ARC to="success" />
134
				</ARCS>
135
			</NODE>			
136
        </CONFIGURATION>
137
        <STATUS />
138
    </BODY>
139
</RESOURCE_PROFILE>
modules/dnet-deduplication/trunk/src/main/resources/eu/dnetlib/test/profiles/meta/workflows/build.anchor.map.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
    <HEADER>
4
        <RESOURCE_IDENTIFIER value="ae261797-c373-48ce-8581-f1b66f702323_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
        <RESOURCE_KIND value="WorkflowDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
9
    </HEADER>
10
    <BODY>
11
        <!-- Display name; offline.dedup.person.xml lists this profile id as "Build Anchor Map". -->
        <WORKFLOW_NAME>Build Anchor Map</WORKFLOW_NAME>
12
        <WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
13
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
        <CONFIGURATION start="manual">
15

  
16
			<NODE name="hadoopConfig" type="SetClusterAndTable" isStart="true">
17
				<DESCRIPTION>Set table name</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
20
					<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM>
21
				</PARAMETERS>
22
				<ARCS>
23
					<ARC to="cleanupMap" />
24
				</ARCS>
25
			</NODE>			
26
			<NODE name="setPath" type="SetHdfsPathJob" isStart="true">
27
				<DESCRIPTION>Set map path name</DESCRIPTION>
28
				<PARAMETERS>
29
					<PARAM required="true" type="string" name="path" managedBy="user">/tmp/anchorMap</PARAM>
30
					<PARAM required="true" type="string" name="pathParam" managedBy="system">mapPath</PARAM>
31
				</PARAMETERS>
32
				<ARCS>
33
					<ARC to="cleanupMap" />
34
				</ARCS>
35
			</NODE>	
36
			
37
			<!-- postprocess -->
38
			
39
			<NODE name="cleanupMap" type="DeleteHdfsPathJob" isJoin="true">
40
				<DESCRIPTION>hdfs cleanup (anchor map path)</DESCRIPTION>
41
				<PARAMETERS>
42
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
43
					<PARAM required="true" type="string" name="envParams" managedBy="system">
44
						{ 	
45
							'path' : 'mapPath'
46
						}
47
					</PARAM>					
48
				</PARAMETERS>
49
				<ARCS>
50
					<ARC to="buildAnchorMap" />
51
				</ARCS>
52
			</NODE>			
53
 
54
			<NODE name="buildAnchorMap" type="SubmitHadoopJob">
55
				<DESCRIPTION>build the merged-to-anchor map</DESCRIPTION>
56
				<PARAMETERS>
57
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">buildMergedToAnchorMapJob</PARAM>
58
					<PARAM required="true" type="string" name="envParams" managedBy="system">
59
						{ 	
60
							'cluster' : 'cluster',
61
							'hbase.mapred.inputtable' : 'tableName', 
62
							'hbase.mapreduce.inputtable' : 'tableName', 
63
							'hbase.mapred.outputtable' : 'tableName', 
64
							'hbase.mapreduce.outputtable' : 'tableName',
65
							'mapred.output.dir' : 'mapPath'												
66
						}
67
					</PARAM>					
68
				</PARAMETERS>
69
				<ARCS>
70
					<ARC to="success" />
71
				</ARCS>
72
			</NODE>
73

  
74

  
75
        </CONFIGURATION>
76
        <STATUS />
77
    </BODY>
78
</RESOURCE_PROFILE>
modules/dnet-deduplication/trunk/src/main/resources/eu/dnetlib/test/profiles/meta/workflows/dedup.roots.export.xml
9 9
	</HEADER>
10 10
	<BODY>
11 11
		<WORKFLOW_NAME>Deduplication export</WORKFLOW_NAME>
12
		<WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
12
		<WORKFLOW_TYPE>Export to HDFS</WORKFLOW_TYPE>
13 13
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14 14
		<CONFIGURATION start="manual">
15 15
        	<NODE name="setCsvPath" type="SetEnvParameter" isStart="true">
......
39 39
				<PARAMETERS>
40 40
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
41 41
					<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM>
42
					<PARAM required="true" type="string" name="table" managedBy="user"/>
42 43
				</PARAMETERS>
43 44
				<ARCS>
44 45
					<ARC to="cleanupCsv" />
......
59 60
					<ARC to="export" />
60 61
				</ARCS>
61 62
			</NODE>
62
				
63
			
63 64
			<NODE name="export" type="SubmitHadoopJob">
64 65
				<DESCRIPTION>export the representative publications</DESCRIPTION>
65 66
				<PARAMETERS>
66 67
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupRootsExportJob</PARAM>
67
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
68
						{ 	
69
							'hbase.mapred.inputtable' : 'hbase.mapred.datatable', 
70
							'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
71
						}
72
					</PARAM>					
73 68
					<PARAM required="true" type="string" name="envParams" managedBy="system">
74 69
						{ 	
75 70
							'cluster' : 'cluster',
modules/dnet-deduplication/trunk/src/main/resources/eu/dnetlib/test/profiles/meta/workflows/copyBackGtTable.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
    <HEADER>
4
        <!-- NOTE(review): this identifier is identical to the one declared in copyHbaseTable3.xml;
             the two profiles may collide if both are registered - confirm and assign a distinct id. -->
        <RESOURCE_IDENTIFIER value="3d12d23f-9f4e-48a4-99c9-d896358a3f7b_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
        <RESOURCE_KIND value="WorkflowDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
9
    </HEADER>
10
    <BODY>
11
        <!-- Distinct from the plain "Copy HBase Table" workflow: this one also runs the cleanGT job. -->
        <WORKFLOW_NAME>Clean GT and Copy HBase Table</WORKFLOW_NAME>
12
        <WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
13
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
        <CONFIGURATION start="manual">
15
        
16
			<NODE name="setDedupConfigs" type="SetDedupConfiguration" isStart="true">
17
				<DESCRIPTION>Set Dedup conf</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM function="obtainValues('dedupOrchestrations', {})" required="true" type="string" name="dedupConfigSequence" managedBy="user"></PARAM>
20
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
21
				</PARAMETERS>
22
				<ARCS>
23
					<ARC to="checkConf" />
24
				</ARCS>
25
			</NODE>
26
        	<NODE name="checkConf" type="DedupCheckConfiguration">
27
				<DESCRIPTION>check dedup configuration</DESCRIPTION>
28
				<PARAMETERS>
29
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
30
				</PARAMETERS>
31
				<ARCS>
32
					<ARC to="setInfo" />
33
				</ARCS>
34
			</NODE>			        
35
        
36
			<NODE name="setInfo" type="PrepareCopyTable">
37
				<DESCRIPTION>Set copy parameters</DESCRIPTION>
38
				<PARAMETERS>
39
					<PARAM required="true" type="string" name="sourceCluster" managedBy="user" function="obtainValues('hadoopClusters', {})">DM</PARAM>
40
					<PARAM required="true" type="string" name="sourceTable" managedBy="user"></PARAM>
41
					<PARAM required="true" type="string" name="targetCluster" managedBy="user" function="obtainValues('hadoopClusters', {})">IIS</PARAM>
42
					<PARAM required="true" type="string" name="targetTable" managedBy="user"></PARAM>
43
				</PARAMETERS>
44
				<ARCS>
45
					<ARC to="checkSource" />
46
				</ARCS>
47
			</NODE>
48
			<NODE name="checkSource" type="CheckHBaseTable">
49
				<DESCRIPTION>check hbase source table</DESCRIPTION>
50
				<PARAMETERS>
51
					<PARAM required="true" type="string" name="tableColumnsParamName" managedBy="system">hTableColumns</PARAM>
52
					<PARAM required="true" type="string" name="envParams" managedBy="system">
53
						{ 
54
							'hbaseTable' : 'sourceTable',
55
							'cluster' : 'sourceCluster'
56
						}
57
					</PARAM>
58
					<PARAM required="true" type="string" name="existOutNode" managedBy="system">yes</PARAM>
59
					<PARAM required="true" type="string" name="dontExistOutNode" managedBy="system">no</PARAM>					
60
				</PARAMETERS>
61
				<ARCS>
62
					<ARC to="sourceDesc" name="yes" />
63
					<ARC to="failure" name="no" />
64
				</ARCS>
65
			</NODE>
66
			<NODE name="sourceDesc" type="GetHBaseTableDescription">
67
				<DESCRIPTION>get source table description</DESCRIPTION>
68
				<PARAMETERS>
69
					<PARAM required="true" type="string" name="tableColumnsParamName" managedBy="system">hTableColumns</PARAM>
70
					<PARAM required="true" type="string" name="envParams" managedBy="system">
71
						{ 
72
							'hbaseTable' : 'sourceTable',
73
							'cluster' : 'sourceCluster'
74
						}
75
					</PARAM>
76
				</PARAMETERS>
77
				<ARCS>
78
					<ARC to="checkTarget" />
79
				</ARCS>
80
			</NODE>						
81
			<NODE name="checkTarget" type="CheckHBaseTable">
82
				<DESCRIPTION>check hbase target table</DESCRIPTION>
83
				<PARAMETERS>
84
					<PARAM required="true" type="string" name="tableColumnsParamName" managedBy="system">hTableColumns</PARAM>
85
					<PARAM required="true" type="string" name="envParams" managedBy="system">
86
						{ 
87
							'hbaseTable' : 'targetTable',
88
							'cluster' : 'targetCluster'
89
						}
90
					</PARAM>
91
					<PARAM required="true" type="string" name="existOutNode" managedBy="system">yes</PARAM>
92
					<PARAM required="true" type="string" name="dontExistOutNode" managedBy="system">no</PARAM>
93
				</PARAMETERS>
94
				<ARCS>
95
					<ARC to="setMode" name="yes" />
96
					<ARC to="create" name="no" />
97
				</ARCS>
98
			</NODE>
99
			<NODE name="setMode" type="Selection">
100
				<DESCRIPTION>select copy mode</DESCRIPTION>
101
				<PARAMETERS>
102
					<PARAM required="true" type="string" name="selection" managedBy="user" function="validValues(['REFRESH','INCREMENTAL'])">REFRESH</PARAM>					
103
				</PARAMETERS>
104
				<ARCS>
105
					<ARC to="drop" name="REFRESH" />
106
					<!-- NOTE(review): INCREMENTAL goes straight to copyTable and skips cleanGT, whereas the
					     path through "create" runs cleanGT before copying - confirm this is intended. -->
					<ARC to="copyTable" name="INCREMENTAL" />
107
				</ARCS>
108
			</NODE>			
109
			<NODE name="drop" type="DropHBaseTable">
110
				<DESCRIPTION>drop hbase target table</DESCRIPTION>
111
				<PARAMETERS>
112
					<PARAM required="true" type="string" name="envParams" managedBy="system">
113
						{ 
114
							'hbaseTable' : 'targetTable',
115
							'cluster' : 'targetCluster'
116
						}
117
					</PARAM>					
118
				</PARAMETERS>
119
				<ARCS>
120
					<ARC to="create" />
121
				</ARCS>
122
			</NODE>
123
			<NODE name="create" type="CreateHBaseTable">
124
				<DESCRIPTION>create hbase table</DESCRIPTION>
125
				<PARAMETERS>
126
					<PARAM name="tableColumnsParamName" type="string" managedBy="system" required="true">hTableColumns</PARAM>
127
					<PARAM required="true" type="string" name="envParams" managedBy="system">
128
						{ 
129
							'hbaseTable' : 'targetTable',
130
							'cluster' : 'targetCluster'
131
						}
132
					</PARAM>			
133
				</PARAMETERS>
134
				<ARCS>
135
					<ARC to="cleanGT" />
136
				</ARCS>
137
			</NODE>
138
			
139
			
140
			
141
			<NODE name="cleanGT" type="DedupConfigurationAwareJob">
142
				<DESCRIPTION>Clean GT</DESCRIPTION>
143
				<PARAMETERS>
144
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">gtCleanerJob</PARAM>
145
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
146
					<PARAM required="true" type="string" name="envParams" managedBy="system">
147
						{
148
							'cluster' : 'sourceCluster',
149
							'dedup.conf' : 'dedup.conf',
150
							'entityTypeId' : 'entityTypeId',
151
							'entityType' : 'entityType',
152
							'hbase.mapred.inputtable' : 'sourceTable', 
153
							'hbase.mapreduce.inputtable' : 'sourceTable', 
154
							'hbase.mapred.outputtable' : 'sourceTable', 
155
							'hbase.mapreduce.outputtable' : 'sourceTable'	
156

  
157
						}
158
					</PARAM>
159
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
160
				</PARAMETERS>
161
				<ARCS>
162
					<ARC to="copyTable" />
163
				</ARCS>
164
			</NODE>				
165
			<NODE name="copyTable" type="SubmitHadoopJob">
166
				<DESCRIPTION>Copy table Job</DESCRIPTION>
167
				<PARAMETERS>
168
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">copytable</PARAM>
169
					<PARAM required="true" type="string" name="envParams" managedBy="system">
170
						{
171
							'cluster' : 'sourceCluster',
172
							'targetCluster' : 'targetCluster', 
173
							'new.name' : 'targetTable', 
174
							'hbase.mapreduce.inputtable' : 'sourceTable',
175
							'peer.adr' : 'peer.adr'
176
						}
177
					</PARAM>
178
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
179
				</PARAMETERS>
180
				<ARCS>
181
					<ARC to="success" />
182
				</ARCS>
183
			</NODE>			
184
        </CONFIGURATION>
185
        <STATUS />
186
    </BODY>
187
</RESOURCE_PROFILE>
modules/dnet-deduplication/trunk/src/main/resources/eu/dnetlib/test/profiles/meta/workflows/export.fullnames.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
    <HEADER>
4
        <RESOURCE_IDENTIFIER value="1c782e6e-33d5-4986-b2f0-5d017415d348_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
        <RESOURCE_KIND value="WorkflowDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
9
    </HEADER>
10
    <BODY>
11
        <WORKFLOW_NAME>Export Fullnames</WORKFLOW_NAME>
12
        <WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
13
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
        <CONFIGURATION start="manual">
15
        
16
			<NODE name="setDedupConfigs" type="SetDedupConfiguration" isStart="true">
17
				<DESCRIPTION>Set Dedup conf</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM function="obtainValues('dedupOrchestrations', {})" required="true" type="string" name="dedupConfigSequence" managedBy="user"></PARAM>
20
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
21
				</PARAMETERS>
22
				<ARCS>
23
					<ARC to="checkConf" />
24
				</ARCS>
25
			</NODE>        
26

  
27
			<NODE name="hadoopConfig" type="SetClusterAndTable" isStart="true">
28
				<DESCRIPTION>Set table name</DESCRIPTION>
29
				<PARAMETERS>
30
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
31
					<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM>
32
					<PARAM required="true" type="string" name="table" managedBy="user"></PARAM>
33
				</PARAMETERS>
34
				<ARCS>
35
					<ARC to="checkConf" />
36
				</ARCS>
37
			</NODE>			
38
			<NODE name="setPath" type="SetHdfsPathJob" isStart="true">
39
				<DESCRIPTION>Set export path name</DESCRIPTION>
40
				<PARAMETERS>
41
					<PARAM required="true" type="string" name="path" managedBy="user">/tmp/fullnames_test</PARAM>
42
					<PARAM required="true" type="string" name="pathParam" managedBy="system">exportPath</PARAM>
43
				</PARAMETERS>
44
				<ARCS>
45
					<ARC to="checkConf" />
46
				</ARCS>
47
			</NODE>	
48
			
49
        	<NODE name="checkConf" type="DedupCheckConfiguration" isJoin="true">
50
				<DESCRIPTION>check dedup configuration</DESCRIPTION>
51
				<PARAMETERS>
52
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
53
				</PARAMETERS>
54
				<ARCS>
55
					<ARC to="cleanupNames" />
56
				</ARCS>
57
			</NODE>			
58
			
59
			<NODE name="cleanupNames" type="DeleteHdfsPathJob">
60
				<DESCRIPTION>hdfs cleanup</DESCRIPTION>
61
				<PARAMETERS>
62
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
63
					<PARAM required="true" type="string" name="envParams" managedBy="system">
64
						{ 	
65
							'path' : 'exportPath'
66
						}
67
					</PARAM>					
68
				</PARAMETERS>
69
				<ARCS>
70
					<ARC to="exportFullnames" />
71
				</ARCS>
72
			</NODE>			
73
 
74
			<NODE name="exportFullnames" type="DedupConfigurationAwareJob">
75
				<DESCRIPTION>export person fullnames</DESCRIPTION>
76
				<PARAMETERS>
77
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">exportPersonFullnamesJob</PARAM>
78
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
79
					<PARAM required="true" type="string" name="envParams" managedBy="system">
80
						{ 	
81
							'cluster' : 'cluster',
82
							'entityTypeId' : 'entityTypeId',
83
							'entityType' : 'entityType',							
84
							'hbase.mapred.inputtable' : 'tableName', 
85
							'hbase.mapreduce.inputtable' : 'tableName', 
86
							'hbase.mapred.outputtable' : 'tableName', 
87
							'hbase.mapreduce.outputtable' : 'tableName',
88
							'mapred.output.dir' : 'exportPath'												
89
						}
90
					</PARAM>					
91
				</PARAMETERS>
92
				<ARCS>
93
					<ARC to="success" />
94
				</ARCS>
95
			</NODE>
96

  
97

  
98
        </CONFIGURATION>
99
        <STATUS />
100
    </BODY>
101
</RESOURCE_PROFILE>
modules/dnet-deduplication/trunk/src/main/resources/eu/dnetlib/test/profiles/meta/workflows/delete.simrels.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
    <HEADER>
4
        <RESOURCE_IDENTIFIER value="0fc975d7-65c1-4d39-88ec-7bca72744e89_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
        <RESOURCE_KIND value="WorkflowDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
9
    </HEADER>
10
    <BODY>
11
        <WORKFLOW_NAME>Delete Similarity Rels</WORKFLOW_NAME>
12
        <WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
13
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
        <CONFIGURATION start="manual">
15

  
16
			<NODE name="setDedupConfigs" type="SetDedupConfiguration" isStart="true">
17
				<DESCRIPTION>Set Dedup conf</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM function="obtainValues('dedupOrchestrations', {})" required="true" type="string" name="dedupConfigSequence" managedBy="user"></PARAM>
20
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
21
				</PARAMETERS>
22
				<ARCS>
23
					<ARC to="checkConf" />
24
				</ARCS>
25
			</NODE>
26
			<NODE name="hadoopConfig" type="SetClusterAndTable" isStart="true">
27
				<DESCRIPTION>Set table name</DESCRIPTION>
28
				<PARAMETERS>
29
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
30
					<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM>
31
				</PARAMETERS>
32
				<ARCS>
33
					<ARC to="checkConf" />
34
				</ARCS>
35
			</NODE>			
36
			
37
        	<NODE name="checkConf" type="DedupCheckConfiguration" isJoin="true">
38
				<DESCRIPTION>check dedup configuration</DESCRIPTION>
39
				<PARAMETERS>
40
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
41
				</PARAMETERS>
42
				<ARCS>
43
					<ARC to="deleteSimRels" />
44
				</ARCS>
45
			</NODE>
46
						
47
			<NODE name="deleteSimRels" type="DedupConfigurationAwareJob">
48
				<DESCRIPTION>delete similarity rels</DESCRIPTION>
49
				<PARAMETERS>
50
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">deleteSimRelJob</PARAM>
51
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
52
					<PARAM required="true" type="string" name="envParams" managedBy="system">
53
						{ 	
54
							'dedup.conf' : 'dedup.conf',
55
							'entityTypeId' : 'entityTypeId',
56
							'entityType' : 'entityType',
57
							'cluster' : 'cluster',
58
							'hbase.mapred.inputtable' : 'tableName', 
59
							'hbase.mapreduce.inputtable' : 'tableName', 
60
							'hbase.mapred.outputtable' : 'tableName', 
61
							'hbase.mapreduce.outputtable' : 'tableName'														
62
						}
63
					</PARAM>					
64
				</PARAMETERS>
65
				<ARCS>
66
					<ARC to="success" />
67
				</ARCS>
68
			</NODE>
69

  
70

  
71
        </CONFIGURATION>
72
        <STATUS />
73
    </BODY>
74
</RESOURCE_PROFILE>

Also available in: Unified diff