Project

General

Profile

1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
    <!--
        Profile header: identifies this resource and records its kind, type and creation date.
        RESOURCE_IDENTIFIER has the form <uuid>_<suffix>; the suffix is the Base64 encoding of
        "WorkflowDSResources/WorkflowDSResourceType", i.e. RESOURCE_KIND "/" RESOURCE_TYPE as
        declared below, so the three values must be kept in sync if any of them is edited.
        RESOURCE_URI is intentionally present but empty in this profile.
        NOTE(review): DATE_OF_CREATION looks like a fixed template timestamp rather than the
        real creation time; confirm before relying on it.
    -->
    <HEADER>
4
        <RESOURCE_IDENTIFIER value="fee3b0cc-4d7a-4926-a174-258277f3af1f_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
        <RESOURCE_KIND value="WorkflowDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
9
    </HEADER>
10
    <BODY>
11
        <WORKFLOW_NAME>Offline Deduplication (scan only)</WORKFLOW_NAME>
12
        <WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
13
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
        <CONFIGURATION start="manual">
15
			<!--
				fetchRelClasses: start node (isStart="true") of type FetchRelClasses.
				It is one of the three start nodes of this CONFIGURATION (with setDedupConfigs and
				hadoopConfig); each of them has a single arc to the checkConf join node.
				Parameters (both system-managed and required):
				  relClassesProperty = dnet.openaire.model.relclasses.xquery
				  relClassesName     = relClasses
				NOTE(review): presumably relClassesProperty names a property holding an XQuery and
				relClassesName is the name under which the result is stored; confirm against the
				FetchRelClasses node implementation. No other node in this CONFIGURATION refers to
				"relClasses" explicitly.
			-->
			<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true">
16
				<DESCRIPTION />
17
				<PARAMETERS>
18
					<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM>
19
					<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM>
20
				</PARAMETERS>
21
				<ARCS>
22
					<ARC to="checkConf" />
23
				</ARCS>
24
			</NODE>        
25
			<!--
				setDedupConfigs: start node (isStart="true") of type SetDedupConfiguration; its single
				arc leads to the checkConf join node.
				Parameters:
				  dedupConfigSequence      user-managed, required, shipped empty. It is the only
				                           managedBy="user" parameter in this CONFIGURATION, so the
				                           workflow cannot be complete until someone fills it in.
				                           Its function attribute is obtainValues('dedupOrchestrations', {}).
				  dedupConfigSequenceParam system-managed, fixed to dedup.conf.queue. The same name and
				                           value are repeated on checkConf and deduplicateScan; keep all
				                           three identical when editing.
				NOTE(review): the function attribute presumably supplies the list of selectable values
				to the configuration UI; confirm against the workflow manager.
			-->
			<NODE name="setDedupConfigs" type="SetDedupConfiguration" isStart="true">
26
				<DESCRIPTION>Set Dedup conf</DESCRIPTION>
27
				<PARAMETERS>
28
					<PARAM function="obtainValues('dedupOrchestrations', {})" required="true" type="string" name="dedupConfigSequence" managedBy="user"></PARAM>
29
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
30
				</PARAMETERS>
31
				<ARCS>
32
					<ARC to="checkConf" />
33
				</ARCS>
34
			</NODE>
35
			<!--
				hadoopConfig: start node (isStart="true") of type SetClusterAndTable; its single arc
				leads to the checkConf join node.
				Parameters (both system-managed and required):
				  cluster    = DM
				  tableParam = tableName
				The strings "cluster" and "tableName" also appear as values in the envParams map of the
				deduplicateScan node, so renaming tableParam here requires updating that map as well.
				NOTE(review): presumably tableParam is the name of the workflow variable that receives
				the table name, not the table name itself; confirm against SetClusterAndTable.
			-->
			<NODE name="hadoopConfig" type="SetClusterAndTable" isStart="true">
36
				<DESCRIPTION>Set table name</DESCRIPTION>
37
				<PARAMETERS>
38
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
39
					<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM>
40
				</PARAMETERS>
41
				<ARCS>
42
					<ARC to="checkConf" />
43
				</ARCS>
44
			</NODE>			
45
        	<!--
				checkConf: join node (isJoin="true") of type DedupCheckConfiguration. It is the target
				of the arcs from all three start nodes (fetchRelClasses, setDedupConfigs, hadoopConfig)
				and has a single outgoing arc to deduplicateScan.
				Its only parameter, dedupConfigSequenceParam, is system-managed and set to
				dedup.conf.queue, the same value used by setDedupConfigs and deduplicateScan.
				NOTE(review): presumably the join waits for every incoming branch before this node
				runs; confirm against the workflow engine semantics for isJoin.
			-->
        	<NODE name="checkConf" type="DedupCheckConfiguration" isJoin="true">
46
				<DESCRIPTION/>
47
				<PARAMETERS>
48
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
49
				</PARAMETERS>
50
				<ARCS>
51
					<ARC to="deduplicateScan" />
52
				</ARCS>
53
			</NODE>
54
			
55
			<!--
				deduplicateScan: node of type DuplicateScanJob, reached from checkConf.
				Parameters (all system-managed and required):
				  hadoopJob                = dedupCandidateScanJob
				  dedupConfigSequenceParam = dedup.conf.queue (same value as on setDedupConfigs and checkConf)
				  envParams                = a multi-line, single-quoted key/value map kept as element
				                             text. All four hbase.* table keys (mapred/mapreduce,
				                             input/output) map to 'tableName', i.e. input and output
				                             refer to the same entry. Do not reformat this text: its
				                             whitespace is part of the parameter value as delivered
				                             to the application.
				Arcs:
				  unnamed arc      back to deduplicateScan itself
				  arc named "done" to "success"
				NOTE(review): the self-arc presumably repeats the scan for each entry of the dedup
				configuration queue until the node emits "done"; confirm against DuplicateScanJob.
				NOTE(review): no node named "success" is defined in this CONFIGURATION; presumably it is
				a terminal state built into the workflow engine. Confirm.
			-->
			<NODE name="deduplicateScan" type="DuplicateScanJob">
56
				<DESCRIPTION>Dup Scan</DESCRIPTION>
57
				<PARAMETERS>
58
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupCandidateScanJob</PARAM>
59
					<PARAM required="true" type="string" name="dedupConfigSequenceParam" managedBy="system">dedup.conf.queue</PARAM>
60
					<PARAM required="true" type="string" name="envParams" managedBy="system">
61
						{ 	
62
							'entityTypeId' : 'entityTypeId',
63
							'entityType' : 'entityType',
64
							'cluster' : 'cluster',
65
							'hbase.mapred.inputtable' : 'tableName', 
66
							'hbase.mapred.outputtable' : 'tableName', 
67
							'hbase.mapreduce.inputtable' : 'tableName', 
68
							'hbase.mapreduce.outputtable' : 'tableName'
69
						}
70
					</PARAM>					
71
				</PARAMETERS>
72
				<ARCS>
73
					<ARC to="deduplicateScan" />
74
					<ARC name="done" to="success" />
75
				</ARCS>
76
			</NODE>
77

    
78
        </CONFIGURATION>
79
        <STATUS />
80
    </BODY>
81
</RESOURCE_PROFILE>
(24-24/32)