Revision 48139
Added by Alessia Bardi over 6 years ago
online.dedup.organizations.xml | ||
---|---|---|
1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
2 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
|
3 |
<HEADER>
|
|
4 |
<RESOURCE_IDENTIFIER value="f6a4c2e5-a663-4700-844f-1b753484efee_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
|
5 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
|
6 |
<RESOURCE_KIND value="WorkflowDSResources"/>
|
|
7 |
<RESOURCE_URI value=""/>
|
|
8 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
|
|
9 |
</HEADER>
|
|
10 |
<BODY>
|
|
11 |
<WORKFLOW_NAME>Dedup organizations (Online)</WORKFLOW_NAME>
|
|
12 |
<WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
|
|
13 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
|
14 |
<CONFIGURATION start="manual">
|
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER>
|
|
4 |
<RESOURCE_IDENTIFIER value="f6a4c2e5-a663-4700-844f-1b753484efee_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
|
5 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
|
6 |
<RESOURCE_KIND value="WorkflowDSResources"/>
|
|
7 |
<RESOURCE_URI value=""/>
|
|
8 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
|
|
9 |
</HEADER>
|
|
10 |
<BODY>
|
|
11 |
<WORKFLOW_NAME>Dedup organizations (Online)</WORKFLOW_NAME>
|
|
12 |
<WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
|
|
13 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
|
14 |
<CONFIGURATION start="manual">
|
|
15 | 15 |
<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true"> |
16 |
<DESCRIPTION />
|
|
16 |
<DESCRIPTION/> |
|
17 | 17 |
<PARAMETERS> |
18 | 18 |
<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM> |
19 | 19 |
<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM> |
20 | 20 |
</PARAMETERS> |
21 | 21 |
<ARCS> |
22 |
<ARC to="SELECT_MODE" />
|
|
22 |
<ARC to="SELECT_MODE"/> |
|
23 | 23 |
</ARCS> |
24 |
</NODE>
|
|
24 |
</NODE> |
|
25 | 25 |
<NODE name="configure" type="LoadDedupConfiguration" isStart="true"> |
26 | 26 |
<DESCRIPTION>Load Dedup conf</DESCRIPTION> |
27 | 27 |
<PARAMETERS> |
28 | 28 |
<PARAM required="true" type="string" name="entityType" managedBy="system">organization</PARAM> |
29 | 29 |
</PARAMETERS> |
30 | 30 |
<ARCS> |
31 |
<ARC to="SELECT_MODE" />
|
|
31 |
<ARC to="SELECT_MODE"/> |
|
32 | 32 |
</ARCS> |
33 | 33 |
</NODE> |
34 |
<NODE name="SELECT_MODE" type="Selection" isJoin="true">
|
|
34 |
<NODE name="SELECT_MODE" type="Selection" isJoin="true">
|
|
35 | 35 |
<DESCRIPTION>Do we start from GROUND TRUTH?</DESCRIPTION> |
36 | 36 |
<PARAMETERS> |
37 | 37 |
<PARAM function="validValues(['YES', 'NO'])" managedBy="user" name="selection" required="true" type="string">NO</PARAM> |
38 | 38 |
</PARAMETERS> |
39 | 39 |
<ARCS> |
40 |
<ARC name="YES" to="findRoots" />
|
|
41 |
<ARC name="NO" to="deduplicateScan" />
|
|
40 |
<ARC name="YES" to="findRoots"/> |
|
41 |
<ARC name="NO" to="deduplicateScan"/> |
|
42 | 42 |
</ARCS> |
43 |
</NODE>
|
|
43 |
</NODE> |
|
44 | 44 |
<NODE name="deduplicateScan" type="SubmitHadoopJob"> |
45 | 45 |
<DESCRIPTION>Dup Scan</DESCRIPTION> |
46 | 46 |
<PARAMETERS> |
47 | 47 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupCandidateScanJob</PARAM> |
48 | 48 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
49 | 49 |
<PARAM required="true" type="string" name="sysParams" managedBy="system"> |
50 |
{
|
|
51 |
'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
|
|
52 |
'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
|
|
53 |
'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
|
|
54 |
'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
|
|
50 |
{ |
|
51 |
'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
|
|
52 |
'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
|
|
53 |
'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
|
|
54 |
'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable' |
|
55 | 55 |
} |
56 | 56 |
</PARAM> |
57 | 57 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
58 |
{
|
|
59 |
'dedup.pace.conf' : 'dedup.pace.conf',
|
|
60 |
'dedup.wf.conf' : 'dedup.wf.conf'
|
|
58 |
{ |
|
59 |
'dedup.pace.conf' : 'dedup.pace.conf', |
|
60 |
'dedup.wf.conf' : 'dedup.wf.conf' |
|
61 | 61 |
} |
62 |
</PARAM>
|
|
62 |
</PARAM> |
|
63 | 63 |
<PARAM required="true" type="string" name="params" managedBy="system"> |
64 |
{
|
|
65 |
'entityTypeId' : '20',
|
|
66 |
'entityType' : 'organization'
|
|
64 |
{ |
|
65 |
'entityTypeId' : '20', |
|
66 |
'entityType' : 'organization'
|
|
67 | 67 |
} |
68 | 68 |
</PARAM> |
69 | 69 |
</PARAMETERS> |
70 | 70 |
<ARCS> |
71 |
<ARC to="dedupGrouper" />
|
|
71 |
<ARC to="dedupGrouper"/> |
|
72 | 72 |
</ARCS> |
73 | 73 |
</NODE> |
74 | 74 |
<NODE name="dedupGrouper" type="DedupGrouperJob"> |
... | ... | |
77 | 77 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupGrouperJob</PARAM> |
78 | 78 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
79 | 79 |
<PARAM required="true" type="string" name="sysParams" managedBy="system"> |
80 |
{
|
|
81 |
'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
|
|
82 |
'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
|
|
83 |
'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
|
|
84 |
'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
|
|
80 |
{ |
|
81 |
'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
|
|
82 |
'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
|
|
83 |
'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
|
|
84 |
'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable' |
|
85 | 85 |
} |
86 | 86 |
</PARAM> |
87 | 87 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
88 |
{
|
|
89 |
'dedup.pace.conf' : 'dedup.pace.conf',
|
|
90 |
'dedup.wf.conf' : 'dedup.wf.conf'
|
|
88 |
{ |
|
89 |
'dedup.pace.conf' : 'dedup.pace.conf', |
|
90 |
'dedup.wf.conf' : 'dedup.wf.conf' |
|
91 | 91 |
} |
92 |
</PARAM>
|
|
92 |
</PARAM> |
|
93 | 93 |
<PARAM required="true" type="string" name="params" managedBy="system"> |
94 |
{
|
|
95 |
'entityTypeId' : '20'
|
|
94 |
{ |
|
95 |
'entityTypeId' : '20' |
|
96 | 96 |
} |
97 |
</PARAM>
|
|
97 |
</PARAM> |
|
98 | 98 |
</PARAMETERS> |
99 | 99 |
<ARCS> |
100 |
<ARC to="dedupGrouper" />
|
|
101 |
<ARC name="done" to="findRoots" />
|
|
100 |
<ARC to="dedupGrouper"/> |
|
101 |
<ARC name="done" to="findRoots"/> |
|
102 | 102 |
</ARCS> |
103 | 103 |
</NODE> |
104 | 104 |
<NODE name="findRoots" type="SubmitHadoopJob"> |
... | ... | |
107 | 107 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupFindRootsJob</PARAM> |
108 | 108 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
109 | 109 |
<PARAM required="true" type="string" name="sysParams" managedBy="system"> |
110 |
{
|
|
111 |
'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
|
|
112 |
'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
|
|
113 |
'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
|
|
114 |
'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
|
|
110 |
{ |
|
111 |
'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
|
|
112 |
'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
|
|
113 |
'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
|
|
114 |
'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable' |
|
115 | 115 |
} |
116 | 116 |
</PARAM> |
117 | 117 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
118 |
{
|
|
119 |
'dedup.pace.conf' : 'dedup.pace.conf',
|
|
120 |
'dedup.wf.conf' : 'dedup.wf.conf'
|
|
118 |
{ |
|
119 |
'dedup.pace.conf' : 'dedup.pace.conf', |
|
120 |
'dedup.wf.conf' : 'dedup.wf.conf' |
|
121 | 121 |
} |
122 |
</PARAM>
|
|
122 |
</PARAM> |
|
123 | 123 |
<PARAM required="true" type="string" name="params" managedBy="system"> |
124 |
{
|
|
125 |
'entityTypeId' : '20',
|
|
126 |
'entityType' : 'organization'
|
|
124 |
{ |
|
125 |
'entityTypeId' : '20', |
|
126 |
'entityType' : 'organization'
|
|
127 | 127 |
} |
128 |
</PARAM>
|
|
128 |
</PARAM> |
|
129 | 129 |
</PARAMETERS> |
130 | 130 |
<ARCS> |
131 |
<ARC to="buildRoots" />
|
|
131 |
<ARC to="buildRoots"/> |
|
132 | 132 |
</ARCS> |
133 | 133 |
</NODE> |
134 | 134 |
<NODE name="buildRoots" type="SubmitHadoopJob"> |
... | ... | |
137 | 137 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">dedupBuildRootsJob</PARAM> |
138 | 138 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
139 | 139 |
<PARAM required="true" type="string" name="sysParams" managedBy="system"> |
140 |
{
|
|
141 |
'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
|
|
142 |
'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
|
|
143 |
'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
|
|
144 |
'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable'
|
|
140 |
{ |
|
141 |
'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
|
|
142 |
'hbase.mapred.outputtable' : 'hbase.mapred.datatable',
|
|
143 |
'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable',
|
|
144 |
'hbase.mapreduce.outputtable' : 'hbase.mapred.datatable' |
|
145 | 145 |
} |
146 | 146 |
</PARAM> |
147 | 147 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
148 |
{
|
|
149 |
'dedup.pace.conf' : 'dedup.pace.conf',
|
|
150 |
'dedup.wf.conf' : 'dedup.wf.conf',
|
|
151 |
'relClasses' : 'relClasses'
|
|
148 |
{ |
|
149 |
'dedup.pace.conf' : 'dedup.pace.conf', |
|
150 |
'dedup.wf.conf' : 'dedup.wf.conf', |
|
151 |
'relClasses' : 'relClasses' |
|
152 | 152 |
} |
153 |
</PARAM>
|
|
153 |
</PARAM> |
|
154 | 154 |
<PARAM required="true" type="string" name="params" managedBy="system"> |
155 |
{
|
|
156 |
'entityTypeId' : '20'
|
|
155 |
{ |
|
156 |
'entityTypeId' : '20' |
|
157 | 157 |
} |
158 |
</PARAM>
|
|
158 |
</PARAM> |
|
159 | 159 |
</PARAMETERS> |
160 | 160 |
<ARCS> |
161 |
<ARC to="success" />
|
|
161 |
<ARC to="success"/> |
|
162 | 162 |
</ARCS> |
163 | 163 |
</NODE> |
164 |
</CONFIGURATION>
|
|
165 |
<STATUS />
|
|
166 |
</BODY>
|
|
164 |
</CONFIGURATION>
|
|
165 |
<STATUS/>
|
|
166 |
</BODY>
|
|
167 | 167 |
</RESOURCE_PROFILE> |
Also available in: Unified diff
integrated (hopefully) all required changes from dnet40