Revision 58349
Added by Claudio Atzori about 4 years ago
dedup.ocean.xml | ||
---|---|---|
4 | 4 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
5 | 5 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
6 | 6 |
<RESOURCE_URI value=""/> |
7 |
<DATE_OF_CREATION value="2020-03-20T22:49:01+00:00"/>
|
|
7 |
<DATE_OF_CREATION value="2020-03-24T11:15:39+00:00"/>
|
|
8 | 8 |
</HEADER> |
9 | 9 |
<BODY> |
10 | 10 |
<WORKFLOW_NAME>Offline Deduplication [OCEAN]</WORKFLOW_NAME> |
... | ... | |
51 | 51 |
'mongoDb' : 'mdstore', |
52 | 52 |
'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', |
53 | 53 |
'postgresUser' : 'dnet', |
54 |
'postgresPassword' : 'xxxx',
|
|
54 |
'postgresPassword' : 'xxx', |
|
55 | 55 |
'workingPath' : '/tmp/dhp_migration_beta', |
56 | 56 |
'reuseContent' : 'true' |
57 | 57 |
} |
... | ... | |
60 | 60 |
<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM> |
61 | 61 |
</PARAMETERS> |
62 | 62 |
<ARCS> |
63 |
<ARC to="setActionSetId"/>
|
|
63 |
<ARC to="setRawGraphPath"/>
|
|
64 | 64 |
</ARCS> |
65 | 65 |
</NODE> |
66 | 66 |
|
67 |
<NODE name="setActionSetId" type="SetEnvParameter">
|
|
68 |
<DESCRIPTION>Set action set id</DESCRIPTION>
|
|
67 |
<NODE name="setRawGraphPath" type="SetEnvParameter">
|
|
68 |
<DESCRIPTION>Set the path containing the raw graph</DESCRIPTION>
|
|
69 | 69 |
<PARAMETERS> |
70 |
<PARAM managedBy="system" name="parameterName" required="true" type="string">setActionSetId</PARAM>
|
|
71 |
<PARAM function="obtainValues('dedupOrchestrations', {})" managedBy="user" name="parameterValue" required="true" type="string">dedup-similarity-organization</PARAM>
|
|
70 |
<PARAM managedBy="system" name="parameterName" required="true" type="string">rawGraphPath</PARAM>
|
|
71 |
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA/graph_raw</PARAM>
|
|
72 | 72 |
</PARAMETERS> |
73 | 73 |
<ARCS> |
74 |
<ARC to="setDedupConfig"/>
|
|
74 |
<ARC to="setDedupWorkingPath"/>
|
|
75 | 75 |
</ARCS> |
76 | 76 |
</NODE> |
77 | 77 |
|
78 |
<NODE name="setDedupConfig" type="SetDedupConfiguration">
|
|
79 |
<DESCRIPTION>Set Dedup conf</DESCRIPTION>
|
|
78 |
<NODE name="setDedupWorkingPath" type="SetEnvParameter">
|
|
79 |
<DESCRIPTION>Set the path used by dedup workflows to store intermediate data</DESCRIPTION>
|
|
80 | 80 |
<PARAMETERS> |
81 |
<PARAM function="obtainValues('dedupOrchestrations', {})" managedBy="user" name="dedupConfigSequence" required="true" type="string">dedup-similarity-organization</PARAM>
|
|
82 |
<PARAM managedBy="system" name="dedupConfigSequenceParam" required="true" type="string">dedup.conf.queue</PARAM>
|
|
81 |
<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupWorkingPath</PARAM>
|
|
82 |
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_dedup</PARAM>
|
|
83 | 83 |
</PARAMETERS> |
84 | 84 |
<ARCS> |
85 |
<ARC to="prepareActionSet"/>
|
|
85 |
<ARC to="setDedupGraphPath"/>
|
|
86 | 86 |
</ARCS> |
87 | 87 |
</NODE> |
88 | 88 |
|
89 |
<NODE name="prepareActionSet" type="PrepareConfiguredActionSet">
|
|
90 |
<DESCRIPTION>prepare action sets</DESCRIPTION>
|
|
89 |
<NODE name="setDedupGraphPath" type="SetEnvParameter">
|
|
90 |
<DESCRIPTION>Set the target path to store the deduped graph</DESCRIPTION>
|
|
91 | 91 |
<PARAMETERS> |
92 |
<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupGraphPath</PARAM> |
|
93 |
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA/graph_dedup</PARAM> |
|
94 |
</PARAMETERS> |
|
95 |
<ARCS> |
|
96 |
<ARC to="setDedupConfig"/> |
|
97 |
</ARCS> |
|
98 |
</NODE> |
|
99 |
|
|
100 |
<NODE name="setDedupConfig" type="SetDedupConfiguration"> |
|
101 |
<DESCRIPTION>Set Dedup conf</DESCRIPTION> |
|
102 |
<PARAMETERS> |
|
103 |
<PARAM function="obtainValues('dedupOrchestrations', {})" managedBy="user" name="dedupConfigSequence" required="true" type="string">dedup-similarity-organization</PARAM> |
|
92 | 104 |
<PARAM managedBy="system" name="dedupConfigSequenceParam" required="true" type="string">dedup.conf.queue</PARAM> |
93 |
<PARAM managedBy="system" name="jobProperty" required="true" type="string">rawSetId</PARAM> |
|
94 |
<PARAM managedBy="system" name="actionSetPathParam" required="true" type="string">actionSetPath</PARAM> |
|
95 | 105 |
</PARAMETERS> |
96 | 106 |
<ARCS> |
97 | 107 |
<ARC to="duplicateScan"/> |
... | ... | |
105 | 115 |
<PARAM required="true" type="string" name="cluster" managedBy="system">IIS</PARAM> |
106 | 116 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
107 | 117 |
{ |
108 |
'rawSet' : 'actionSetPath', |
|
109 |
'actionSetId' : 'setActionSetId' |
|
118 |
'actionSetId' : 'setActionSetId', |
|
119 |
'graphBasePath' : 'rawGraphPath', |
|
120 |
'workingPath' : 'dedupWorkingPath', |
|
121 |
'dedupGraphPath': 'dedupGraphPath' |
|
110 | 122 |
} |
111 | 123 |
</PARAM> |
112 | 124 |
<PARAM required="true" type="string" name="params" managedBy="system"> |
113 | 125 |
{ |
114 |
'oozie.wf.application.path' : '/lib/dnet/dedup/scan/oozie_app', |
|
115 |
'graphBasePath' : '/var/lib/dnet/graph_BETA/graph_raw', |
|
116 |
'workingPath' : '/tmp/beta_dedup' |
|
126 |
'oozie.wf.application.path' : '/lib/dnet/dedup/scan/oozie_app' |
|
117 | 127 |
} |
118 | 128 |
</PARAM> |
119 | 129 |
<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM> |
120 | 130 |
</PARAMETERS> |
121 | 131 |
<ARCS> |
122 |
<ARC to="updateActionSets"/>
|
|
132 |
<ARC to="dedupConsistency"/>
|
|
123 | 133 |
</ARCS> |
124 | 134 |
</NODE> |
125 | 135 |
|
126 |
<NODE name="updateActionSets" type="UpdateActionSets"> |
|
127 |
<DESCRIPTION>update action sets</DESCRIPTION> |
|
128 |
<PARAMETERS/> |
|
136 |
<NODE name="dedupConsistency" type="SubmitHadoopJob"> |
|
137 |
<DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION> |
|
138 |
<PARAMETERS> |
|
139 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM> |
|
140 |
<PARAM required="true" type="string" name="cluster" managedBy="system">IIS</PARAM> |
|
141 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
142 |
{ |
|
143 |
'graphBasePath' : 'rawGraphPath', |
|
144 |
'dedupGraphPath': 'dedupGraphPath', |
|
145 |
'workingPath' : 'dedupWorkingPath' |
|
146 |
} |
|
147 |
</PARAM> |
|
148 |
<PARAM required="true" type="string" name="params" managedBy="system"> |
|
149 |
{ |
|
150 |
'oozie.wf.application.path' : '/lib/dnet/dedup/consistency/oozie_app' |
|
151 |
} |
|
152 |
</PARAM> |
|
153 |
<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM> |
|
154 |
</PARAMETERS> |
|
129 | 155 |
<ARCS> |
130 | 156 |
<ARC to="success"/> |
131 | 157 |
</ARCS> |
132 | 158 |
</NODE> |
133 | 159 |
|
160 |
|
|
134 | 161 |
</CONFIGURATION> |
135 | 162 |
<STATUS> |
136 | 163 |
<LAST_EXECUTION_ID>wf_20200320_224915_420</LAST_EXECUTION_ID> |
Also available in: Unified diff
Updated the standalone dedup workflow according to the new Oozie workflow specs.