Project

General

Profile

« Previous | Next » 

Revision 58349

Updated the standalone dedup workflow according to the new Oozie workflow specs.

View differences:

dedup.ocean.xml
4 4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5 5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6 6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2020-03-20T22:49:01+00:00"/>
7
        <DATE_OF_CREATION value="2020-03-24T11:15:39+00:00"/>
8 8
    </HEADER>
9 9
    <BODY>
10 10
        <WORKFLOW_NAME>Offline Deduplication [OCEAN]</WORKFLOW_NAME>
......
51 51
                        'mongoDb' : 'mdstore',
52 52
                        'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
53 53
                        'postgresUser' : 'dnet',
54
                        'postgresPassword' : 'xxxx',
54
                        'postgresPassword' : 'xxx',
55 55
                        'workingPath' : '/tmp/dhp_migration_beta',
56 56
                        'reuseContent' : 'true'
57 57
                        }
......
60 60
                    <PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
61 61
                </PARAMETERS>
62 62
                <ARCS>
63
                    <ARC to="setActionSetId"/>
63
                    <ARC to="setRawGraphPath"/>
64 64
                </ARCS>
65 65
            </NODE>
66 66

  
67
            <NODE name="setActionSetId" type="SetEnvParameter">
68
                <DESCRIPTION>Set action set id</DESCRIPTION>
67
            <NODE name="setRawGraphPath" type="SetEnvParameter">
68
                <DESCRIPTION>Set the path containing the raw graph</DESCRIPTION>
69 69
                <PARAMETERS>
70
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">setActionSetId</PARAM>
71
                    <PARAM function="obtainValues('dedupOrchestrations', {})" managedBy="user" name="parameterValue" required="true" type="string">dedup-similarity-organization</PARAM>
70
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">rawGraphPath</PARAM>
71
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA/graph_raw</PARAM>
72 72
                </PARAMETERS>
73 73
                <ARCS>
74
                    <ARC to="setDedupConfig"/>
74
                    <ARC to="setDedupWorkingPath"/>
75 75
                </ARCS>
76 76
            </NODE>
77 77

  
78
            <NODE name="setDedupConfig" type="SetDedupConfiguration">
79
                <DESCRIPTION>Set Dedup conf</DESCRIPTION>
78
            <NODE name="setDedupWorkingPath" type="SetEnvParameter">
79
                <DESCRIPTION>Set the path used by dedup workflows to store intermediate data</DESCRIPTION>
80 80
                <PARAMETERS>
81
                    <PARAM function="obtainValues('dedupOrchestrations', {})" managedBy="user" name="dedupConfigSequence" required="true" type="string">dedup-similarity-organization</PARAM>
82
                    <PARAM managedBy="system" name="dedupConfigSequenceParam" required="true" type="string">dedup.conf.queue</PARAM>
81
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">dedupWorkingPath</PARAM>
82
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_dedup</PARAM>
83 83
                </PARAMETERS>
84 84
                <ARCS>
85
                    <ARC to="prepareActionSet"/>
85
                    <ARC to="setDedupGraphPath"/>
86 86
                </ARCS>
87 87
            </NODE>
88 88

  
89
            <NODE name="prepareActionSet" type="PrepareConfiguredActionSet">
90
                <DESCRIPTION>prepare action sets</DESCRIPTION>
89
            <NODE name="setDedupGraphPath" type="SetEnvParameter">
90
                <DESCRIPTION>Set the target path to store the deduped graph</DESCRIPTION>
91 91
                <PARAMETERS>
92
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">dedupGraphPath</PARAM>
93
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA/graph_dedup</PARAM>
94
                </PARAMETERS>
95
                <ARCS>
96
                    <ARC to="setDedupConfig"/>
97
                </ARCS>
98
            </NODE>
99

  
100
            <NODE name="setDedupConfig" type="SetDedupConfiguration">
101
                <DESCRIPTION>Set Dedup conf</DESCRIPTION>
102
                <PARAMETERS>
103
                    <PARAM function="obtainValues('dedupOrchestrations', {})" managedBy="user" name="dedupConfigSequence" required="true" type="string">dedup-similarity-organization</PARAM>
92 104
                    <PARAM managedBy="system" name="dedupConfigSequenceParam" required="true" type="string">dedup.conf.queue</PARAM>
93
                    <PARAM managedBy="system" name="jobProperty" required="true" type="string">rawSetId</PARAM>
94
                    <PARAM managedBy="system" name="actionSetPathParam" required="true" type="string">actionSetPath</PARAM>
95 105
                </PARAMETERS>
96 106
                <ARCS>
97 107
                    <ARC to="duplicateScan"/>
......
105 115
                    <PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
106 116
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
107 117
                        {
108
                        'rawSet' : 'actionSetPath',
109
                        'actionSetId' : 'setActionSetId'
118
                        'actionSetId' : 'setActionSetId',
119
                        'graphBasePath' : 'rawGraphPath',
120
                        'workingPath' 	: 'dedupWorkingPath',
121
                        'dedupGraphPath': 'dedupGraphPath'
110 122
                        }
111 123
                    </PARAM>
112 124
                    <PARAM required="true" type="string" name="params" managedBy="system">
113 125
                        {
114
                        'oozie.wf.application.path' : '/lib/dnet/dedup/scan/oozie_app',
115
                        'graphBasePath' : '/var/lib/dnet/graph_BETA/graph_raw',
116
                        'workingPath' : '/tmp/beta_dedup'
126
                        'oozie.wf.application.path' : '/lib/dnet/dedup/scan/oozie_app'
117 127
                        }
118 128
                    </PARAM>
119 129
                    <PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
120 130
                </PARAMETERS>
121 131
                <ARCS>
122
                    <ARC to="updateActionSets"/>
132
                    <ARC to="dedupConsistency"/>
123 133
                </ARCS>
124 134
            </NODE>
125 135

  
126
            <NODE name="updateActionSets" type="UpdateActionSets">
127
                <DESCRIPTION>update action sets</DESCRIPTION>
128
                <PARAMETERS/>
136
            <NODE name="dedupConsistency" type="SubmitHadoopJob">
137
                <DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
138
                <PARAMETERS>
139
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
140
                    <PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
141
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
142
                        {
143
                        'graphBasePath' : 'rawGraphPath',
144
                        'dedupGraphPath': 'dedupGraphPath',
145
                        'workingPath' 	: 'dedupWorkingPath'
146
                        }
147
                    </PARAM>
148
                    <PARAM required="true" type="string" name="params" managedBy="system">
149
                        {
150
                        'oozie.wf.application.path' : '/lib/dnet/dedup/consistency/oozie_app'
151
                        }
152
                    </PARAM>
153
                    <PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
154
                </PARAMETERS>
129 155
                <ARCS>
130 156
                    <ARC to="success"/>
131 157
                </ARCS>
132 158
            </NODE>
133 159

  
160

  
134 161
        </CONFIGURATION>
135 162
        <STATUS>
136 163
            <LAST_EXECUTION_ID>wf_20200320_224915_420</LAST_EXECUTION_ID>

Also available in: Unified diff