Project

General

Profile

1
<RESOURCE_PROFILE>
2
    <!-- Resource header: identifier, type, kind, URI and creation timestamp of this profile. -->
    <HEADER>
        <RESOURCE_IDENTIFIER
            value="2483c5e4-dde8-4452-bd9c-38d436d8d941_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
        <RESOURCE_KIND value="WorkflowDSResources"/>
        <!-- RESOURCE_URI carries an empty value in this profile. -->
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2020-03-24T11:15:39+00:00"/>
    </HEADER>
9
    <BODY>
10
        <!-- Workflow descriptor: name, type and priority of this workflow. -->
        <WORKFLOW_NAME>Offline Deduplication [OCEAN]</WORKFLOW_NAME>
        <WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual">
14

    
15
            <!--
                Start node (isStart="true"): sets the env parameter "graphBasePath"
                to a user-managed directory, then continues to REUSE_RAW_GRAPH.
            -->
            <NODE name="setGraphBasePath" type="SetEnvParameter" isStart="true">
                <DESCRIPTION>Set the target directory to materialize the graph</DESCRIPTION>
                <PARAMETERS>
                    <PARAM name="parameterName" type="string" required="true" managedBy="system">graphBasePath</PARAM>
                    <PARAM name="parameterValue" type="string" required="true" managedBy="user">/var/lib/dnet/graph_BETA</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="REUSE_RAW_GRAPH"/>
                </ARCS>
            </NODE>
25

    
26
            <!--
                Selection node: the user-managed "selection" value (restricted to YES or NO,
                currently YES) picks the outgoing arc with the matching name.
                YES goes straight to setRawGraphPath; NO goes through rawGraph first.
            -->
            <NODE name="REUSE_RAW_GRAPH" type="Selection">
                <DESCRIPTION>Is the raw graph already available?</DESCRIPTION>
                <PARAMETERS>
                    <PARAM name="selection" type="string" required="true" managedBy="user" function="validValues(['YES', 'NO'])">YES</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC name="YES" to="setRawGraphPath"/>
                    <ARC name="NO" to="rawGraph"/>
                </ARCS>
            </NODE>
36

    
37
            <!--
                Submits the Oozie application /lib/dnet/migration/wfs/regular_all_steps/oozie_app
                on cluster IIS, then continues to setRawGraphPath.
                "envParams" and "params" carry their key/value maps as element text.
                NOTE(review): 'postgresPassword' holds the placeholder 'xxx'; presumably the real
                credential is supplied elsewhere. Confirm before running against a live database.
            -->
            <NODE name="rawGraph" type="SubmitHadoopJob">
                <DESCRIPTION>create the raw graph</DESCRIPTION>
                <PARAMETERS>
                    <PARAM name="hadoopJob" type="string" required="true" managedBy="system">executeOozieJob</PARAM>
                    <PARAM name="cluster" type="string" required="true" managedBy="system">IIS</PARAM>
                    <PARAM name="envParams" type="string" required="true" managedBy="system">
                        {
                        'graphBasePath' : 'graphBasePath'
                        }
                    </PARAM>
                    <PARAM name="params" type="string" required="true" managedBy="system">
                        {
                        'oozie.wf.application.path' : '/lib/dnet/migration/wfs/regular_all_steps/oozie_app',
                        'mongoURL' : 'mongodb://beta.services.openaire.eu',
                        'mongoDb' : 'mdstore',
                        'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
                        'postgresUser' : 'dnet',
                        'postgresPassword' : 'xxx',
                        'workingPath' : '/tmp/dhp_migration_beta',
                        'reuseContent' : 'true'
                        }
                    </PARAM>
                    <PARAM name="simulation" type="boolean" required="true" managedBy="system">false</PARAM>
                    <PARAM name="oozieReportActionsCsv" type="string" required="true" managedBy="system">build-report</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="setRawGraphPath"/>
                </ARCS>
            </NODE>
66

    
67
            <!--
                Sets the env parameter "rawGraphPath" to a user-managed path,
                then continues to setDedupWorkingPath.
                Reached from REUSE_RAW_GRAPH (arc YES) and from rawGraph.
            -->
            <NODE name="setRawGraphPath" type="SetEnvParameter">
                <DESCRIPTION>Set the path containing the raw graph</DESCRIPTION>
                <PARAMETERS>
                    <PARAM name="parameterName" type="string" required="true" managedBy="system">rawGraphPath</PARAM>
                    <PARAM name="parameterValue" type="string" required="true" managedBy="user">/var/lib/dnet/graph_BETA/graph_raw</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="setDedupWorkingPath"/>
                </ARCS>
            </NODE>
77

    
78
            <!--
                Sets the env parameter "dedupWorkingPath" to a user-managed path,
                then continues to setDedupGraphPath.
            -->
            <NODE name="setDedupWorkingPath" type="SetEnvParameter">
                <DESCRIPTION>Set the path used by dedup workflows to store intermediate data</DESCRIPTION>
                <PARAMETERS>
                    <PARAM name="parameterName" type="string" required="true" managedBy="system">dedupWorkingPath</PARAM>
                    <PARAM name="parameterValue" type="string" required="true" managedBy="user">/tmp/beta_dedup</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="setDedupGraphPath"/>
                </ARCS>
            </NODE>
88

    
89
            <!--
                Sets the env parameter "dedupGraphPath" to a user-managed path,
                then continues to setDedupConfig.
            -->
            <NODE name="setDedupGraphPath" type="SetEnvParameter">
                <DESCRIPTION>Set the target path to store the deduped graph</DESCRIPTION>
                <PARAMETERS>
                    <PARAM name="parameterName" type="string" required="true" managedBy="system">dedupGraphPath</PARAM>
                    <PARAM name="parameterValue" type="string" required="true" managedBy="user">/var/lib/dnet/graph_BETA/graph_dedup</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="setDedupConfig"/>
                </ARCS>
            </NODE>
99

    
100
            <!--
                Selects the dedup configuration sequence (user-managed; candidate values come from
                obtainValues('dedupOrchestrations', {})) and names the parameter "dedup.conf.queue"
                via dedupConfigSequenceParam. Continues to duplicateScan.
            -->
            <NODE name="setDedupConfig" type="SetDedupConfiguration">
                <DESCRIPTION>Set Dedup conf</DESCRIPTION>
                <PARAMETERS>
                    <PARAM name="dedupConfigSequence" type="string" required="true" managedBy="user" function="obtainValues('dedupOrchestrations', {})">dedup-similarity-organization</PARAM>
                    <PARAM name="dedupConfigSequenceParam" type="string" required="true" managedBy="system">dedup.conf.queue</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="duplicateScan"/>
                </ARCS>
            </NODE>
110

    
111
            <!--
                Submits the Oozie application /lib/dnet/dedup/scan/oozie_app on cluster IIS,
                then continues to dedupConsistency.
                The "envParams" entries pair a job parameter with a workflow env parameter name;
                rawGraphPath, dedupWorkingPath and dedupGraphPath are set by earlier
                SetEnvParameter nodes of this workflow.
                NOTE(review): no node visible in this workflow sets an env parameter called
                'setActionSetId'. Confirm where 'actionSetId' is expected to come from.
            -->
            <NODE name="duplicateScan" type="SubmitHadoopJob">
                <DESCRIPTION>search for duplicates in the raw graph</DESCRIPTION>
                <PARAMETERS>
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
                    <PARAM required="true" type="string" name="cluster" managedBy="system">IIS</PARAM>
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
                        {
                        'actionSetId' : 'setActionSetId',
                        'graphBasePath' : 'rawGraphPath',
                        'workingPath' : 'dedupWorkingPath',
                        'dedupGraphPath' : 'dedupGraphPath'
                        }
                    </PARAM>
                    <PARAM required="true" type="string" name="params" managedBy="system">
                        {
                        'oozie.wf.application.path' : '/lib/dnet/dedup/scan/oozie_app'
                        }
                    </PARAM>
                    <PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="dedupConsistency"/>
                </ARCS>
            </NODE>
135

    
136
            <!--
                Submits the Oozie application /lib/dnet/dedup/consistency/oozie_app on cluster IIS,
                then continues to "success".
                The "envParams" entries pair a job parameter with a workflow env parameter name;
                rawGraphPath, dedupGraphPath and dedupWorkingPath are set by earlier
                SetEnvParameter nodes of this workflow.
                Every entry of the envParams map except the last must end with a comma,
                and the last entry must not.
            -->
            <NODE name="dedupConsistency" type="SubmitHadoopJob">
                <DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
                <PARAMETERS>
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
                    <PARAM required="true" type="string" name="cluster" managedBy="system">IIS</PARAM>
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
                        {
                        'graphBasePath' : 'rawGraphPath',
                        'dedupGraphPath' : 'dedupGraphPath',
                        'workingPath' : 'dedupWorkingPath'
                        }
                    </PARAM>
                    <PARAM required="true" type="string" name="params" managedBy="system">
                        {
                        'oozie.wf.application.path' : '/lib/dnet/dedup/consistency/oozie_app'
                        }
                    </PARAM>
                    <PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="success"/>
                </ARCS>
            </NODE>
159

    
160

    
161
        </CONFIGURATION>
162
        <!--
            Record of the last execution: id, date, status, an empty error element,
            and two named output counters (both 0).
        -->
        <STATUS>
            <LAST_EXECUTION_ID>wf_20200320_224915_420</LAST_EXECUTION_ID>
            <LAST_EXECUTION_DATE>2020-03-20T23:07:47+00:00</LAST_EXECUTION_DATE>
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
            <LAST_EXECUTION_ERROR/>
            <LAST_EXECUTION_OUTPUT name="mainlog:storeSimilarities:count">0</LAST_EXECUTION_OUTPUT>
            <LAST_EXECUTION_OUTPUT name="mainlog:storeDissimilarities:count">0</LAST_EXECUTION_OUTPUT>
        </STATUS>
170
    </BODY>
171
</RESOURCE_PROFILE>
(2-2/8)