Project

General

Profile

« Previous | Next » 

Revision 58329

WIP: defining data provision workflow to be run on the OCEAN cluster

View differences:

modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/dedup/dedup.ocean.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="2483c5e4-dde8-4452-bd9c-38d436d8d941_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2020-03-20T22:49:01+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>Offline Deduplication [OCEAN]</WORKFLOW_NAME>
11
        <WORKFLOW_TYPE>Deduplication</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual">
14

  
15
            <NODE name="setGraphBasePath" type="SetEnvParameter" isStart="true">
16
                <DESCRIPTION>Set the target directory to materialize the graph</DESCRIPTION>
17
                <PARAMETERS>
18
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">graphBasePath</PARAM>
19
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA</PARAM>
20
                </PARAMETERS>
21
                <ARCS>
22
                    <ARC to="REUSE_RAW_GRAPH"/>
23
                </ARCS>
24
            </NODE>
25

  
26
            <NODE name="REUSE_RAW_GRAPH" type="Selection">
27
                <DESCRIPTION>Is the raw graph already available?</DESCRIPTION>
28
                <PARAMETERS>
29
                    <PARAM function="validValues(['YES', 'NO'])" managedBy="user" name="selection" required="true" type="string">YES</PARAM>
30
                </PARAMETERS>
31
                <ARCS>
32
                    <ARC name="YES" to="setActionSetId"/>
33
                    <ARC name="NO" to="rawGraph"/>
34
                </ARCS>
35
            </NODE>
36

  
37
            <NODE name="rawGraph" type="SubmitHadoopJob">
38
                <DESCRIPTION>create the raw graph</DESCRIPTION>
39
                <PARAMETERS>
40
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
41
                    <PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
42
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
43
                        {
44
                        'graphBasePath' : 'graphBasePath'
45
                        }
46
                    </PARAM>
47
                    <PARAM required="true" type="string" name="params" managedBy="system">
48
                        {
49
                        'oozie.wf.application.path' : '/lib/dnet/migration/wfs/regular_all_steps/oozie_app',
50
                        'mongoURL' : 'mongodb://beta.services.openaire.eu',
51
                        'mongoDb' : 'mdstore',
52
                        'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
53
                        'postgresUser' : 'dnet',
54
                        'postgresPassword' : 'xxxx',
55
                        'workingPath' : '/tmp/dhp_migration_beta',
56
                        'reuseContent' : 'true'
57
                        }
58
                    </PARAM>
59
                    <PARAM required="true" type="boolean" name="simulation" managedBy="system">false</PARAM>
60
                    <PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
61
                </PARAMETERS>
62
                <ARCS>
63
                    <ARC to="setActionSetId"/>
64
                </ARCS>
65
            </NODE>
66

  
67
            <NODE name="setActionSetId" type="SetEnvParameter">
68
                <DESCRIPTION>Set action set id</DESCRIPTION>
69
                <PARAMETERS>
70
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">setActionSetId</PARAM>
71
                    <PARAM function="obtainValues('dedupOrchestrations', {})" managedBy="user" name="parameterValue" required="true" type="string">dedup-similarity-organization</PARAM>
72
                </PARAMETERS>
73
                <ARCS>
74
                    <ARC to="setDedupConfig"/>
75
                </ARCS>
76
            </NODE>
77

  
78
            <NODE name="setDedupConfig" type="SetDedupConfiguration">
79
                <DESCRIPTION>Set Dedup conf</DESCRIPTION>
80
                <PARAMETERS>
81
                    <PARAM function="obtainValues('dedupOrchestrations', {})" managedBy="user" name="dedupConfigSequence" required="true" type="string">dedup-similarity-organization</PARAM>
82
                    <PARAM managedBy="system" name="dedupConfigSequenceParam" required="true" type="string">dedup.conf.queue</PARAM>
83
                </PARAMETERS>
84
                <ARCS>
85
                    <ARC to="prepareActionSet"/>
86
                </ARCS>
87
            </NODE>
88

  
89
            <NODE name="prepareActionSet" type="PrepareConfiguredActionSet">
90
                <DESCRIPTION>prepare action sets</DESCRIPTION>
91
                <PARAMETERS>
92
                    <PARAM managedBy="system" name="dedupConfigSequenceParam" required="true" type="string">dedup.conf.queue</PARAM>
93
                    <PARAM managedBy="system" name="jobProperty" required="true" type="string">rawSetId</PARAM>
94
                    <PARAM managedBy="system" name="actionSetPathParam" required="true" type="string">actionSetPath</PARAM>
95
                </PARAMETERS>
96
                <ARCS>
97
                    <ARC to="duplicateScan"/>
98
                </ARCS>
99
            </NODE>
100

  
101
            <NODE name="duplicateScan" type="SubmitHadoopJob">
102
                <DESCRIPTION>scan the raw graph for duplicates</DESCRIPTION>
103
                <PARAMETERS>
104
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
105
                    <PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
106
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
107
                        {
108
                        'rawSet' : 'actionSetPath',
109
                        'actionSetId' : 'setActionSetId'
110
                        }
111
                    </PARAM>
112
                    <PARAM required="true" type="string" name="params" managedBy="system">
113
                        {
114
                        'oozie.wf.application.path' : '/lib/dnet/dedup/scan/oozie_app',
115
                        'graphBasePath' : '/var/lib/dnet/graph_BETA/graph_raw',
116
                        'workingPath' : '/tmp/beta_dedup'
117
                        }
118
                    </PARAM>
119
                    <PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
120
                </PARAMETERS>
121
                <ARCS>
122
                    <ARC to="updateActionSets"/>
123
                </ARCS>
124
            </NODE>
125

  
126
            <NODE name="updateActionSets" type="UpdateActionSets">
127
                <DESCRIPTION>update action sets</DESCRIPTION>
128
                <PARAMETERS/>
129
                <ARCS>
130
                    <ARC to="success"/>
131
                </ARCS>
132
            </NODE>
133

  
134
        </CONFIGURATION>
135
        <STATUS>
136
            <LAST_EXECUTION_ID>wf_20200320_224915_420</LAST_EXECUTION_ID>
137
            <LAST_EXECUTION_DATE>2020-03-20T23:07:47+00:00</LAST_EXECUTION_DATE>
138
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
139
            <LAST_EXECUTION_ERROR/>
140
            <LAST_EXECUTION_OUTPUT name="mainlog:storeSimilarities:count">0</LAST_EXECUTION_OUTPUT>
141
            <LAST_EXECUTION_OUTPUT name="mainlog:storeDissimilarities:count">0</LAST_EXECUTION_OUTPUT>
142
        </STATUS>
143
    </BODY>
144
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/dedup/dedup.ocean.meta.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="6ebfea46-0329-4d25-97eb-f06133b992bb_TWV0YVdvcmtmbG93RFNSZXNvdXJjZXMvTWV0YVdvcmtmbG93RFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="MetaWorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="MetaWorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2020-02-18T15:01:54+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <METAWORKFLOW_NAME family="Deduplication [OCEAN]">Deduplication [OCEAN]</METAWORKFLOW_NAME>
11
        <METAWORKFLOW_DESCRIPTION/>
12
        <METAWORKFLOW_SECTION>InfoSpace Deduplication</METAWORKFLOW_SECTION>
13
        <ADMIN_EMAIL>alessia.bardi@isti.cnr.it,claudio.atzori@isti.cnr.it,michele.debonis@isti.cnr.it</ADMIN_EMAIL>
14
        <CONFIGURATION status="EXECUTABLE">
15
            <WORKFLOW id="2483c5e4-dde8-4452-bd9c-38d436d8d941_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="Duplicate scan">
16
            </WORKFLOW>
17
        </CONFIGURATION>
18
        <SCHEDULING enabled="false">
19
            <CRON>29 5 22 ? * *</CRON>
20
            <MININTERVAL>10080</MININTERVAL>
21
        </SCHEDULING>
22
    </BODY>
23
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/provision/ocean.provision.meta.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="d511a255-6356-4116-a862-5c1cc4ebd63c_TWV0YVdvcmtmbG93RFNSZXNvdXJjZXMvTWV0YVdvcmtmbG93RFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="MetaWorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="MetaWorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2020-02-18T15:01:54+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <METAWORKFLOW_NAME family="OpenAIRE Provision [OCEAN]">OpenAIRE Provision [OCEAN]</METAWORKFLOW_NAME>
11
        <METAWORKFLOW_DESCRIPTION/>
12
        <METAWORKFLOW_SECTION>InfoSpace Provision</METAWORKFLOW_SECTION>
13
        <ADMIN_EMAIL>alessia.bardi@isti.cnr.it,claudio.atzori@isti.cnr.it</ADMIN_EMAIL>
14
        <CONFIGURATION status="EXECUTABLE">
15
            <WORKFLOW id="74d90d54-bea4-4a79-82d9-adddcc89e660_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="provision">
16
            </WORKFLOW>
17
        </CONFIGURATION>
18
        <SCHEDULING enabled="false">
19
            <CRON>29 5 22 ? * *</CRON>
20
            <MININTERVAL>10080</MININTERVAL>
21
        </SCHEDULING>
22
    </BODY>
23
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/provision/ocean.provision.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="74d90d54-bea4-4a79-82d9-adddcc89e660_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
		<RESOURCE_KIND value="WorkflowDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z"/>
8
	</HEADER>
9
	<BODY>
10
		<WORKFLOW_NAME>Data Provision [OCEAN]</WORKFLOW_NAME>
11
		<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
12
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
		<CONFIGURATION start="manual">
14

  
15
			<!-- PREPARE NODES -->
16
			<NODE name="setGraphBasePath" type="SetEnvParameter" isStart="true">
17
				<DESCRIPTION>Set the target directory to materialize the graph</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM managedBy="system" name="parameterName" required="true" type="string">graphBasePath</PARAM>
20
					<PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA</PARAM>
21
				</PARAMETERS>
22
				<ARCS>
23
					<ARC to="rawGraph"/>
24
				</ARCS>
25
			</NODE>
26

  
27
			<NODE name="rawGraph" type="SubmitHadoopJob" isJoin="true">
28
				<DESCRIPTION>create the raw graph</DESCRIPTION>
29
				<PARAMETERS>
30
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
31
					<PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
32
					<PARAM required="true" type="string" name="envParams" managedBy="system">
33
						{
34
						'graphBasePath' : 'graphBasePath'
35
						}
36
					</PARAM>
37
					<PARAM required="true" type="string" name="params" managedBy="system">
38
						{
39
						'oozie.wf.application.path' : '/lib/dnet/migration/wfs/regular_all_steps/oozie_app',
40
						'mongoURL' : 'mongodb://beta.services.openaire.eu',
41
						'mongoDb' : 'mdstore',
42
						'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
43
						'postgresUser' : 'dnet',
44
						'postgresPassword' : 'dnetPwd',
45
						'workingPath' : '/tmp/dhp_migration_beta',
46
						'reuseContent' : 'true'
47
						}
48
					</PARAM>
49
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
50
					<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
51
				</PARAMETERS>
52
				<ARCS>
53
					<ARC to="success"/>
54
				</ARCS>
55
			</NODE>
56

  
57
		</CONFIGURATION>
58
		<STATUS>
59
		</STATUS>
60
	</BODY>
61
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/provision/provision.ocean.meta.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="d511a255-6356-4116-a862-5c1cc4ebd63c_TWV0YVdvcmtmbG93RFNSZXNvdXJjZXMvTWV0YVdvcmtmbG93RFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="MetaWorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="MetaWorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2020-02-18T15:01:54+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <METAWORKFLOW_NAME family="OpenAIRE Provision [OCEAN]">OpenAIRE Provision [OCEAN]</METAWORKFLOW_NAME>
11
        <METAWORKFLOW_DESCRIPTION/>
12
        <METAWORKFLOW_SECTION>InfoSpace Provision</METAWORKFLOW_SECTION>
13
        <ADMIN_EMAIL>alessia.bardi@isti.cnr.it,claudio.atzori@isti.cnr.it</ADMIN_EMAIL>
14
        <CONFIGURATION status="EXECUTABLE">
15
            <WORKFLOW id="74d90d54-bea4-4a79-82d9-adddcc89e660_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="provision">
16
            </WORKFLOW>
17
        </CONFIGURATION>
18
        <SCHEDULING enabled="false">
19
            <CRON>29 5 22 ? * *</CRON>
20
            <MININTERVAL>10080</MININTERVAL>
21
        </SCHEDULING>
22
    </BODY>
23
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/provision/provision.ocean.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="74d90d54-bea4-4a79-82d9-adddcc89e660_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
		<RESOURCE_KIND value="WorkflowDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z"/>
8
	</HEADER>
9
	<BODY>
10
		<WORKFLOW_NAME>Data Provision [OCEAN]</WORKFLOW_NAME>
11
		<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
12
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
		<CONFIGURATION start="manual">
14

  
15
			<!-- PREPARE NODES -->
16
			<NODE name="setGraphBasePath" type="SetEnvParameter" isStart="true">
17
				<DESCRIPTION>Set the target directory to materialize the graph</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM managedBy="system" name="parameterName" required="true" type="string">graphBasePath</PARAM>
20
					<PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA</PARAM>
21
				</PARAMETERS>
22
				<ARCS>
23
					<ARC to="rawGraph"/>
24
				</ARCS>
25
			</NODE>
26

  
27
			<NODE name="rawGraph" type="SubmitHadoopJob" isJoin="true">
28
				<DESCRIPTION>create the raw graph</DESCRIPTION>
29
				<PARAMETERS>
30
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
31
					<PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
32
					<PARAM required="true" type="string" name="envParams" managedBy="system">
33
						{
34
						'graphBasePath' : 'graphBasePath'
35
						}
36
					</PARAM>
37
					<PARAM required="true" type="string" name="params" managedBy="system">
38
						{
39
						'oozie.wf.application.path' : '/lib/dnet/migration/wfs/regular_all_steps/oozie_app',
40
						'mongoURL' : 'mongodb://beta.services.openaire.eu',
41
						'mongoDb' : 'mdstore',
42
						'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
43
						'postgresUser' : 'dnet',
44
						'postgresPassword' : 'dnetPwd',
45
						'workingPath' : '/tmp/dhp_migration_beta',
46
						'reuseContent' : 'true'
47
						}
48
					</PARAM>
49
					<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
50
				</PARAMETERS>
51
				<ARCS>
52
					<ARC to="setRawGraphPath"/>
53
				</ARCS>
54
			</NODE>
55

  
56
			<NODE name="setRawGraphPath" type="SetEnvParameter">
57
				<DESCRIPTION>Set the path containing the raw graph</DESCRIPTION>
58
				<PARAMETERS>
59
					<PARAM managedBy="system" name="parameterName" required="true" type="string">rawGraphPath</PARAM>
60
					<PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA/graph_raw</PARAM>
61
				</PARAMETERS>
62
				<ARCS>
63
					<ARC to="setDedupWorkingPath"/>
64
				</ARCS>
65
			</NODE>
66

  
67
			<NODE name="setDedupWorkingPath" type="SetEnvParameter">
68
				<DESCRIPTION>Set the path used by dedup workflows to store intermediate data</DESCRIPTION>
69
				<PARAMETERS>
70
					<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupWorkingPath</PARAM>
71
					<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_dedup</PARAM>
72
				</PARAMETERS>
73
				<ARCS>
74
					<ARC to="setDedupGraphPath"/>
75
				</ARCS>
76
			</NODE>
77

  
78
			<NODE name="setDedupGraphPath" type="SetEnvParameter">
79
				<DESCRIPTION>Set the target path to store the deduped graph</DESCRIPTION>
80
				<PARAMETERS>
81
					<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupGraphPath</PARAM>
82
					<PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA/graph_dedup</PARAM>
83
				</PARAMETERS>
84
				<ARCS>
85
					<ARC to="dedupOrganization"/>
86
				</ARCS>
87
			</NODE>
88

  
89
			<NODE name="dedupOrganization" type="SubmitHadoopJob">
90
				<DESCRIPTION>deduplicate organizations</DESCRIPTION>
91
				<PARAMETERS>
92
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
93
					<PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
94
					<PARAM required="true" type="string" name="envParams" managedBy="system">
95
						{
96
						'graphBasePath' : 'rawGraphPath',
97
						'workingPath' 	: 'dedupWorkingPath',
98
						'dedupGraphPath': 'dedupGraphPath'
99
						}
100
					</PARAM>
101
					<PARAM required="true" type="string" name="params" managedBy="system">
102
						{
103
						'oozie.wf.application.path' : '/lib/dnet/dedup/scan/oozie_app',
104
						'actionSetId' 				: 'dedup-similarity-organization'
105
						}
106
					</PARAM>
107
					<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
108
				</PARAMETERS>
109
				<ARCS>
110
					<ARC to="dedupResult"/>
111
				</ARCS>
112
			</NODE>
113

  
114
			<NODE name="dedupResult" type="SubmitHadoopJob">
115
				<DESCRIPTION>deduplicate results</DESCRIPTION>
116
				<PARAMETERS>
117
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
118
					<PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
119
					<PARAM required="true" type="string" name="envParams" managedBy="system">
120
						{
121
						'graphBasePath' : 'rawGraphPath',
122
						'workingPath' 	: 'dedupWorkingPath',
123
						'dedupGraphPath': 'dedupGraphPath'
124
						}
125
					</PARAM>
126
					<PARAM required="true" type="string" name="params" managedBy="system">
127
						{
128
						'oozie.wf.application.path' : '/lib/dnet/dedup/scan/oozie_app',
129
						'actionSetId' 				: 'dedup-similarity-result'
130
						}
131
					</PARAM>
132
					<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
133
				</PARAMETERS>
134
				<ARCS>
135
					<ARC to="dedupConsistency"/>
136
				</ARCS>
137
			</NODE>
138

  
139
			<NODE name="dedupConsistency" type="SubmitHadoopJob">
140
				<DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
141
				<PARAMETERS>
142
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
143
					<PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
144
					<PARAM required="true" type="string" name="envParams" managedBy="system">
145
						{
146
						'graphBasePath' : 'rawGraphPath',
147
						'workingPath' 	: 'dedupWorkingPath',
148
						'dedupGraphPath': 'dedupGraphPath'
149
						}
150
					</PARAM>
151
					<PARAM required="true" type="string" name="params" managedBy="system">
152
						{
153
						'oozie.wf.application.path' : '/lib/dnet/dedup/consistency/oozie_app'
154
						}
155
					</PARAM>
156
					<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
157
				</PARAMETERS>
158
				<ARCS>
159
					<ARC to="success"/>
160
				</ARCS>
161
			</NODE>
162

  
163
		</CONFIGURATION>
164
		<STATUS>
165
		</STATUS>
166
	</BODY>
167
</RESOURCE_PROFILE>

Also available in: Unified diff