Project

General

Profile

1
<RESOURCE_PROFILE>
2
	<!-- Resource header: identifier, type, kind, URI and creation timestamp of this profile. -->
	<HEADER>
		<!-- The part of the identifier after '_' is the Base64 encoding of
		     "WorkflowDSResources/WorkflowDSResourceType", i.e. RESOURCE_KIND/RESOURCE_TYPE below. -->
		<RESOURCE_IDENTIFIER value="74d90d54-bea4-4a79-82d9-adddcc89e660_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
		<RESOURCE_KIND value="WorkflowDSResources"/>
		<!-- Intentionally left empty in this profile. -->
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z"/>
	</HEADER>
9
	<BODY>
10
		<!-- Workflow descriptors: name, type and numeric priority. -->
		<WORKFLOW_NAME>Data Provision [OCEAN]</WORKFLOW_NAME>
		<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
		<CONFIGURATION start="manual">
14

    
15
			<!-- PREPARE NODES -->
16
			<!-- Start node (isStart="true"): defines the env parameter "graphBasePath"
			     with a user-editable value, then hands over to "rawGraph". -->
			<NODE name="setGraphBasePath" type="SetEnvParameter" isStart="true">
				<DESCRIPTION>Set the target directory to materialize the graph</DESCRIPTION>
				<PARAMETERS>
					<!-- Name of the env parameter (system managed) and its value (user managed). -->
					<PARAM managedBy="system" name="parameterName" required="true" type="string">graphBasePath</PARAM>
					<PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="rawGraph"/>
				</ARCS>
			</NODE>
26

    
27
			<!-- Submits the Oozie application that creates the raw graph, then hands over
			     to "setRawGraphPath".
			     NOTE(review): isJoin="true" is set although only one incoming arc
			     (from "setGraphBasePath") is visible in this profile; confirm it is intended. -->
			<NODE name="rawGraph" type="SubmitHadoopJob" isJoin="true">
				<DESCRIPTION>create the raw graph</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
					<PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
					<!-- Map whose value ("graphBasePath") is the env parameter name set by
					     "setGraphBasePath"; presumably resolved to its value at submission time (confirm). -->
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{
						'graphBasePath' : 'graphBasePath'
						}
					</PARAM>
					<!-- Literal job properties: Oozie application path, MongoDB and PostgreSQL
					     connection settings, working path and the reuseContent flag.
					     NOTE(review): the PostgreSQL password is stored here in plain text;
					     consider supplying it from a secured source. -->
					<PARAM required="true" type="string" name="params" managedBy="system">
						{
						'oozie.wf.application.path' : '/lib/dnet/migration/wfs/regular_all_steps/oozie_app',
						'mongoURL' : 'mongodb://beta.services.openaire.eu',
						'mongoDb' : 'mdstore',
						'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
						'postgresUser' : 'dnet',
						'postgresPassword' : 'dnetPwd',
						'workingPath' : '/tmp/dhp_migration_beta',
						'reuseContent' : 'true'
						}
					</PARAM>
					<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="setRawGraphPath"/>
				</ARCS>
			</NODE>
55

    
56
			<!-- Defines the env parameter "rawGraphPath" (used as 'graphBasePath' by the
			     dedup nodes further down), then hands over to "setDedupWorkingPath". -->
			<NODE name="setRawGraphPath" type="SetEnvParameter">
				<DESCRIPTION>Set the path containing the raw graph</DESCRIPTION>
				<PARAMETERS>
					<!-- Name of the env parameter (system managed) and its value (user managed). -->
					<PARAM managedBy="system" name="parameterName" required="true" type="string">rawGraphPath</PARAM>
					<PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA/graph_raw</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="setDedupWorkingPath"/>
				</ARCS>
			</NODE>
66

    
67
			<!-- Defines the env parameter "dedupWorkingPath" (used as 'workingPath' by the
			     dedup nodes further down), then hands over to "setDedupGraphPath". -->
			<NODE name="setDedupWorkingPath" type="SetEnvParameter">
				<DESCRIPTION>Set the path used by dedup workflows to store intermediate data</DESCRIPTION>
				<PARAMETERS>
					<!-- Name of the env parameter (system managed) and its value (user managed). -->
					<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupWorkingPath</PARAM>
					<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_dedup</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="setDedupGraphPath"/>
				</ARCS>
			</NODE>
77

    
78
			<!-- Defines the env parameter "dedupGraphPath" (referenced by the dedup nodes
			     further down), then hands over to "dedupOrganization". -->
			<NODE name="setDedupGraphPath" type="SetEnvParameter">
				<DESCRIPTION>Set the target path to store the deduped graph</DESCRIPTION>
				<PARAMETERS>
					<!-- Name of the env parameter (system managed) and its value (user managed). -->
					<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupGraphPath</PARAM>
					<PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/graph_BETA/graph_dedup</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="dedupOrganization"/>
				</ARCS>
			</NODE>
88

    
89
			<!-- Submits the dedup "scan" Oozie application for organizations
			     (actionSetId 'dedup-similarity-organization'), then hands over to "dedupResult". -->
			<NODE name="dedupOrganization" type="SubmitHadoopJob">
				<DESCRIPTION>deduplicate organizations</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
					<PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
					<!-- Map whose values are the env parameter names set by "setRawGraphPath",
					     "setDedupWorkingPath" and "setDedupGraphPath"; presumably resolved to
					     their values at submission time (confirm). -->
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{
						'graphBasePath' : 'rawGraphPath',
						'workingPath' 	: 'dedupWorkingPath',
						'dedupGraphPath': 'dedupGraphPath'
						}
					</PARAM>
					<!-- Literal job properties: Oozie application path and the action set to process. -->
					<PARAM required="true" type="string" name="params" managedBy="system">
						{
						'oozie.wf.application.path' : '/lib/dnet/dedup/scan/oozie_app',
						'actionSetId' 				: 'dedup-similarity-organization'
						}
					</PARAM>
					<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="dedupResult"/>
				</ARCS>
			</NODE>
113

    
114
			<!-- Submits the dedup "scan" Oozie application for results
			     (actionSetId 'dedup-similarity-result'), then hands over to "dedupConsistency". -->
			<NODE name="dedupResult" type="SubmitHadoopJob">
				<DESCRIPTION>deduplicate results</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
					<PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
					<!-- Map whose values are the env parameter names set by "setRawGraphPath",
					     "setDedupWorkingPath" and "setDedupGraphPath"; presumably resolved to
					     their values at submission time (confirm). -->
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{
						'graphBasePath' : 'rawGraphPath',
						'workingPath' 	: 'dedupWorkingPath',
						'dedupGraphPath': 'dedupGraphPath'
						}
					</PARAM>
					<!-- Literal job properties: Oozie application path and the action set to process. -->
					<PARAM required="true" type="string" name="params" managedBy="system">
						{
						'oozie.wf.application.path' : '/lib/dnet/dedup/scan/oozie_app',
						'actionSetId' 				: 'dedup-similarity-result'
						}
					</PARAM>
					<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="dedupConsistency"/>
				</ARCS>
			</NODE>
138

    
139
			<!-- Last node defined in this profile: submits the dedup "consistency" Oozie
			     application, then transitions to "success" (no node with that name is
			     declared in this profile; presumably a built-in terminal state, confirm). -->
			<NODE name="dedupConsistency" type="SubmitHadoopJob">
				<DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">executeOozieJob</PARAM>
					<PARAM required="true" type="string" name="cluster"   managedBy="system">IIS</PARAM>
					<!-- Map whose values are the env parameter names set by "setRawGraphPath",
					     "setDedupWorkingPath" and "setDedupGraphPath"; presumably resolved to
					     their values at submission time (confirm). -->
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{
						'graphBasePath' : 'rawGraphPath',
						'workingPath' 	: 'dedupWorkingPath',
						'dedupGraphPath': 'dedupGraphPath'
						}
					</PARAM>
					<!-- Literal job properties: only the Oozie application path is set here. -->
					<PARAM required="true" type="string" name="params" managedBy="system">
						{
						'oozie.wf.application.path' : '/lib/dnet/dedup/consistency/oozie_app'
						}
					</PARAM>
					<PARAM required="true" type="string" name="oozieReportActionsCsv" managedBy="system">build-report</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="success"/>
				</ARCS>
			</NODE>
162

    
163
		</CONFIGURATION>
164
		<!-- Status section; empty in this profile. -->
		<STATUS>
		</STATUS>
166
	</BODY>
167
</RESOURCE_PROFILE>
(17-17/26)