<?xml version="1.0" encoding="UTF-8"?>
<RESOURCE_PROFILE>
2
	<!-- Profile header: resource identity, type/kind classification and creation timestamp. -->
	<HEADER>
		<!-- The part after "_" is base64 for "WorkflowTemplateDSResources/WorkflowTemplateDSResourceType",
		i.e. the RESOURCE_KIND and RESOURCE_TYPE values declared below. -->
		<RESOURCE_IDENTIFIER value="a4434d62-d4cd-4c73-a107-bc7c62e6f815_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
		<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/>
		<RESOURCE_KIND value="WorkflowTemplateDSResources"/>
		<!-- Intentionally left empty in this profile. -->
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2016-06-25T10:24:32+00:00"/>
	</HEADER>
9
	<BODY>
10
		<CONFIGURATION>
11

    
12
			<!-- Template inputs. Workflow nodes below consume them by name through ref="...". -->
			<PARAMETERS>

				<!-- Index metadata format triple (format name / layout / interpretation). -->
				<PARAM name="mdFormat" description="metadata format name" required="true" type="string"/>
				<PARAM name="layout" description="metadata format layout" required="true" type="string" default="index"/>
				<PARAM name="interpretation" description="metadata format interpretation" required="true" type="string" default="openaire"/>

				<!-- Source HBase table and the Hadoop cluster the jobs are submitted to. -->
				<PARAM name="hbaseTable" description="HBase table name" required="true" type="string"/>
				<PARAM name="cluster" description="Hadoop cluster name" required="true" type="string"/>

				<!-- Forwarded to the OAI store update sub-workflow (see node updateOAI). -->
				<PARAM name="oaiDBName" description="OAI store db name" required="true" type="string"/>

			</PARAMETERS>
24

    
25
			<WORKFLOW>
26

    
27
				<!-- PREPARE NODES -->
28
				<!--
29
				<NODE isStart="true" name="setInfo" type="SetFormatInfo">
30
					<DESCRIPTION>set mdformat, layout, interpretation</DESCRIPTION>
31
					<PARAMETERS>
32
						<PARAM name="format" ref="mdFormat" />
33
						<PARAM name="layout" ref="index" />
34
						<PARAM name="interpretation" ref="openaire" />
35
					</PARAMETERS>
36
					<ARCS>
37
						<ARC to="groupEntities"/>
38
					</ARCS>
39
				</NODE>
40
				-->
41

    
42
				<!-- Start node of the preparation branch: loads the link semantic profiles. -->
				<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true">
					<DESCRIPTION>loads the link semantic profiles</DESCRIPTION>
					<PARAMETERS>
						<!-- The query text is taken from a configuration property, not from this profile. -->
						<PARAM name="xquery" property="dnet.openaire.model.relclasses.xquery"/>
						<!-- "relClasses" is the env key that groupEntities later passes to its Hadoop job;
						presumably this node stores its result under that key (TODO confirm). -->
						<PARAM name="relClassesName" value="relClasses"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="fetchContexts"/>
					</ARCS>
				</NODE>
52

    
53
				<!-- Second preparation step: loads the context profiles, then hands over to fetchEntityLinks. -->
				<NODE type="LoadContextsJob" name="fetchContexts">
					<DESCRIPTION>loads the context profiles</DESCRIPTION>
					<PARAMETERS>
						<!-- "contextmap" is the env key that groupEntities later passes to its Hadoop job;
						presumably the loaded contexts are stored under it (TODO confirm). -->
						<PARAM name="contextParam" value="contextmap"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="fetchEntityLinks"/>
					</ARCS>
				</NODE>
62

    
63
				<!-- Last preparation step: loads the entity link profile and feeds the groupEntities join. -->
				<NODE type="LoadEntityLinksJob" name="fetchEntityLinks">
					<DESCRIPTION>loads the entity link profile</DESCRIPTION>
					<PARAMETERS>
						<!-- "index.entity.links" is the env key that groupEntities later passes to its Hadoop job;
						presumably the loaded profile is stored under it (TODO confirm). -->
						<PARAM name="entityLinksParam" value="index.entity.links"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="groupEntities"/>
					</ARCS>
				</NODE>
72

    
73

    
74
				<!-- UPDATE INDEX -->
75
				<!-- Start node of the index branch (the workflow declares two start nodes; the other one is
				fetchRelClasses). Looks up an index DS for the format/layout/interpretation triple. -->
				<NODE isStart="true" name="findIndex" type="FindIndex">
					<DESCRIPTION>Search for an index DS</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="mdFormat" ref="mdFormat"/>
						<PARAM name="layout" ref="layout"/>
						<PARAM name="interpretation" ref="interpretation"/>
					</PARAMETERS>
					<ARCS>
						<!-- Named arcs: an existing index skips creation, a missing one is created first. -->
						<ARC name="found" to="prepareIndexing"/>
						<ARC name="notFound" to="createIndex"/>
					</ARCS>
				</NODE>
87

    
88
				<!-- Reached only through the "notFound" arc of findIndex: creates the index, then
				continues with prepareIndexing like the "found" path does. -->
				<NODE type="CreateIndex" name="createIndex">
					<DESCRIPTION>creates the index</DESCRIPTION>
					<PARAMETERS>
						<!-- Note: the metadata format is passed as "format" here, while findIndex uses "mdFormat". -->
						<PARAM name="format" ref="mdFormat"/>
						<PARAM name="layout" ref="layout"/>
						<PARAM name="interpretation" ref="interpretation"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="prepareIndexing"/>
					</ARCS>
				</NODE>
99

    
100
				<!-- Prepares the parameters consumed by the indexing jobs, then fans out to the two
				HDFS cleanup nodes (both of which lead to the groupEntities join). -->
				<NODE name="prepareIndexing" type="PrepareIndexJob">
					<DESCRIPTION>Prepare parameters for indexing</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="hbaseTable" ref="hbaseTable"/>
						<!-- Env keys later read by cleanupXml / groupEntities / updateIndex / updateOAI
						("hdfsRecordsPath") and by cleanupRotten / updateIndex ("rottenRecordsPath"). -->
						<PARAM name="outputRecordsPathParam" value="hdfsRecordsPath"/>
						<PARAM name="rottenRecordsPathParam" value="rottenRecordsPath"/>
						<!-- Single "/" separators only: a doubled slash is not matched by exact-name
						resource lookups (e.g. entries inside a jar). -->
						<PARAM name="layoutToRecordStylesheet" value="/eu/dnetlib/msro/workflows/index/openaireLayoutToRecordStylesheet.xsl"/>
						<PARAM name="oafSchemaLocation" property="oaf.schema.location"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="cleanupXml"/>
						<ARC to="cleanupRotten"/>
					</ARCS>
				</NODE>
114

    
115
				<!-- Deletes the HDFS path held in env "hdfsRecordsPath" (the XML records output dir
				that groupEntities writes to) before the join. -->
				<NODE type="DeleteHdfsPathJob" name="cleanupXml">
					<DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="cluster" ref="cluster"/>
						<PARAM name="path" env="hdfsRecordsPath"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="groupEntities"/>
					</ARCS>
				</NODE>
125

    
126
				<!-- Deletes the HDFS path held in env "rottenRecordsPath" (used by updateIndex as its
				mapred.output.dir) before the join. -->
				<NODE type="DeleteHdfsPathJob" name="cleanupRotten">
					<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="cluster" ref="cluster"/>
						<PARAM name="path" env="rottenRecordsPath"/>
					</PARAMETERS>
					<ARCS>
						<ARC to="groupEntities"/>
					</ARCS>
				</NODE>
136

    
137

    
138
				<!-- Join node: incoming arcs arrive from fetchEntityLinks, cleanupXml and cleanupRotten.
				Submits the prepareIndexDataJob Hadoop job, then fans out to the index and OAI branches. -->
				<NODE name="groupEntities" type="SubmitHadoopJob" isJoin="true">
					<DESCRIPTION>join the entities</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="cluster" ref="cluster"/>
						<PARAM name="hadoopJob" value="prepareIndexDataJob"/>
						<PARAM name="jobParams">
							<MAP>
								<!-- The same table is supplied under both key spellings
								(presumably for old and new HBase/Hadoop API names; TODO confirm). -->
								<ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
								<ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
								<ENTRY key="mapred.output.dir" env="hdfsRecordsPath"/>
								<!-- Env keys named by the three preparation nodes. -->
								<ENTRY key="index.entity.links" env="index.entity.links"/>
								<ENTRY key="contextmap" env="contextmap"/>
								<ENTRY key="relClasses" env="relClasses"/>
								<ENTRY key="oaf.schema.location" property="oaf.schema.location"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="updateIndex"/>
						<!-- Stats branch is disabled together with the commented-out updateStats node. -->
						<!--<ARC to="updateStats"/>-->
						<ARC to="updateOAI"/>
					</ARCS>
				</NODE>
161

    
162

    
163
				<!-- Submits the indexFeedJob Hadoop job: reads the records written by groupEntities
				(env "hdfsRecordsPath") and uses env "rottenRecordsPath" as its output dir. -->
				<NODE type="SubmitHadoopJob" name="updateIndex">
					<DESCRIPTION>feeds the index</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="cluster" ref="cluster"/>
						<PARAM name="hadoopJob" value="indexFeedJob"/>
						<PARAM name="jobParams">
							<MAP>
								<ENTRY key="mapred.input.dir" env="hdfsRecordsPath"/>
								<ENTRY key="mapred.output.dir" env="rottenRecordsPath"/>
								<!-- The index.* values are read from env keys of the same name; no node in this
								profile names them explicitly (presumably set by prepareIndexing; TODO confirm). -->
								<ENTRY key="index.xslt" env="index.xslt"/>
								<ENTRY key="index.solr.url" env="index.solr.url"/>
								<ENTRY key="index.solr.collection" env="index.solr.collection"/>
								<ENTRY key="index.buffer.flush.threshold" env="index.buffer.flush.threshold"/>
								<ENTRY key="index.shutdown.wait.time" env="index.shutdown.wait.time"/>
								<ENTRY key="index.solr.sim.mode" env="index.solr.sim.mode"/>
								<ENTRY key="index.feed.timestamp" env="index.feed.timestamp"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="finalize"/>
					</ARCS>
				</NODE>
186

    
187
				<!-- Runs after updateIndex; takes no parameters of its own and continues with updateDs. -->
				<NODE type="FinalizeIndexFeeding" name="finalize">
					<DESCRIPTION>commit changes</DESCRIPTION>
					<PARAMETERS/>
					<ARCS>
						<ARC to="updateDs"/>
					</ARCS>
				</NODE>
194

    
195
				<!-- Final step of the index branch; takes no parameters and reports to the waitAll join. -->
				<NODE type="IndexDsUpdateJob" name="updateDs">
					<DESCRIPTION>update DS</DESCRIPTION>
					<PARAMETERS/>
					<ARCS>
						<ARC to="waitAll"/>
					</ARCS>
				</NODE>
202

    
203
				<!--  UPDATE STATS -->
204
				<!--
205
				<NODE name="updateStats" type="LaunchWorkflowTemplate">
206
					<DESCRIPTION>update stats</DESCRIPTION>
207
					<PARAMETERS>
208
						<PARAM name="wfTemplateId" value="b921c8fc-00e8-48c3-901a-5343e339d9fe_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==" />
209
						<PARAM name="wfTemplateParams">
210
							<MAP>
211
								<ENTRY key="cluster"                    ref="cluster" />
212
								<ENTRY key="Stats_Hbase_Source_Table"   ref="hbaseTable"/>
213
								<ENTRY key="Stats_indexConf"            env="index.entity.links"/>
214
							</MAP>
215
						</PARAM>
216
					</PARAMETERS>
217
					<ARCS>
218
						<ARC to="waitAll"/>
219
					</ARCS>
220
				</NODE>
221
				-->
222

    
223
				<!-- OAI STORE UPDATE -->
224
				<!-- OAI branch: launches another workflow template (identified by wfTemplateId) and
				passes it the cluster, the records path produced by groupEntities and the OAI db name. -->
				<NODE type="LaunchWorkflowTemplate" name="updateOAI">
					<DESCRIPTION>update OAI store</DESCRIPTION>
					<PARAMETERS>
						<PARAM name="wfTemplateId" value="12f1db9b-3c3e-4fd6-a425-932afd6ef3be_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
						<PARAM name="wfTemplateParams">
							<MAP>
								<ENTRY key="cluster" ref="cluster"/>
								<ENTRY key="inputRecordsPath" env="hdfsRecordsPath"/>
								<ENTRY key="oaiDBName" ref="oaiDBName"/>
							</MAP>
						</PARAM>
					</PARAMETERS>
					<ARCS>
						<ARC to="waitAll"/>
					</ARCS>
				</NODE>
240

    
241
				<!-- WAIT FOR ALL THE WF BRANCHES TO COMPLETE -->
242
				<!-- Join node without a type attribute: collects the index branch (updateDs) and the OAI
				branch (updateOAI). "success" is not defined as a node in this profile; presumably it is a
				built-in terminal state of the workflow engine (TODO confirm). -->
				<NODE name="waitAll" isJoin="true">
					<DESCRIPTION>wait for all the branches to complete</DESCRIPTION>
					<PARAMETERS/>
					<ARCS>
						<ARC to="success"/>
					</ARCS>
				</NODE>
249

    
250
			</WORKFLOW>
251

    
252
		</CONFIGURATION>
253

    
254
	</BODY>
255
</RESOURCE_PROFILE>