Project

General

Profile

« Previous | Next » 

Revision 48139

integrated (hopefully) all required changes from dnet40

View differences:

dm.provision.xml
1 1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="a4434d62-d4cd-4c73-a107-bc7c62e6f815_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2014-08-01T18:13:51.0Z" />
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>Data Provision</WORKFLOW_NAME>
11
        <WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual">
14
        
15
        	<!-- PREPARE NODES -->
16
            <NODE name="setInfo" type="SetFormatInfo" isStart="true">
17
                <DESCRIPTION>set mdformat, layout, interpretation</DESCRIPTION>
18
                <PARAMETERS>
19
                    <PARAM name="format" type="string" managedBy="user" required="true">DMF</PARAM>
20
                    <PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM>
21
                    <PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM>
22
                </PARAMETERS>
23
                <ARCS>
24
                    <ARC to="findIndex"/>
25
                </ARCS>
26
            </NODE>
27
            <NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true">
28
                <DESCRIPTION/>
29
                <PARAMETERS>
30
                    <PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM>
31
                    <PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM>
32
                </PARAMETERS>
33
                <ARCS>
34
                    <ARC to="groupEntities"/>
35
                </ARCS>
36
            </NODE>
37
            <NODE name="fetchContexts" type="LoadContextsJob" isStart="true">
38
                <DESCRIPTION/>
39
                <PARAMETERS/>
40
                <ARCS>
41
                    <ARC to="groupEntities"/>
42
                </ARCS>
43
            </NODE>
44
            <NODE name="fetchEntityLinks" type="LoadEntityLinksJob" isStart="true">
45
                <DESCRIPTION/>
46
                <PARAMETERS/>
47
                <ARCS>
48
                    <ARC to="groupEntities"/>
49
                </ARCS>
50
            </NODE>
51
			
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="a4434d62-d4cd-4c73-a107-bc7c62e6f815_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
		<RESOURCE_KIND value="WorkflowDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z"/>
8
	</HEADER>
9
	<BODY>
10
		<WORKFLOW_NAME>Data Provision</WORKFLOW_NAME>
11
		<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
12
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
		<CONFIGURATION start="manual">
14

  
15
			<!-- PREPARE NODES -->
16
			<NODE name="setInfo" type="SetFormatInfo" isStart="true">
17
				<DESCRIPTION>set mdformat, layout, interpretation</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM name="format" type="string" managedBy="user" required="true">DMF</PARAM>
20
					<PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM>
21
					<PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM>
22
				</PARAMETERS>
23
				<ARCS>
24
					<ARC to="findIndex"/>
25
				</ARCS>
26
			</NODE>
27
			<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true">
28
				<DESCRIPTION/>
29
				<PARAMETERS>
30
					<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM>
31
					<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM>
32
				</PARAMETERS>
33
				<ARCS>
34
					<ARC to="groupEntities"/>
35
				</ARCS>
36
			</NODE>
37
			<NODE name="fetchContexts" type="LoadContextsJob" isStart="true">
38
				<DESCRIPTION/>
39
				<PARAMETERS/>
40
				<ARCS>
41
					<ARC to="groupEntities"/>
42
				</ARCS>
43
			</NODE>
44
			<NODE name="fetchEntityLinks" type="LoadEntityLinksJob" isStart="true">
45
				<DESCRIPTION/>
46
				<PARAMETERS/>
47
				<ARCS>
48
					<ARC to="groupEntities"/>
49
				</ARCS>
50
			</NODE>
51

  
52 52
			<!-- UPDATE INDEX -->
53
            <NODE name="findIndex" type="FindIndex">
54
                <DESCRIPTION/>
55
                <PARAMETERS/>
56
                <ARCS>
57
                    <ARC name="found" to="prepareIndexing"/>
58
                    <ARC name="notFound" to="createIndex"/>
59
                </ARCS>
60
            </NODE>
61
            <NODE name="createIndex" type="CreateIndex">
62
                <DESCRIPTION/>
63
                <PARAMETERS/>
64
                <ARCS>
65
                    <ARC to="prepareIndexing"/>
66
                </ARCS>
67
            </NODE>
68
            <NODE name="prepareIndexing" type="PrepareIndexJob">
69
                <DESCRIPTION>Prepare indexing</DESCRIPTION>
70
                <PARAMETERS>
71
                    <PARAM name="outputRecordsPathParam" type="string" managedBy="system" required="true">hdfsRecordsPath</PARAM>
72
                    <PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
73
                    <PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
74
                    <PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
75
                </PARAMETERS>
76
                <ARCS>
77
                    <ARC to="cleanupXml"/>
78
                    <ARC to="cleanupRotten"/>
79
                </ARCS>
80
            </NODE>
81
            <NODE name="cleanupXml" type="DeleteHdfsPathJob">
82
                <DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
83
                <PARAMETERS>
84
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
85
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
86
						{ 	
87
							'path' : 'hdfsRecordsPath'
53
			<NODE name="findIndex" type="FindIndex">
54
				<DESCRIPTION/>
55
				<PARAMETERS/>
56
				<ARCS>
57
					<ARC name="found" to="prepareIndexing"/>
58
					<ARC name="notFound" to="createIndex"/>
59
				</ARCS>
60
			</NODE>
61
			<NODE name="createIndex" type="CreateIndex">
62
				<DESCRIPTION/>
63
				<PARAMETERS/>
64
				<ARCS>
65
					<ARC to="prepareIndexing"/>
66
				</ARCS>
67
			</NODE>
68
			<NODE name="prepareIndexing" type="PrepareIndexJob">
69
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
70
				<PARAMETERS>
71
					<PARAM name="outputRecordsPathParam" type="string" managedBy="system" required="true">hdfsRecordsPath</PARAM>
72
					<PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
73
					<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
74
					<PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
75
				</PARAMETERS>
76
				<ARCS>
77
					<ARC to="cleanupXml"/>
78
					<ARC to="cleanupRotten"/>
79
				</ARCS>
80
			</NODE>
81
			<NODE name="cleanupXml" type="DeleteHdfsPathJob">
82
				<DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
83
				<PARAMETERS>
84
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
85
					<PARAM required="true" type="string" name="envParams" managedBy="system">
86
						{
87
						'path' : 'hdfsRecordsPath'
88 88
						}
89 89
					</PARAM>
90
                </PARAMETERS>
91
                <ARCS>
92
                    <ARC to="groupEntities"/>
93
                </ARCS>
94
            </NODE>
95
            <NODE name="cleanupRotten" type="DeleteHdfsPathJob">
96
                <DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
97
                <PARAMETERS>
98
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
99
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
100
						{ 	
101
							'path' : 'rottenRecordsPath'
90
				</PARAMETERS>
91
				<ARCS>
92
					<ARC to="groupEntities"/>
93
				</ARCS>
94
			</NODE>
95
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
96
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
97
				<PARAMETERS>
98
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
99
					<PARAM required="true" type="string" name="envParams" managedBy="system">
100
						{
101
						'path' : 'rottenRecordsPath'
102 102
						}
103 103
					</PARAM>
104
                </PARAMETERS>
105
                <ARCS>
106
                    <ARC to="groupEntities"/>
107
                </ARCS>
108
            </NODE>
109
            <NODE name="groupEntities" type="SubmitHadoopJob" isJoin="true">
110
                <DESCRIPTION>M/R group entities</DESCRIPTION>
111
                <PARAMETERS>
112
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
113
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">prepareIndexDataJob</PARAM>
114
                    <PARAM required="true" type="string" name="sysParams" managedBy="system">
115
						{ 	
116
							'hbase.mapred.inputtable' : 'hbase.mapred.datatable', 
117
							'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
104
				</PARAMETERS>
105
				<ARCS>
106
					<ARC to="groupEntities"/>
107
				</ARCS>
108
			</NODE>
109
			<NODE name="groupEntities" type="SubmitHadoopJob" isJoin="true">
110
				<DESCRIPTION>M/R group entities</DESCRIPTION>
111
				<PARAMETERS>
112
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
113
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">prepareIndexDataJob</PARAM>
114
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
115
						{
116
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
117
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
118 118
						}
119 119
					</PARAM>
120
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
121
						{ 	
122
							'mapred.output.dir' : 'hdfsRecordsPath',
123
							'index.entity.links' : 'index.entity.links',
124
							'oaf.schema.location' : 'oaf.schema.location',
125
							'contextmap' : 'contextmap',
126
							'relClasses' : 'relClasses'
120
					<PARAM required="true" type="string" name="envParams" managedBy="system">
121
						{
122
						'mapred.output.dir' : 'hdfsRecordsPath',
123
						'index.entity.links' : 'index.entity.links',
124
						'oaf.schema.location' : 'oaf.schema.location',
125
						'contextmap' : 'contextmap',
126
						'relClasses' : 'relClasses'
127 127
						}
128 128
					</PARAM>
129
					<PARAM required="true" type="boolean" name="simulation"	managedBy="user">false</PARAM>
130
                </PARAMETERS>
131
                <ARCS>
132
                    <ARC to="updateIndex"/>
133
                    <ARC to="setOAIFormat"/>
134
                    <ARC to="prepareStats"/>
135
                </ARCS>
136
            </NODE>
137
            <NODE name="updateIndex" type="SubmitHadoopJob">
138
                <DESCRIPTION>M/O index records</DESCRIPTION>
139
                <PARAMETERS>
140
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
141
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>
142
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
143
						{ 	
144
							'mapred.input.dir' : 'hdfsRecordsPath',
145
							'mapred.output.dir' : 'rottenRecordsPath',
146
							'index.xslt' : 'index.xslt',
147
							'index.solr.url' : 'index.solr.url',
148
							'index.solr.collection' : 'index.solr.collection',
149
							'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
150
							'index.shutdown.wait.time' : 'index.shutdown.wait.time',
151
							'index.solr.sim.mode' : 'index.solr.sim.mode',
152
							'index.feed.timestamp' : 'index.feed.timestamp'
129
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
130
				</PARAMETERS>
131
				<ARCS>
132
					<ARC to="updateIndex"/>
133
					<ARC to="setOAIFormat"/>
134
					<ARC to="prepareStats"/>
135
				</ARCS>
136
			</NODE>
137
			<NODE name="updateIndex" type="SubmitHadoopJob">
138
				<DESCRIPTION>M/O index records</DESCRIPTION>
139
				<PARAMETERS>
140
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
141
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>
142
					<PARAM required="true" type="string" name="envParams" managedBy="system">
143
						{
144
						'mapred.input.dir' : 'hdfsRecordsPath',
145
						'mapred.output.dir' : 'rottenRecordsPath',
146
						'index.xslt' : 'index.xslt',
147
						'index.solr.url' : 'index.solr.url',
148
						'index.solr.collection' : 'index.solr.collection',
149
						'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
150
						'index.shutdown.wait.time' : 'index.shutdown.wait.time',
151
						'index.solr.sim.mode' : 'index.solr.sim.mode',
152
						'index.feed.timestamp' : 'index.feed.timestamp'
153 153
						}
154 154
					</PARAM>
155
					<PARAM required="true" type="boolean" name="simulation"	managedBy="user">false</PARAM>
156
                </PARAMETERS>
157
                <ARCS>
158
                    <ARC to="finalize"/>
159
                </ARCS>
160
            </NODE>
161
            <NODE name="finalize" type="FinalizeIndexFeeding">
162
                <DESCRIPTION>commit changes</DESCRIPTION>
163
                <PARAMETERS/>
164
                <ARCS>
165
                    <ARC to="updateDs"/>
166
                </ARCS>
167
            </NODE>
168
            <NODE name="updateDs" type="IndexDsUpdateJob">
169
                <DESCRIPTION>update DS</DESCRIPTION>
170
                <PARAMETERS/>
171
                <ARCS>
172
                    <ARC to="waitAll"/>
173
                </ARCS>
174
            </NODE>				
155
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
156
				</PARAMETERS>
157
				<ARCS>
158
					<ARC to="finalize"/>
159
				</ARCS>
160
			</NODE>
161
			<NODE name="finalize" type="FinalizeIndexFeeding">
162
				<DESCRIPTION>commit changes</DESCRIPTION>
163
				<PARAMETERS/>
164
				<ARCS>
165
					<ARC to="updateDs"/>
166
				</ARCS>
167
			</NODE>
168
			<NODE name="updateDs" type="IndexDsUpdateJob">
169
				<DESCRIPTION>update DS</DESCRIPTION>
170
				<PARAMETERS/>
171
				<ARCS>
172
					<ARC to="waitAll"/>
173
				</ARCS>
174
			</NODE>
175 175

  
176 176
			<!--  UPDATE STATS -->
177
            <NODE name="prepareStats" type="PrepareStatsParams">
178
                <DESCRIPTION>set params for stats</DESCRIPTION>
179
                <PARAMETERS/>
180
                <ARCS>
181
                    <ARC to="updateStats"/>
182
                </ARCS>
183
            </NODE>
184
            <NODE name="updateStats" type="SubmitHadoopJob">
185
                <DESCRIPTION>Update stats DB</DESCRIPTION>
186
                <PARAMETERS>
187
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
188
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">StatsExportJob</PARAM>
189
                    <PARAM required="true" type="string" name="sysParams" managedBy="system">
190
						{ 	
191
							'Stats_Hbase_Source_Table' : 'hbase.mapred.datatable'
177
			<NODE name="prepareStats" type="PrepareStatsParams">
178
				<DESCRIPTION>set params for stats</DESCRIPTION>
179
				<PARAMETERS/>
180
				<ARCS>
181
					<ARC to="updateStats"/>
182
				</ARCS>
183
			</NODE>
184
			<NODE name="updateStats" type="SubmitHadoopJob">
185
				<DESCRIPTION>Update stats DB</DESCRIPTION>
186
				<PARAMETERS>
187
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
188
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">StatsExportJob</PARAM>
189
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
190
						{
191
						'Stats_Hbase_Source_Table' : 'hbase.mapred.datatable'
192 192
						}
193 193
					</PARAM>
194 194
					<PARAM required="true" type="string" name="envParams" managedBy="system">
195
						{ 	
196
							'nameNode' : 'nameNode',
197
							'jobTracker' : 'jobTracker',
198
							'isLookupEndpoint' : 'isLookupEndpoint',
199
							'Stats_indexConf' : 'index.entity.links'
195
						{
196
						'nameNode' : 'nameNode',
197
						'jobTracker' : 'jobTracker',
198
						'isLookupEndpoint' : 'isLookupEndpoint',
199
						'Stats_indexConf' : 'index.entity.links'
200 200
						}
201 201
					</PARAM>
202
					<PARAM required="true" type="boolean" name="simulation"	managedBy="user">false</PARAM>
203
                </PARAMETERS>
204
                <ARCS>
205
                    <ARC to="waitAll"/>
206
                </ARCS>
207
            </NODE>
208
            
202
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
203
				</PARAMETERS>
204
				<ARCS>
205
					<ARC to="waitAll"/>
206
				</ARCS>
207
			</NODE>
208

  
209 209
			<!-- OAI STORE UPDATE -->
210
            <NODE name="setOAIFormat" type="SetFormatInfo">
211
                <DESCRIPTION>set format, layout, interpretation for OAI publisher</DESCRIPTION>
212
                <PARAMETERS>
213
                    <PARAM name="format" type="string" managedBy="user" required="true">oaf</PARAM>
214
                    <PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM>
215
                    <PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM>
216
                    <PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
217
                    <PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
218
                    <PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
219
                </PARAMETERS>
220
                <ARCS>
221
                    <ARC to="prepareOAI"/>
222
                </ARCS>
223
            </NODE>
224
            <NODE name="prepareOAI" type="PrepareOaiJob">
225
                <DESCRIPTION>Prepare params for OAI store feeding</DESCRIPTION>
226
                <PARAMETERS>
227
                    <PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
228
                    <PARAM name="oaiDBName" type="string" managedBy="user" required="true">oaistore</PARAM>
229
                    <PARAM name="oaiDBNameParam" type="string" managedBy="system" required="true">oai_dbName</PARAM>
230
                    <PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
231
                    <PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
232
                    <PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
233
                    <PARAM name="skipDuplicates" type="boolean" managedBy="user" required="true">true</PARAM>
234
                    <PARAM name="duplicateXPath" type="string" managedBy="user" required="true">//*[local-name()='entity']//*[local-name()='datainfo']/*[local-name()='deletedbyinference'][./text() = 'true']</PARAM>
235
                </PARAMETERS>
236
                <ARCS>
237
                    <ARC to="OAIDropStore"/>
238
                </ARCS>
239
            </NODE>
240
            <NODE name="OAIDropStore" type="OAIDropStore">
241
                <DESCRIPTION>Drop the store: upserts are too expensive!</DESCRIPTION>
242
                <PARAMETERS>
243
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
244
                    <PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
245
                    <PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
246
                    <PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
247
                </PARAMETERS>
248
                <ARCS>
249
                    <ARC to="OAICreateStore"/>
250
                </ARCS>
251
            </NODE>
252
            <NODE name="OAICreateStore" type="OAICreateStore">
253
                <DESCRIPTION/>
254
                <PARAMETERS>
255
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
256
                    <PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
257
                    <PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
258
                    <PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
259
                </PARAMETERS>
260
                <ARCS>
261
                    <ARC to="feedOAI"/>
262
                </ARCS>
263
            </NODE>
264
            <NODE name="feedOAI" type="SubmitHadoopJob">
265
                <DESCRIPTION>M/O oai feeding</DESCRIPTION>
266
                <PARAMETERS>
267
                    <PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
268
                    <PARAM required="true" type="string" name="hadoopJob" managedBy="system">oaiFeedJob</PARAM>
269
                    <PARAM required="true" type="string" name="envParams" managedBy="system">
270
						{ 	
271
							'mapred.input.dir' : 'hdfsRecordsPath',
272
							'services.publisher.oai.collection' : 'oaiStoreCollection',
273
							'oaiConfiguration' : 'oaiConfiguration',
274
							'oai.feed.date' : 'oai.feed.date',
275
							'services.publisher.oai.db' : 'oai_dbName',
276
							'services.publisher.oai.skipDuplicates':'services.publisher.oai.skipDuplicates',
277
							'services.publisher.oai.duplicateXPath':'services.publisher.oai.duplicateXPath'
210
			<NODE name="setOAIFormat" type="SetFormatInfo">
211
				<DESCRIPTION>set format, layout, interpretation for OAI publisher</DESCRIPTION>
212
				<PARAMETERS>
213
					<PARAM name="format" type="string" managedBy="user" required="true">oaf</PARAM>
214
					<PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM>
215
					<PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM>
216
					<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
217
					<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
218
					<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
219
				</PARAMETERS>
220
				<ARCS>
221
					<ARC to="prepareOAI"/>
222
				</ARCS>
223
			</NODE>
224
			<NODE name="prepareOAI" type="PrepareOaiJob">
225
				<DESCRIPTION>Prepare params for OAI store feeding</DESCRIPTION>
226
				<PARAMETERS>
227
					<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
228
					<PARAM name="oaiDBName" type="string" managedBy="user" required="true">oaistore</PARAM>
229
					<PARAM name="oaiDBNameParam" type="string" managedBy="system" required="true">oai_dbName</PARAM>
230
					<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
231
					<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
232
					<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
233
					<PARAM name="skipDuplicates" type="boolean" managedBy="user" required="true">true</PARAM>
234
					<PARAM name="duplicateXPath" type="string" managedBy="user" required="true">//*[local-name()='entity']//*[local-name()='datainfo']/*[local-name()='deletedbyinference'][./text() = 'true']</PARAM>
235
				</PARAMETERS>
236
				<ARCS>
237
					<ARC to="OAIDropStore"/>
238
				</ARCS>
239
			</NODE>
240
			<NODE name="OAIDropStore" type="OAIDropStore">
241
				<DESCRIPTION>Drop the store: upserts are too expensive!</DESCRIPTION>
242
				<PARAMETERS>
243
					<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
244
					<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
245
					<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
246
					<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
247
				</PARAMETERS>
248
				<ARCS>
249
					<ARC to="OAICreateStore"/>
250
				</ARCS>
251
			</NODE>
252
			<NODE name="OAICreateStore" type="OAICreateStore">
253
				<DESCRIPTION/>
254
				<PARAMETERS>
255
					<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
256
					<PARAM name="formatParam" type="string" managedBy="system" required="true">oai_format</PARAM>
257
					<PARAM name="layoutParam" type="string" managedBy="system" required="true">oai_layout</PARAM>
258
					<PARAM name="interpretationParam" type="string" managedBy="system" required="true">oai_interpretation</PARAM>
259
				</PARAMETERS>
260
				<ARCS>
261
					<ARC to="feedOAI"/>
262
				</ARCS>
263
			</NODE>
264
			<NODE name="feedOAI" type="SubmitHadoopJob">
265
				<DESCRIPTION>M/O oai feeding</DESCRIPTION>
266
				<PARAMETERS>
267
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
268
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">oaiFeedJob</PARAM>
269
					<PARAM required="true" type="string" name="envParams" managedBy="system">
270
						{
271
						'mapred.input.dir' : 'hdfsRecordsPath',
272
						'services.publisher.oai.collection' : 'oaiStoreCollection',
273
						'oaiConfiguration' : 'oaiConfiguration',
274
						'oai.feed.date' : 'oai.feed.date',
275
						'services.publisher.oai.db' : 'oai_dbName',
276
						'services.publisher.oai.skipDuplicates':'services.publisher.oai.skipDuplicates',
277
						'services.publisher.oai.duplicateXPath':'services.publisher.oai.duplicateXPath'
278 278
						}
279 279
					</PARAM>
280
                    <PARAM required="true" type="string" name="sysParams" managedBy="system">
281
						{ 	
282
							'services.publisher.oai.host' : 'services.publisher.oai.host',
283
							'services.publisher.oai.port' : 'services.publisher.oai.port'			
280
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
281
						{
282
						'services.publisher.oai.host' : 'services.publisher.oai.host',
283
						'services.publisher.oai.port' : 'services.publisher.oai.port'
284 284
						}
285 285
					</PARAM>
286
					<PARAM required="true" type="boolean" name="simulation"	managedBy="user">false</PARAM>
287
                </PARAMETERS>
288
                <ARCS>
289
                    <ARC to="CompoundIndexes"/>
290
                </ARCS>
291
            </NODE>
292
            <NODE name="CompoundIndexes" type="OAICreateIndex">
293
                <DESCRIPTION>Create composite indexes for the OAI store</DESCRIPTION>
294
                <PARAMETERS>
295
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
296
                    <PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
297
                    <PARAM required="true" type="string" name="fieldNames" managedBy="user">set,datestamp;license,oaftype;set,oaftype;oaftype,funder;resulttypeid,oaftype</PARAM>
298
                </PARAMETERS>
299
                <ARCS>
300
                    <ARC to="ConfigIndexes"/>
301
                </ARCS>
302
            </NODE>
303
            <NODE name="ConfigIndexes" type="OAIEnsureIndexes">
304
                <DESCRIPTION>Ensure an index exists on fields as specified in the configuration profile</DESCRIPTION>
305
                <PARAMETERS>
306
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
307
                    <PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
308
                </PARAMETERS>
309
                <ARCS>
310
                    <ARC to="SetsCount"/>
311
                    <ARC to="ConfigSets"/>
312
                </ARCS>
313
            </NODE>
314
            <NODE name="SetsCount" type="OAISetsCountUpdate">
315
                <DESCRIPTION>Count records in each OAI set, for each exported metadata format</DESCRIPTION>
316
                <PARAMETERS>
317
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
318
                    <PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
319
                    <PARAM name="configuredOnly" type="boolean" managedBy="user" required="true">false</PARAM>
320
                </PARAMETERS>
321
                <ARCS>
322
                    <ARC to="waitAll"/>
323
                </ARCS>
324
            </NODE>
325
            <NODE name="ConfigSets" type="RefreshSetsFromConfig">
326
                <DESCRIPTION>Reads the current OAI configuration and upsert OAI sets accordingly (counts are updated as well)</DESCRIPTION>
327
                <PARAMETERS>
328
                    <PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
329
                </PARAMETERS>
330
                <ARCS>
331
                    <ARC to="waitAll"/>
332
                </ARCS>
333
            </NODE>
334
           
335
            <!-- WAIT FOR ALL THE WF BRANCHES TO COMPLETE -->
336
            <NODE name="waitAll" isJoin="true">
337
                <DESCRIPTION>wait for all the branches to complete</DESCRIPTION>
338
                <PARAMETERS/>
339
                <ARCS>
340
                    <ARC to="success"/>
341
                </ARCS>
342
            </NODE>            
343
        </CONFIGURATION>
344
        <STATUS>           
345
        </STATUS>
346
    </BODY>
286
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
287
				</PARAMETERS>
288
				<ARCS>
289
					<ARC to="CompoundIndexes"/>
290
				</ARCS>
291
			</NODE>
292
			<NODE name="CompoundIndexes" type="OAICreateIndex">
293
				<DESCRIPTION>Create composite indexes for the OAI store</DESCRIPTION>
294
				<PARAMETERS>
295
					<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
296
					<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
297
					<PARAM required="true" type="string" name="fieldNames" managedBy="user">set,datestamp;license,oaftype;set,oaftype;oaftype,funder;resulttypeid,oaftype</PARAM>
298
				</PARAMETERS>
299
				<ARCS>
300
					<ARC to="ConfigIndexes"/>
301
				</ARCS>
302
			</NODE>
303
			<NODE name="ConfigIndexes" type="OAIEnsureIndexes">
304
				<DESCRIPTION>Ensure an index exists on fields as specified in the configuration profile</DESCRIPTION>
305
				<PARAMETERS>
306
					<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
307
					<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
308
				</PARAMETERS>
309
				<ARCS>
310
					<ARC to="SetsCount"/>
311
					<ARC to="ConfigSets"/>
312
				</ARCS>
313
			</NODE>
314
			<NODE name="SetsCount" type="OAISetsCountUpdate">
315
				<DESCRIPTION>Count records in each OAI set, for each exported metadata format</DESCRIPTION>
316
				<PARAMETERS>
317
					<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
318
					<PARAM name="oaiStoreCollectionParam" type="string" managedBy="system" required="true">oaiStoreCollection</PARAM>
319
					<PARAM name="configuredOnly" type="boolean" managedBy="user" required="true">false</PARAM>
320
				</PARAMETERS>
321
				<ARCS>
322
					<ARC to="waitAll"/>
323
				</ARCS>
324
			</NODE>
325
			<NODE name="ConfigSets" type="RefreshSetsFromConfig">
326
				<DESCRIPTION>Reads the current OAI configuration and upsert OAI sets accordingly (counts are updated as well)</DESCRIPTION>
327
				<PARAMETERS>
328
					<PARAM name="dbNameParam" required="true" type="string" managedBy="system">oai_dbName</PARAM>
329
				</PARAMETERS>
330
				<ARCS>
331
					<ARC to="waitAll"/>
332
				</ARCS>
333
			</NODE>
334

  
335
			<!-- WAIT FOR ALL THE WF BRANCHES TO COMPLETE -->
336
			<NODE name="waitAll" isJoin="true">
337
				<DESCRIPTION>wait for all the branches to complete</DESCRIPTION>
338
				<PARAMETERS/>
339
				<ARCS>
340
					<ARC to="success"/>
341
				</ARCS>
342
			</NODE>
343
		</CONFIGURATION>
344
		<STATUS>
345
		</STATUS>
346
	</BODY>
347 347
</RESOURCE_PROFILE>

Also available in: Unified diff