Project

General

Profile

1
<?xml version="1.0" encoding="UTF-8"?>
<!--
	Workflow profile "Index from HBASE" (type Provision, priority 30, started manually).

	Graph as declared below:
	  * start nodes: fetchRelClasses, fetchContexts, fetchEntityLinks, findIndex
	  * findIndex goes to prepare (arc "found") or to createIndex and then prepare (arc "notFound")
	  * prepare fans out to cleanupXml and cleanupRotten
	  * the three fetch nodes and both cleanup nodes converge on the join node "group"
	  * group, then updateIndex, then finalize
	  * finalize fans out to updateDs and findSearchService (the latter optionally via "switch")
	  * all terminal arcs point to "success"

	NOTE(review): no node named "success" is declared in this profile; it is presumably a
	terminal node supplied by the workflow engine. Confirm before renaming or adding nodes.

	NOTE(review): multi-line PARAM values are read as text, so their inner whitespace is part
	of the value. Do not re-indent or pretty-print inside those PARAM elements.
-->
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
	<HEADER>
		<RESOURCE_IDENTIFIER value="35044412-9bee-479b-a8d5-e15ba2fcad3a_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
		<RESOURCE_KIND value="WorkflowDSResources" />
		<RESOURCE_URI value="" />
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" />
	</HEADER>
	<BODY>
		<WORKFLOW_NAME>Index from HBASE</WORKFLOW_NAME>
		<WORKFLOW_TYPE>Provision</WORKFLOW_TYPE>
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
		<CONFIGURATION start="manual">
			<!-- Start node. relClassesName is "relClasses", the same name the "group" node lists in its envParams. -->
			<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true">
				<DESCRIPTION />
				<PARAMETERS>
					<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM>
					<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="group" />
				</ARCS>
			</NODE>
			<!-- Start node without parameters; arcs to "group". Presumably provides the "contextmap" entry used there (TODO confirm). -->
			<NODE name="fetchContexts" type="LoadContextsJob" isStart="true">
				<DESCRIPTION />
				<PARAMETERS />
				<ARCS>
					<ARC to="group" />
				</ARCS>
			</NODE>
			<!-- Start node without parameters; arcs to "group". Presumably provides the "index.entity.links" entry used there (TODO confirm). -->
			<NODE name="fetchEntityLinks" type="LoadEntityLinksJob" isStart="true">
				<DESCRIPTION />
				<PARAMETERS />
				<ARCS>
					<ARC to="group" />
				</ARCS>
			</NODE>
			<!-- Start node. Two named outcomes: "found" continues with "prepare", "notFound" goes through "createIndex" first. -->
			<NODE name="findIndex" type="FindIndex" isStart="true">
				<DESCRIPTION />
				<PARAMETERS>
					<PARAM name="mdFormat" type="string" managedBy="user" required="true">DMF</PARAM>
					<PARAM name="layout" type="string" managedBy="system" required="true">index</PARAM>
					<PARAM name="interpretation" type="string" managedBy="system" required="true">openaire</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC name="found" to="prepare" />
					<ARC name="notFound" to="createIndex" />
				</ARCS>
			</NODE>
			<!-- Reached only through the "notFound" arc of findIndex; continues with "prepare". -->
			<NODE name="createIndex" type="CreateIndex">
				<DESCRIPTION />
				<PARAMETERS />
				<ARCS>
					<ARC to="prepare" />
				</ARCS>
			</NODE>
			<!-- The values outputRecordsPath and rottenRecordsPath set here are the names the cleanup nodes and both Hadoop job nodes refer to. Fans out to both cleanup nodes. -->
			<NODE name="prepare" type="PrepareIndexJob">
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="outputRecordsPathParam" type="string" managedBy="system" required="true">outputRecordsPath</PARAM>
					<PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
					<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
					<PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="cleanupXml" />
					<ARC to="cleanupRotten" />
				</ARCS>
			</NODE>
			<!-- HDFS cleanup on cluster DM for the path named by outputRecordsPath; arcs to the join node "group". -->
			<NODE name="cleanupXml" type="DeleteHdfsPathJob">
				<DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{ 	
							'path' : 'outputRecordsPath'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="group" />
				</ARCS>
			</NODE>
			<!-- HDFS cleanup on cluster DM for the path named by rottenRecordsPath; arcs to the join node "group". -->
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{ 	
							'path' : 'rottenRecordsPath'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="group" />
				</ARCS>
			</NODE>
			<!-- Join node (isJoin="true"): target of the three fetch start nodes and of both cleanup nodes. Submits hadoopJob prepareIndexDataJob on cluster DM, writing to outputRecordsPath. -->
			<NODE name="group" type="SubmitHadoopJob" isJoin="true">
				<DESCRIPTION>M/R group entities</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">prepareIndexDataJob</PARAM>
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
						{ 	
							'hbase.mapred.inputtable' : 'hbase.mapred.datatable', 
							'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
						}
					</PARAM>
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{ 	
							'mapred.output.dir' : 'outputRecordsPath',
							'index.entity.links' : 'index.entity.links',
							'oaf.schema.location' : 'oaf.schema.location',
							'contextmap' : 'contextmap',
							'relClasses' : 'relClasses'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="updateIndex" />
				</ARCS>
			</NODE>
			<!-- Submits hadoopJob indexFeedJob on cluster DM: input dir is outputRecordsPath (the output dir of "group"), output dir is rottenRecordsPath. -->
			<NODE name="updateIndex" type="SubmitHadoopJob">
				<DESCRIPTION>M/O index records</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{ 	
							'mapred.input.dir' : 'outputRecordsPath',
							'mapred.output.dir' : 'rottenRecordsPath',
							'index.xslt' : 'index.xslt',
							'index.solr.url' : 'index.solr.url',
							'index.solr.collection' : 'index.solr.collection',
							'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
							'index.shutdown.wait.time' : 'index.shutdown.wait.time',
							'index.solr.sim.mode' : 'index.solr.sim.mode',
							'index.feed.timestamp' : 'index.feed.timestamp'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="finalize" />
				</ARCS>
			</NODE>
			<!-- Fans out to "updateDs" and "findSearchService". -->
			<NODE name="finalize" type="FinalizeIndexFeeding">
				<DESCRIPTION>commit changes</DESCRIPTION>
				<PARAMETERS />
				<ARCS>
					<ARC to="updateDs" />
					<ARC to="findSearchService" />
				</ARCS>
			</NODE>
			<!-- Terminal branch: arcs to "success", which is not declared in this profile. -->
			<NODE name="updateDs" type="IndexDsUpdateJob">
				<DESCRIPTION>update DS</DESCRIPTION>
				<PARAMETERS />
				<ARCS>
					<ARC to="success" />
				</ARCS>
			</NODE>
			<!-- Two named outcomes: "found" continues with "switch", "notFound" ends at "success". Uses the same xqueryForServiceIdParam value (xqueryForSearchService) as "switch". -->
			<NODE name="findSearchService" type="FindSearchService">
				<DESCRIPTION>find search service</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="xquery" type="string" managedBy="user" required="false">/RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='SearchServiceResourceType' and .//RESOURCE_KIND/@value='ServiceResources']/HEADER/RESOURCE_IDENTIFIER/@value/string()</PARAM>
					<PARAM name="xqueryForServiceIdParam" type="string" managedBy="system" required="true">xqueryForSearchService</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC name="found" to="switch" />
					<ARC name="notFound" to="success" />
				</ARCS>
			</NODE>
			<!-- Reached only through the "found" arc of findSearchService; ends at "success". -->
			<NODE name="switch" type="SwitchIndex">
				<DESCRIPTION>switch index</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="xqueryForServiceIdParam" type="string" managedBy="system" required="true">xqueryForSearchService</PARAM>
					<PARAM name="inputIndexIdParam" type="string" managedBy="system" required="true">index_id</PARAM>
					<PARAM name="outputIndexIdParam" type="string" managedBy="system" required="true">IndexId</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="success" />
				</ARCS>
			</NODE>
		</CONFIGURATION>
		<STATUS />
	</BODY>
</RESOURCE_PROFILE>
188

    
189

    
(2-2/10)