Project

General

Profile

1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
	<!-- Resource profile header: identifier, resource type and kind, an (empty) URI
	     and the creation timestamp.
	     The part of RESOURCE_IDENTIFIER after the underscore is the Base64 encoding of
	     "WorkflowDSResources/WorkflowDSResourceType", i.e. RESOURCE_KIND "/" RESOURCE_TYPE.
	     NOTE(review): presumably the three values must be kept in sync; confirm with
	     the registry that stores this profile. -->
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="35044412-9bee-479b-a8d5-e15ba2fcad3a_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
5
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
6
		<RESOURCE_KIND value="WorkflowDSResources" />
7
		<RESOURCE_URI value="" />
8
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" />
9
	</HEADER>
10
	<BODY>
11
		<!-- Workflow descriptor: display name, workflow type and priority.
		     NOTE(review): the scale and ordering of WORKFLOW_PRIORITY are defined by the
		     consuming workflow engine and are not visible in this file. -->
		<WORKFLOW_NAME>Index from HBASE</WORKFLOW_NAME>
12
		<WORKFLOW_TYPE>Provision</WORKFLOW_TYPE>
13
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
		<CONFIGURATION start="auto">
15
			<!-- findIndex: the start node of the graph (isStart="true").
			     Parameters: mdFormat (user managed, value DMF), layout (index) and
			     interpretation (openaire), the latter two system managed.
			     Two named outcomes: "found" continues with the prepare node, "notFound"
			     goes through createIndex first (which itself leads to prepare). -->
			<NODE name="findIndex" type="FindIndex" isStart="true">
16
				<DESCRIPTION />
17
				<PARAMETERS>
18
					<PARAM name="mdFormat" type="string" managedBy="user" required="true">DMF</PARAM>
19
					<PARAM name="layout" type="string" managedBy="system" required="true">index</PARAM>
20
					<PARAM name="interpretation" type="string" managedBy="system" required="true">openaire</PARAM>
21
				</PARAMETERS>
22
				<ARCS>
23
					<ARC name="found" to="prepare" />
24
					<ARC name="notFound" to="createIndex" />
25
				</ARCS>
26
			</NODE>
27
			<!-- createIndex: reached only through the "notFound" arc of findIndex.
			     Declares no parameters of its own and has a single unnamed arc to prepare.
			     NOTE(review): presumably it reuses the mdFormat/layout/interpretation
			     values established by findIndex; confirm in the CreateIndex node
			     implementation. -->
			<NODE name="createIndex" type="CreateIndex">
28
				<DESCRIPTION />
29
				<PARAMETERS />
30
				<ARCS>
31
					<ARC to="prepare" />
32
				</ARCS>
33
			</NODE>
34
			<!-- prepare: sets up the indexing run.
			     outputRecordsPathParam and rottenRecordsPathParam carry the names
			     "outputRecordsPath" and "rottenRecordsPath"; the same two names are used
			     as envParams values by cleanupXml, cleanupRotten, group and updateIndex.
			     layoutToRecordStylesheet points at the XSL resource used for this layout.
			     Two unnamed arcs: the flow forks into cleanupXml and cleanupRotten.
			     NOTE(review): presumably the node stores the computed HDFS paths in the
			     workflow env under those two names; confirm in PrepareIndexJob. -->
			<NODE name="prepare" type="PrepareIndexJob">
35
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
36
				<PARAMETERS>
37
					<PARAM name="outputRecordsPathParam"  type="string" managedBy="system" required="true">outputRecordsPath</PARAM>
38
					<PARAM name="rottenRecordsPathParam"  type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
39
					<PARAM required="true" type="string" name="layoutToRecordStylesheet" managedBy="system">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
40
				</PARAMETERS>
41
				<ARCS>
42
					<ARC to="cleanupXml" />
43
					<ARC to="cleanupRotten" />
44
				</ARCS>
45
			</NODE>
46
			<!-- cleanupXml: HDFS cleanup of the XML output location, on cluster "DM".
			     envParams maps 'path' to 'outputRecordsPath', the name configured in the
			     prepare node. Continues with the group node.
			     NOTE(review): presumably the right-hand side of envParams is an env
			     attribute name resolved at runtime, not a literal path; confirm in
			     DeleteHdfsPathJob. The envParams text body is runtime data and is kept
			     byte for byte. -->
			<NODE name="cleanupXml" type="DeleteHdfsPathJob">
47
				<DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
48
				<PARAMETERS>
49
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
50
					<PARAM required="true" type="string" name="envParams" managedBy="system">
51
						{ 	
52
							'path' : 'outputRecordsPath'
53
						}
54
					</PARAM>					
55
				</PARAMETERS>
56
				<ARCS>
57
					<ARC to="group" />
58
				</ARCS>
59
			</NODE>		
60
			<!-- cleanupRotten: HDFS cleanup of the rotten records location, on cluster "DM".
			     envParams maps 'path' to 'rottenRecordsPath', the name configured in the
			     prepare node.
			     Two unnamed arcs: the flow continues with group and, in addition, forks
			     into updateStats.
			     NOTE(review): presumably 'rottenRecordsPath' is resolved from the workflow
			     env at runtime; confirm in DeleteHdfsPathJob. -->
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
61
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
62
				<PARAMETERS>
63
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
64
					<PARAM required="true" type="string" name="envParams" managedBy="system">
65
						{ 	
66
							'path' : 'rottenRecordsPath'
67
						}
68
					</PARAM>					
69
				</PARAMETERS>
70
				<ARCS>
71
					<ARC to="group" />
72
					<ARC to="updateStats" />
73
				</ARCS>
74
			</NODE>
75
			<!-- group: submits the Hadoop job "prepareIndexDataJob" on cluster "DM".
			     Marked isJoin="true"; both cleanupXml and cleanupRotten have an arc to it.
			     sysParams maps both 'hbase.mapred.inputtable' and
			     'hbase.mapreduce.inputtable' to the same name 'hbase.mapred.datatable'.
			     envParams maps 'mapred.output.dir' to 'outputRecordsPath' and passes
			     'index.entity.links' and 'contextmap' through under their own names.
			     Continues with updateIndex.
			     NOTE(review): presumably sysParams values are looked up in system
			     properties and envParams values in the workflow env; confirm in
			     SubmitHadoopJob. -->
			<NODE name="group" type="SubmitHadoopJob" isJoin="true">
76
				<DESCRIPTION>M/R group entities</DESCRIPTION>
77
				<PARAMETERS>
78
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
79
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">prepareIndexDataJob</PARAM>				
80
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
81
						{ 	
82
							'hbase.mapred.inputtable' : 'hbase.mapred.datatable', 
83
							'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
84
						}
85
					</PARAM>
86
					<PARAM required="true" type="string" name="envParams" managedBy="system">
87
						{ 	
88
							'mapred.output.dir' : 'outputRecordsPath',
89
							'index.entity.links' : 'index.entity.links',
90
							'contextmap' : 'contextmap'
91
						}
92
					</PARAM>
93
				</PARAMETERS>
94
				<ARCS>
95
					<ARC to="updateIndex" />
96
				</ARCS>
97
			</NODE>			
98
			<!-- updateIndex: submits the Hadoop job "indexFeedJob" on cluster "DM".
			     Its input directory is mapped to 'outputRecordsPath' (the output of the
			     group node) and its output directory to 'rottenRecordsPath'. All
			     'index.*' keys are passed through under their own names.
			     Continues with the sync join node.
			     NOTE(review): the description reads "M/O" while the group node says "M/R";
			     confirm whether this is intentional (e.g. a map only job) or a typo. -->
			<NODE name="updateIndex" type="SubmitHadoopJob">
99
				<DESCRIPTION>M/O index records</DESCRIPTION>
100
				<PARAMETERS>
101
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
102
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>					
103
					<PARAM required="true" type="string" name="envParams" managedBy="system">
104
						{ 	
105
							'mapred.input.dir' : 'outputRecordsPath',
106
							'mapred.output.dir' : 'rottenRecordsPath',
107
							'index.xslt' : 'index.xslt',
108
							'index.solr.url.zk' : 'index.solr.url.zk',
109
							'index.solr.url.list' : 'index.solr.url.list',
110
							'index.solr.url.local' : 'index.solr.url.local',
111
							'index.solr.collection' : 'index.solr.collection',
112
							'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
113
							'index.shutdown.wait.time' : 'index.shutdown.wait.time',
114
							'index.solr.local.feeding' : 'index.solr.local.feeding',
115
							'index.solr.sim.mode' : 'index.solr.sim.mode',
116
							'index.feed.timestamp' : 'index.feed.timestamp'
117
						}
118
					</PARAM>
119
				</PARAMETERS>
120
				<ARCS>
121
					<ARC to="sync" />
122
				</ARCS>
123
			</NODE>
124
			<!-- updateStats: side branch forked from cleanupRotten; rejoins the main flow
			     at the sync node.
			     NOTE(review): this is the only node of type "SubmitHadoopMapReduce" (the
			     other Hadoop nodes use "SubmitHadoopJob") and it declares no cluster,
			     hadoopJob or envParams. Confirm that the type is registered and that the
			     node is meant to run without parameters. -->
			<NODE name="updateStats" type="SubmitHadoopMapReduce">
125
				<DESCRIPTION>update stats</DESCRIPTION>
126
				<PARAMETERS />
127
				<ARCS>
128
					<ARC to="sync" />
129
				</ARCS>
130
			</NODE>
131
			<!-- sync: join point (isJoin="true") targeted by both updateIndex and
			     updateStats; continues with finalize.
			     NOTE(review): the node has no "type" attribute; presumably the engine
			     treats an untyped node as a plain pass-through, so confirm. -->
			<NODE name="sync" isJoin="true">
132
				<DESCRIPTION />
133
				<PARAMETERS />
134
				<ARCS>
135
					<ARC to="finalize" />
136
				</ARCS>
137
			</NODE>				
138
			<!-- finalize: runs after the sync join; described as "commit changes".
			     Takes no parameters and continues with updateDs. -->
			<NODE name="finalize" type="FinalizeIndexFeeding">
139
				<DESCRIPTION>commit changes</DESCRIPTION>
140
				<PARAMETERS />
141
				<ARCS>
142
					<ARC to="updateDs" />
143
				</ARCS>
144
			</NODE>
145
			<!-- updateDs: runs after finalize; described as "update DS".
			     Takes no parameters and continues with the switch node.
			     NOTE(review): what "DS" refers to is not stated in this file; confirm
			     against the IndexDsUpdateJob implementation. -->
			<NODE name="updateDs" type="IndexDsUpdateJob">
146
				<DESCRIPTION>update DS</DESCRIPTION>
147
				<PARAMETERS />
148
				<ARCS>
149
					<ARC to="switch" />
150
				</ARCS>
151
			</NODE>
152
			<!-- switch: last node defined in this configuration; described as
			     "switch index". Takes no parameters.
			     Its arc targets "success", which is not declared as a NODE in this file.
			     NOTE(review): presumably "success" is a terminal state built into the
			     workflow engine; confirm before renaming or adding nodes after it. -->
			<NODE name="switch" type="SwitchIndex">
153
				<DESCRIPTION>switch index</DESCRIPTION>
154
				<PARAMETERS />
155
				<ARCS>
156
					<ARC to="success" />
157
				</ARCS>
158
			</NODE>			
159
		</CONFIGURATION>
160
		<STATUS />
161
	</BODY>
162
</RESOURCE_PROFILE>
163

    
164

    
(2-2/6)