Project

General

Profile

1
<?xml version="1.0" encoding="UTF-8"?>
<!--
	Workflow resource profile "Index from HBASE" (WORKFLOW_TYPE Provision, WORKFLOW_PRIORITY 30).

	Node graph as declared by the ARC elements below:
	  * start nodes (isStart="true"): fetchRelClasses, fetchContexts, fetchEntityLinks, findIndex
	  * fetchRelClasses, fetchContexts, fetchEntityLinks each lead to: group
	  * findIndex: arc "found" leads to prepare; arc "notFound" leads to createIndex, which leads to prepare
	  * prepare leads to cleanupXml and cleanupRotten; both lead to: group
	  * group (isJoin="true") leads to updateIndex, then finalize, then updateDs, then "success"

	NOTE(review): "success" is not declared as a NODE in this profile; presumably it is a
	terminal name provided by the workflow engine. Confirm against the engine.

	Maintenance note: the text inside multi-line PARAM elements (the quoted key/value maps) is
	element content, so its whitespace is delivered to the consumer. Do not re-indent it or add
	anything (comments, numbering) inside those elements.
-->
<RESOURCE_PROFILE>
	<!-- Identification of this resource: identifier, type, kind, (empty) URI and creation date. -->
	<HEADER>
		<RESOURCE_IDENTIFIER value="35044412-9bee-479b-a8d5-e15ba2fcad3a_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
		<RESOURCE_KIND value="WorkflowDSResources"/>
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z"/>
	</HEADER>
	<BODY>
		<WORKFLOW_NAME>Index from HBASE</WORKFLOW_NAME>
		<WORKFLOW_TYPE>Provision</WORKFLOW_TYPE>
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
		<CONFIGURATION start="manual">
			<!-- Start node of type FetchRelClasses; configured with a property name and the name "relClasses"
			     (the same name appears as a key in the envParams of the "group" node). -->
			<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true">
				<DESCRIPTION/>
				<PARAMETERS>
					<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM>
					<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="group"/>
				</ARCS>
			</NODE>
			<!-- Start node of type LoadContextsJob; takes no parameters. -->
			<NODE name="fetchContexts" type="LoadContextsJob" isStart="true">
				<DESCRIPTION/>
				<PARAMETERS/>
				<ARCS>
					<ARC to="group"/>
				</ARCS>
			</NODE>
			<!-- Start node of type LoadEntityLinksJob; takes no parameters. -->
			<NODE name="fetchEntityLinks" type="LoadEntityLinksJob" isStart="true">
				<DESCRIPTION/>
				<PARAMETERS/>
				<ARCS>
					<ARC to="group"/>
				</ARCS>
			</NODE>
			<!-- Start node of type FindIndex, parameterised by mdFormat / layout / interpretation.
			     Two named outcomes: "found" goes straight to prepare, "notFound" goes through createIndex first.
			     mdFormat is the only parameter in this profile with managedBy="user". -->
			<NODE name="findIndex" type="FindIndex" isStart="true">
				<DESCRIPTION/>
				<PARAMETERS>
					<PARAM name="mdFormat" type="string" managedBy="user" required="true">DMF</PARAM>
					<PARAM name="layout" type="string" managedBy="system" required="true">index</PARAM>
					<PARAM name="interpretation" type="string" managedBy="system" required="true">openaire</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC name="found" to="prepare"/>
					<ARC name="notFound" to="createIndex"/>
				</ARCS>
			</NODE>
			<!-- Reached only via the "notFound" arc of findIndex; continues to prepare. -->
			<NODE name="createIndex" type="CreateIndex">
				<DESCRIPTION/>
				<PARAMETERS/>
				<ARCS>
					<ARC to="prepare"/>
				</ARCS>
			</NODE>
			<!-- Declares the names "outputRecordsPath" and "rottenRecordsPath" that the later nodes
			     reference in their envParams, then fans out to the two cleanup nodes. -->
			<NODE name="prepare" type="PrepareIndexJob">
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="outputRecordsPathParam" type="string" managedBy="system" required="true">outputRecordsPath</PARAM>
					<PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
					<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
					<PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="cleanupXml"/>
					<ARC to="cleanupRotten"/>
				</ARCS>
			</NODE>
			<!-- DeleteHdfsPathJob on cluster "DM"; envParams maps 'path' to 'outputRecordsPath'. -->
			<NODE name="cleanupXml" type="DeleteHdfsPathJob">
				<DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
					<PARAM name="envParams" type="string" managedBy="system" required="true">
						{
						'path' : 'outputRecordsPath'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="group"/>
				</ARCS>
			</NODE>
			<!-- DeleteHdfsPathJob on cluster "DM"; envParams maps 'path' to 'rottenRecordsPath'. -->
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
					<PARAM name="envParams" type="string" managedBy="system" required="true">
						{
						'path' : 'rottenRecordsPath'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="group"/>
				</ARCS>
			</NODE>
			<!-- Join node (isJoin="true"): target of the three fetch* start nodes and of both cleanup nodes.
			     Submits hadoopJob "prepareIndexDataJob" on cluster "DM"; its 'mapred.output.dir' is
			     mapped to 'outputRecordsPath', which updateIndex then uses as 'mapred.input.dir'. -->
			<NODE name="group" type="SubmitHadoopJob" isJoin="true">
				<DESCRIPTION>M/R group entities</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
					<PARAM name="hadoopJob" type="string" managedBy="system" required="true">prepareIndexDataJob</PARAM>
					<PARAM name="sysParams" type="string" managedBy="system" required="true">
						{
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
						}
					</PARAM>
					<PARAM name="envParams" type="string" managedBy="system" required="true">
						{
						'mapred.output.dir' : 'outputRecordsPath',
						'index.entity.links' : 'index.entity.links',
						'oaf.schema.location' : 'oaf.schema.location',
						'contextmap' : 'contextmap',
						'relClasses' : 'relClasses'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="updateIndex"/>
				</ARCS>
			</NODE>
			<!-- Submits hadoopJob "indexFeedJob" on cluster "DM": 'mapred.input.dir' is mapped to
			     'outputRecordsPath' and 'mapred.output.dir' to 'rottenRecordsPath'; the remaining
			     envParams entries map each index.* key to a value of the same name. -->
			<NODE name="updateIndex" type="SubmitHadoopJob">
				<DESCRIPTION>M/O index records</DESCRIPTION>
				<PARAMETERS>
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
					<PARAM name="hadoopJob" type="string" managedBy="system" required="true">indexFeedJob</PARAM>
					<PARAM name="envParams" type="string" managedBy="system" required="true">
						{
						'mapred.input.dir' : 'outputRecordsPath',
						'mapred.output.dir' : 'rottenRecordsPath',
						'index.xslt' : 'index.xslt',
						'index.solr.url' : 'index.solr.url',
						'index.solr.collection' : 'index.solr.collection',
						'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
						'index.shutdown.wait.time' : 'index.shutdown.wait.time',
						'index.solr.sim.mode' : 'index.solr.sim.mode',
						'index.feed.timestamp' : 'index.feed.timestamp'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="finalize"/>
				</ARCS>
			</NODE>
			<!-- Node of type FinalizeIndexFeeding, described as "commit changes"; no parameters. -->
			<NODE name="finalize" type="FinalizeIndexFeeding">
				<DESCRIPTION>commit changes</DESCRIPTION>
				<PARAMETERS/>
				<ARCS>
					<ARC to="updateDs"/>
				</ARCS>
			</NODE>
			<!-- Last declared node; its arc targets "success", which has no NODE element in this profile. -->
			<NODE name="updateDs" type="IndexDsUpdateJob">
				<DESCRIPTION>update DS</DESCRIPTION>
				<PARAMETERS/>
				<ARCS>
					<ARC to="success"/>
				</ARCS>
			</NODE>
		</CONFIGURATION>
		<STATUS/>
	</BODY>
</RESOURCE_PROFILE>
165

    
166

    
(6-6/24)