Project

General

Profile

1 26600 sandro.lab
<?xml version="1.0" encoding="UTF-8"?>
2 48139 alessia.ba
<RESOURCE_PROFILE>
3 26600 sandro.lab
	<!-- Profile header: resource identifier, resource type (WorkflowDSResourceType),
	     resource kind (WorkflowDSResources), an empty resource URI and the creation timestamp. -->
	<HEADER>
4 48139 alessia.ba
		<RESOURCE_IDENTIFIER value="35044412-9bee-479b-a8d5-e15ba2fcad3a_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
		<RESOURCE_KIND value="WorkflowDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z"/>
9 26600 sandro.lab
	</HEADER>
10
	<BODY>
11
		<!-- Workflow metadata: name, type and priority of this workflow.
		     NOTE(review): the scale and ordering of WORKFLOW_PRIORITY are not visible here; confirm with the workflow engine. -->
		<WORKFLOW_NAME>Index from HBASE</WORKFLOW_NAME>
12 27202 claudio.at
		<WORKFLOW_TYPE>Provision</WORKFLOW_TYPE>
13 26600 sandro.lab
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14 28824 claudio.at
		<CONFIGURATION start="manual">
15
			<!-- Start node (isStart="true") of type FetchRelClasses.
			     Takes two required, system-managed string parameters: relClassesProperty and relClassesName.
			     Its single unnamed arc leads to the "group" node.
			     NOTE(review): presumably the value stored under "relClasses" is the one the "group" node
			     lists in its envParams; confirm against the FetchRelClasses implementation. -->
			<NODE name="fetchRelClasses" type="FetchRelClasses" isStart="true">
16 48139 alessia.ba
				<DESCRIPTION/>
17 28824 claudio.at
				<PARAMETERS>
18
					<PARAM name="relClassesProperty" type="string" managedBy="system" required="true">dnet.openaire.model.relclasses.xquery</PARAM>
19
					<PARAM name="relClassesName" type="string" managedBy="system" required="true">relClasses</PARAM>
20
				</PARAMETERS>
21
				<ARCS>
22 48139 alessia.ba
					<ARC to="group"/>
23 28824 claudio.at
				</ARCS>
24
			</NODE>
25 29397 claudio.at
			<!-- Start node (isStart="true") of type LoadContextsJob. It declares no parameters and has a
			     single unnamed arc to the "group" node.
			     NOTE(review): presumably this provides the "contextmap" entry that "group" lists in its
			     envParams; confirm against the LoadContextsJob implementation. -->
			<NODE name="fetchContexts" type="LoadContextsJob" isStart="true">
26 48139 alessia.ba
				<DESCRIPTION/>
27 29397 claudio.at
				<PARAMETERS/>
28
				<ARCS>
29 48139 alessia.ba
					<ARC to="group"/>
30 29397 claudio.at
				</ARCS>
31
			</NODE>
32
			<!-- Start node (isStart="true") of type LoadEntityLinksJob. It declares no parameters and has a
			     single unnamed arc to the "group" node.
			     NOTE(review): presumably this provides the "index.entity.links" entry that "group" lists in
			     its envParams; confirm against the LoadEntityLinksJob implementation. -->
			<NODE name="fetchEntityLinks" type="LoadEntityLinksJob" isStart="true">
33 48139 alessia.ba
				<DESCRIPTION/>
34 29397 claudio.at
				<PARAMETERS/>
35
				<ARCS>
36 48139 alessia.ba
					<ARC to="group"/>
37 29397 claudio.at
				</ARCS>
38
			</NODE>
39 26600 sandro.lab
			<!-- Start node (isStart="true") of type FindIndex.
			     Parameters: mdFormat (user-managed, value DMF), layout (system-managed, value index) and
			     interpretation (system-managed, value openaire); all three are required strings.
			     Two named arcs: "found" leads to "prepare", "notFound" leads to "createIndex". -->
			<NODE name="findIndex" type="FindIndex" isStart="true">
40 48139 alessia.ba
				<DESCRIPTION/>
41 26600 sandro.lab
				<PARAMETERS>
42
					<PARAM name="mdFormat" type="string" managedBy="user" required="true">DMF</PARAM>
43
					<PARAM name="layout" type="string" managedBy="system" required="true">index</PARAM>
44
					<PARAM name="interpretation" type="string" managedBy="system" required="true">openaire</PARAM>
45
				</PARAMETERS>
46
				<ARCS>
47 48139 alessia.ba
					<ARC name="found" to="prepare"/>
48
					<ARC name="notFound" to="createIndex"/>
49 26600 sandro.lab
				</ARCS>
50
			</NODE>
51
			<!-- Node of type CreateIndex. Within this profile it is only targeted by the "notFound" arc of
			     "findIndex". It declares no parameters and continues to "prepare". -->
			<NODE name="createIndex" type="CreateIndex">
52 48139 alessia.ba
				<DESCRIPTION/>
53
				<PARAMETERS/>
54 26600 sandro.lab
				<ARCS>
55 48139 alessia.ba
					<ARC to="prepare"/>
56 26600 sandro.lab
				</ARCS>
57
			</NODE>
58
			<!-- Node of type PrepareIndexJob, reached from "findIndex" (arc "found") or from "createIndex".
			     Takes four required, system-managed string parameters: outputRecordsPathParam,
			     rottenRecordsPathParam, layoutToRecordStylesheet (a classpath-style path to an XSL file)
			     and oafSchemaLocationProperty.
			     It has two unnamed arcs, to "cleanupXml" and to "cleanupRotten".
			     NOTE(review): presumably both arcs are followed (a fork); confirm with the workflow engine. -->
			<NODE name="prepare" type="PrepareIndexJob">
59
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
60
				<PARAMETERS>
61 48139 alessia.ba
					<PARAM name="outputRecordsPathParam" type="string" managedBy="system" required="true">outputRecordsPath</PARAM>
62
					<PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
63 28824 claudio.at
					<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
64 48139 alessia.ba
					<PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
65 26600 sandro.lab
				</PARAMETERS>
66
				<ARCS>
67 48139 alessia.ba
					<ARC to="cleanupXml"/>
68
					<ARC to="cleanupRotten"/>
69 26600 sandro.lab
				</ARCS>
70
			</NODE>
71
			<!-- Node of type DeleteHdfsPathJob, reached from "prepare"; continues to "group".
			     Parameters: cluster (value DM) and envParams, whose text body is a brace-delimited map
			     with the single entry 'path' : 'outputRecordsPath'.
			     NOTE(review): presumably 'outputRecordsPath' names a workflow env attribute rather than a
			     literal HDFS path; confirm against the DeleteHdfsPathJob implementation.
			     The envParams body is element text, so its whitespace is delivered to the consumer as-is. -->
			<NODE name="cleanupXml" type="DeleteHdfsPathJob">
72
				<DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
73
				<PARAMETERS>
74
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
75
					<PARAM required="true" type="string" name="envParams" managedBy="system">
76 48139 alessia.ba
						{
77
						'path' : 'outputRecordsPath'
78 26600 sandro.lab
						}
79 48139 alessia.ba
					</PARAM>
80 26600 sandro.lab
				</PARAMETERS>
81
				<ARCS>
82 48139 alessia.ba
					<ARC to="group"/>
83 26600 sandro.lab
				</ARCS>
84 48139 alessia.ba
			</NODE>
85 26600 sandro.lab
			<!-- Node of type DeleteHdfsPathJob, reached from "prepare"; continues to "group".
			     Same shape as "cleanupXml", but its envParams map has the entry
			     'path' : 'rottenRecordsPath'.
			     NOTE(review): presumably 'rottenRecordsPath' names a workflow env attribute rather than a
			     literal HDFS path; confirm against the DeleteHdfsPathJob implementation. -->
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
86
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
87
				<PARAMETERS>
88
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
89
					<PARAM required="true" type="string" name="envParams" managedBy="system">
90 48139 alessia.ba
						{
91
						'path' : 'rottenRecordsPath'
92 26600 sandro.lab
						}
93 48139 alessia.ba
					</PARAM>
94 26600 sandro.lab
				</PARAMETERS>
95
				<ARCS>
96 48139 alessia.ba
					<ARC to="group"/>
97 26600 sandro.lab
				</ARCS>
98
			</NODE>
99
			<!-- Join node (isJoin="true") of type SubmitHadoopJob. Within this profile it is the arc target
			     of "fetchRelClasses", "fetchContexts", "fetchEntityLinks", "cleanupXml" and "cleanupRotten".
			     Parameters: cluster (value DM), hadoopJob (value prepareIndexDataJob), and two
			     brace-delimited maps given as element text: sysParams and envParams.
			     Its single unnamed arc leads to "updateIndex".
			     NOTE(review): presumably sysParams values are resolved from system properties and envParams
			     values from workflow env attributes; confirm against the SubmitHadoopJob implementation. -->
			<NODE name="group" type="SubmitHadoopJob" isJoin="true">
100
				<DESCRIPTION>M/R group entities</DESCRIPTION>
101
				<PARAMETERS>
102
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
103 48139 alessia.ba
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">prepareIndexDataJob</PARAM>
104 26600 sandro.lab
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
105 48139 alessia.ba
						{
106
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
107
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
108 26600 sandro.lab
						}
109
					</PARAM>
110
					<PARAM required="true" type="string" name="envParams" managedBy="system">
111 48139 alessia.ba
						{
112
						'mapred.output.dir' : 'outputRecordsPath',
113
						'index.entity.links' : 'index.entity.links',
114
						'oaf.schema.location' : 'oaf.schema.location',
115
						'contextmap' : 'contextmap',
116
						'relClasses' : 'relClasses'
117 26600 sandro.lab
						}
118
					</PARAM>
119
				</PARAMETERS>
120
				<ARCS>
121 48139 alessia.ba
					<ARC to="updateIndex"/>
122 26600 sandro.lab
				</ARCS>
123 48139 alessia.ba
			</NODE>
124 26600 sandro.lab
			<!-- Node of type SubmitHadoopJob, reached from "group"; continues to "finalize".
			     Parameters: cluster (value DM), hadoopJob (value indexFeedJob) and an envParams map given
			     as element text. In that map 'mapred.input.dir' is paired with 'outputRecordsPath' (the
			     name "group" pairs with 'mapred.output.dir') and 'mapred.output.dir' is paired with
			     'rottenRecordsPath'; the remaining entries pair each index.* key with the same name.
			     NOTE(review): "M/O" in the description presumably stands for map-only, as opposed to "M/R"
			     on the "group" node; confirm before changing it. -->
			<NODE name="updateIndex" type="SubmitHadoopJob">
125
				<DESCRIPTION>M/O index records</DESCRIPTION>
126
				<PARAMETERS>
127
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
128 48139 alessia.ba
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>
129 26600 sandro.lab
					<PARAM required="true" type="string" name="envParams" managedBy="system">
130 48139 alessia.ba
						{
131
						'mapred.input.dir' : 'outputRecordsPath',
132
						'mapred.output.dir' : 'rottenRecordsPath',
133
						'index.xslt' : 'index.xslt',
134
						'index.solr.url' : 'index.solr.url',
135
						'index.solr.collection' : 'index.solr.collection',
136
						'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
137
						'index.shutdown.wait.time' : 'index.shutdown.wait.time',
138
						'index.solr.sim.mode' : 'index.solr.sim.mode',
139
						'index.feed.timestamp' : 'index.feed.timestamp'
140 26600 sandro.lab
						}
141
					</PARAM>
142
				</PARAMETERS>
143
				<ARCS>
144 48139 alessia.ba
					<ARC to="finalize"/>
145 28824 claudio.at
				</ARCS>
146
			</NODE>
147 26600 sandro.lab
			<!-- Node of type FinalizeIndexFeeding, reached from "updateIndex". It declares no parameters
			     and continues to "updateDs". -->
			<NODE name="finalize" type="FinalizeIndexFeeding">
148
				<DESCRIPTION>commit changes</DESCRIPTION>
149 48139 alessia.ba
				<PARAMETERS/>
150 26600 sandro.lab
				<ARCS>
151 48139 alessia.ba
					<ARC to="updateDs"/>
152 26600 sandro.lab
				</ARCS>
153
			</NODE>
154
			<!-- Last node defined in this CONFIGURATION: type IndexDsUpdateJob, reached from "finalize",
			     no parameters. Its arc targets "success", which is not defined as a NODE in this profile.
			     NOTE(review): presumably "success" is a terminal node supplied by the workflow engine;
			     confirm before renaming or removing the arc. -->
			<NODE name="updateDs" type="IndexDsUpdateJob">
155
				<DESCRIPTION>update DS</DESCRIPTION>
156 48139 alessia.ba
				<PARAMETERS/>
157 26600 sandro.lab
				<ARCS>
158 48139 alessia.ba
					<ARC to="success"/>
159 26600 sandro.lab
				</ARCS>
160
			</NODE>
161
		</CONFIGURATION>
162 48139 alessia.ba
		<STATUS/>
163 26600 sandro.lab
	</BODY>
164
</RESOURCE_PROFILE>
165