Project

General

Profile

1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<!-- Resource header: identifier, resource type (WorkflowDSResourceType),
	     resource kind (WorkflowDSResources), an empty RESOURCE_URI and the
	     creation timestamp of this profile.
	     NOTE(review): the part of the identifier after the underscore looks like
	     a base64 encoding of the kind/type pair; confirm before editing it by hand. -->
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="0ef16778-4667-44b9-b453-3386f445beb1_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
		<RESOURCE_KIND value="WorkflowDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z"/>
9
	</HEADER>
10
	<BODY>
11
		<!-- Workflow descriptor: display name, workflow type and priority.
		     NOTE(review): the scale and ordering of the priority value (30) are not
		     visible in this profile; confirm against the workflow manager. -->
		<WORKFLOW_NAME>Index from HDFS</WORKFLOW_NAME>
12
		<WORKFLOW_TYPE>Provision</WORKFLOW_TYPE>
13
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
		<CONFIGURATION start="manual">
15
			<!-- Start node (isStart="true"), one of two in this configuration.
			     hdfsPath is the user-managed HDFS location of the input records;
			     hdfsPathParam holds the name inputRecordsPath, the same name the
			     updateIndex node uses as the value of 'mapred.input.dir'.
			     Presumably the node publishes hdfsPath under that name for later
			     nodes; confirm against the SetHdfsFile implementation.
			     Its single arc leads to the prepare node. -->
			<NODE name="setHdfsFile" type="SetHdfsFile" isStart="true">
16
				<DESCRIPTION/>
17
				<PARAMETERS>
18
					<PARAM name="hdfsPath" type="string" managedBy="user" required="true">/tmp/indexrecords_db_openaireplus_node6_t_DMF.seq</PARAM>
19
					<PARAM name="hdfsPathParam" type="string" managedBy="system" required="true">inputRecordsPath</PARAM>
20
				</PARAMETERS>
21
				<ARCS>
22
					<ARC to="prepare"/>
23
				</ARCS>
24
			</NODE>
25
			<!-- Second start node (isStart="true"). Its parameters name the index to
			     look for: metadata format DMF (user-managed), layout "index" and
			     interpretation "openaire" (both system-managed).
			     Two named arcs leave this node: "found" goes to waitIndex and
			     "notFound" goes to createIndex. -->
			<NODE name="findIndex" type="FindIndex" isStart="true">
26
				<DESCRIPTION/>
27
				<PARAMETERS>
28
					<PARAM name="mdFormat" type="string" managedBy="user" required="true">DMF</PARAM>
29
					<PARAM name="layout" type="string" managedBy="system" required="true">index</PARAM>
30
					<PARAM name="interpretation" type="string" managedBy="system" required="true">openaire</PARAM>
31
				</PARAMETERS>
32
				<ARCS>
33
					<ARC name="found" to="waitIndex"/>
34
					<ARC name="notFound" to="createIndex"/>
35
				</ARCS>
36
			</NODE>
37
			<!-- Target of the "notFound" arc of findIndex. Takes no parameters of its
			     own and continues to waitIndex.
			     Presumably it reuses the index coordinates resolved by findIndex;
			     confirm against the CreateIndex implementation. -->
			<NODE name="createIndex" type="CreateIndex">
38
				<DESCRIPTION/>
39
				<PARAMETERS/>
40
				<ARCS>
41
					<ARC to="waitIndex"/>
42
				</ARCS>
43
			</NODE>
44
			<!-- Merge point for the two index branches: reached from the "found" arc of
			     findIndex and from createIndex. No parameters; continues to prepare.
			     NOTE(review): unlike every other node in this configuration, this one
			     declares no type attribute. Presumably the engine treats it as a plain
			     pass-through node; confirm that the omission is intended. -->
			<NODE name="waitIndex">
45
				<DESCRIPTION/>
46
				<PARAMETERS/>
47
				<ARCS>
48
					<ARC to="prepare"/>
49
				</ARCS>
50
			</NODE>
51
			<!-- Join node (isJoin="true"): both setHdfsFile and waitIndex have arcs
			     pointing here. System-managed parameters:
			       rottenRecordsPathParam    : the name rottenRecordsPath, which the
			                                   cleanupRotten and updateIndex nodes
			                                   reference in their envParams
			       layoutToRecordStylesheet  : resource path of an XSL stylesheet
			       oafSchemaLocationProperty : the property name oaf.schema.location
			     Presumably this node computes the values later nodes look up by name
			     (for example index.xslt); confirm against PrepareIndexJob.
			     Continues to cleanupRotten. -->
			<NODE name="prepare" type="PrepareIndexJob" isJoin="true">
52
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
53
				<PARAMETERS>
54
					<PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
55
					<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
56
					<PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
57
				</PARAMETERS>
58
				<ARCS>
59
					<ARC to="cleanupRotten"/>
60
				</ARCS>
61
			</NODE>
62
			<!-- HDFS cleanup step on cluster DM. envParams maps 'path' to
			     rottenRecordsPath, the same name the prepare node receives in
			     rottenRecordsPathParam and that updateIndex uses as its output dir.
			     Presumably the right-hand side is a key resolved at run time rather
			     than a literal path; confirm against DeleteHdfsPathJob.
			     The envParams text is a single-quoted, JSON-like map; keep edits to
			     it well-formed. Continues to updateIndex. -->
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
63
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
64
				<PARAMETERS>
65
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
66
					<PARAM required="true" type="string" name="envParams" managedBy="system">
67
						{
68
						'path' : 'rottenRecordsPath'
69
						}
70
					</PARAM>
71
				</PARAMETERS>
72
				<ARCS>
73
					<ARC to="updateIndex"/>
74
				</ARCS>
75
			</NODE>
76
			<!-- Submits the Hadoop job indexFeedJob on cluster DM. envParams maps job
			     property names (left) to names on the right:
			       'mapred.input.dir'  : inputRecordsPath, the name set in setHdfsFile
			       'mapred.output.dir' : rottenRecordsPath, the location removed by
			                             cleanupRotten just before this node
			       'index.*' entries   : each maps to an identically named value
			     Presumably the right-hand names are looked up in the workflow
			     environment at submission time; confirm against SubmitHadoopJob.
			     Continues to finalize. -->
			<NODE name="updateIndex" type="SubmitHadoopJob">
77
				<DESCRIPTION>M/O index records</DESCRIPTION>
78
				<PARAMETERS>
79
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
80
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>
81
					<PARAM required="true" type="string" name="envParams" managedBy="system">
82
						{
83
						'mapred.input.dir' : 'inputRecordsPath',
84
						'mapred.output.dir' : 'rottenRecordsPath',
85
						'index.xslt' : 'index.xslt',
86
						'index.solr.url' : 'index.solr.url',
87
						'index.solr.collection' : 'index.solr.collection',
88
						'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
89
						'index.shutdown.wait.time' : 'index.shutdown.wait.time',
90
						'index.solr.sim.mode' : 'index.solr.sim.mode',
91
						'index.feed.timestamp' : 'index.feed.timestamp'
92
						}
93
					</PARAM>
94
				</PARAMETERS>
95
				<ARCS>
96
					<ARC to="finalize"/>
97
				</ARCS>
98
			</NODE>
99
			<!-- Last node defined in this configuration; described as committing the
			     changes made by the feed. Takes no parameters.
			     NOTE(review): its arc targets "success", but no node with that name is
			     defined in this profile. Presumably it is a terminal target provided
			     by the workflow engine; confirm. -->
			<NODE name="finalize" type="FinalizeIndexFeeding">
100
				<DESCRIPTION>commit changes</DESCRIPTION>
101
				<PARAMETERS/>
102
				<ARCS>
103
					<ARC to="success"/>
104
				</ARCS>
105
			</NODE>
106
		</CONFIGURATION>
107
		<STATUS/>
108
	</BODY>
109
</RESOURCE_PROFILE>
110

    
111

    
(7-7/21)