Project

General

Profile

« Previous | Next » 

Revision 48139

Integrated (hopefully) all required changes from dnet40.

View differences:

index.hdfs.xml
1 1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
2
<RESOURCE_PROFILE>
3 3
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="0ef16778-4667-44b9-b453-3386f445beb1_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
5
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
6
		<RESOURCE_KIND value="WorkflowDSResources" />
7
		<RESOURCE_URI value="" />
8
		<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z" />
4
		<RESOURCE_IDENTIFIER value="0ef16778-4667-44b9-b453-3386f445beb1_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
		<RESOURCE_KIND value="WorkflowDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z"/>
9 9
	</HEADER>
10 10
	<BODY>
11 11
		<WORKFLOW_NAME>Index from HDFS</WORKFLOW_NAME>
......
13 13
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14 14
		<CONFIGURATION start="manual">
15 15
			<NODE name="setHdfsFile" type="SetHdfsFile" isStart="true">
16
				<DESCRIPTION />
16
				<DESCRIPTION/>
17 17
				<PARAMETERS>
18 18
					<PARAM name="hdfsPath" type="string" managedBy="user" required="true">/tmp/indexrecords_db_openaireplus_node6_t_DMF.seq</PARAM>
19 19
					<PARAM name="hdfsPathParam" type="string" managedBy="system" required="true">inputRecordsPath</PARAM>
20 20
				</PARAMETERS>
21 21
				<ARCS>
22
					<ARC to="prepare" />
22
					<ARC to="prepare"/>
23 23
				</ARCS>
24 24
			</NODE>
25 25
			<NODE name="findIndex" type="FindIndex" isStart="true">
26
				<DESCRIPTION />
26
				<DESCRIPTION/>
27 27
				<PARAMETERS>
28 28
					<PARAM name="mdFormat" type="string" managedBy="user" required="true">DMF</PARAM>
29 29
					<PARAM name="layout" type="string" managedBy="system" required="true">index</PARAM>
30 30
					<PARAM name="interpretation" type="string" managedBy="system" required="true">openaire</PARAM>
31 31
				</PARAMETERS>
32 32
				<ARCS>
33
					<ARC name="found" to="waitIndex" />
34
					<ARC name="notFound" to="createIndex" />
33
					<ARC name="found" to="waitIndex"/>
34
					<ARC name="notFound" to="createIndex"/>
35 35
				</ARCS>
36 36
			</NODE>
37 37
			<NODE name="createIndex" type="CreateIndex">
38
				<DESCRIPTION />
39
				<PARAMETERS />
38
				<DESCRIPTION/>
39
				<PARAMETERS/>
40 40
				<ARCS>
41
					<ARC to="waitIndex" />
41
					<ARC to="waitIndex"/>
42 42
				</ARCS>
43 43
			</NODE>
44 44
			<NODE name="waitIndex">
45
				<DESCRIPTION />
46
				<PARAMETERS />
45
				<DESCRIPTION/>
46
				<PARAMETERS/>
47 47
				<ARCS>
48
					<ARC to="prepare" />
48
					<ARC to="prepare"/>
49 49
				</ARCS>
50 50
			</NODE>
51 51
			<NODE name="prepare" type="PrepareIndexJob" isJoin="true">
52 52
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
53 53
				<PARAMETERS>
54
					<PARAM name="rottenRecordsPathParam"  type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
54
					<PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
55 55
					<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
56
					<PARAM name="oafSchemaLocationProperty"  type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
56
					<PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
57 57
				</PARAMETERS>
58 58
				<ARCS>
59
					<ARC to="cleanupRotten" />
59
					<ARC to="cleanupRotten"/>
60 60
				</ARCS>
61
			</NODE>	
61
			</NODE>
62 62
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
63 63
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
64 64
				<PARAMETERS>
65 65
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
66 66
					<PARAM required="true" type="string" name="envParams" managedBy="system">
67
						{ 	
68
							'path' : 'rottenRecordsPath'
67
						{
68
						'path' : 'rottenRecordsPath'
69 69
						}
70
					</PARAM>					
70
					</PARAM>
71 71
				</PARAMETERS>
72 72
				<ARCS>
73
					<ARC to="updateIndex" />
73
					<ARC to="updateIndex"/>
74 74
				</ARCS>
75
			</NODE>			
75
			</NODE>
76 76
			<NODE name="updateIndex" type="SubmitHadoopJob">
77 77
				<DESCRIPTION>M/O index records</DESCRIPTION>
78 78
				<PARAMETERS>
79 79
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
80
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>					
80
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>
81 81
					<PARAM required="true" type="string" name="envParams" managedBy="system">
82
						{ 	
83
							'mapred.input.dir' : 'inputRecordsPath',
84
							'mapred.output.dir' : 'rottenRecordsPath',
85
							'index.xslt' : 'index.xslt',
86
                           	'index.solr.url' : 'index.solr.url',
87
                            'index.solr.collection' : 'index.solr.collection',
88
							'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
89
							'index.shutdown.wait.time' : 'index.shutdown.wait.time',
90
							'index.solr.sim.mode' : 'index.solr.sim.mode',
91
							'index.feed.timestamp' : 'index.feed.timestamp'
82
						{
83
						'mapred.input.dir' : 'inputRecordsPath',
84
						'mapred.output.dir' : 'rottenRecordsPath',
85
						'index.xslt' : 'index.xslt',
86
						'index.solr.url' : 'index.solr.url',
87
						'index.solr.collection' : 'index.solr.collection',
88
						'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
89
						'index.shutdown.wait.time' : 'index.shutdown.wait.time',
90
						'index.solr.sim.mode' : 'index.solr.sim.mode',
91
						'index.feed.timestamp' : 'index.feed.timestamp'
92 92
						}
93 93
					</PARAM>
94 94
				</PARAMETERS>
95 95
				<ARCS>
96
					<ARC to="finalize" />
96
					<ARC to="finalize"/>
97 97
				</ARCS>
98 98
			</NODE>
99 99
			<NODE name="finalize" type="FinalizeIndexFeeding">
100 100
				<DESCRIPTION>commit changes</DESCRIPTION>
101
				<PARAMETERS />
101
				<PARAMETERS/>
102 102
				<ARCS>
103
					<ARC to="success" />
103
					<ARC to="success"/>
104 104
				</ARCS>
105 105
			</NODE>
106 106
		</CONFIGURATION>
107
		<STATUS />
107
		<STATUS/>
108 108
	</BODY>
109 109
</RESOURCE_PROFILE>
110 110

  

Also available in: Unified diff