Revision 48139
Added by Alessia Bardi almost 7 years ago
index.hdfs.xml | ||
---|---|---|
1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
2 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
|
2 |
<RESOURCE_PROFILE> |
|
3 | 3 |
<HEADER> |
4 |
<RESOURCE_IDENTIFIER value="0ef16778-4667-44b9-b453-3386f445beb1_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
|
|
5 |
<RESOURCE_TYPE value="WorkflowDSResourceType" />
|
|
6 |
<RESOURCE_KIND value="WorkflowDSResources" />
|
|
7 |
<RESOURCE_URI value="" />
|
|
8 |
<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z" />
|
|
4 |
<RESOURCE_IDENTIFIER value="0ef16778-4667-44b9-b453-3386f445beb1_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
5 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2014-08-01T18:13:51.0Z"/> |
|
9 | 9 |
</HEADER> |
10 | 10 |
<BODY> |
11 | 11 |
<WORKFLOW_NAME>Index from HDFS</WORKFLOW_NAME> |
... | ... | |
13 | 13 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
14 | 14 |
<CONFIGURATION start="manual"> |
15 | 15 |
<NODE name="setHdfsFile" type="SetHdfsFile" isStart="true"> |
16 |
<DESCRIPTION />
|
|
16 |
<DESCRIPTION/> |
|
17 | 17 |
<PARAMETERS> |
18 | 18 |
<PARAM name="hdfsPath" type="string" managedBy="user" required="true">/tmp/indexrecords_db_openaireplus_node6_t_DMF.seq</PARAM> |
19 | 19 |
<PARAM name="hdfsPathParam" type="string" managedBy="system" required="true">inputRecordsPath</PARAM> |
20 | 20 |
</PARAMETERS> |
21 | 21 |
<ARCS> |
22 |
<ARC to="prepare" />
|
|
22 |
<ARC to="prepare"/> |
|
23 | 23 |
</ARCS> |
24 | 24 |
</NODE> |
25 | 25 |
<NODE name="findIndex" type="FindIndex" isStart="true"> |
26 |
<DESCRIPTION />
|
|
26 |
<DESCRIPTION/> |
|
27 | 27 |
<PARAMETERS> |
28 | 28 |
<PARAM name="mdFormat" type="string" managedBy="user" required="true">DMF</PARAM> |
29 | 29 |
<PARAM name="layout" type="string" managedBy="system" required="true">index</PARAM> |
30 | 30 |
<PARAM name="interpretation" type="string" managedBy="system" required="true">openaire</PARAM> |
31 | 31 |
</PARAMETERS> |
32 | 32 |
<ARCS> |
33 |
<ARC name="found" to="waitIndex" />
|
|
34 |
<ARC name="notFound" to="createIndex" />
|
|
33 |
<ARC name="found" to="waitIndex"/> |
|
34 |
<ARC name="notFound" to="createIndex"/> |
|
35 | 35 |
</ARCS> |
36 | 36 |
</NODE> |
37 | 37 |
<NODE name="createIndex" type="CreateIndex"> |
38 |
<DESCRIPTION />
|
|
39 |
<PARAMETERS />
|
|
38 |
<DESCRIPTION/> |
|
39 |
<PARAMETERS/> |
|
40 | 40 |
<ARCS> |
41 |
<ARC to="waitIndex" />
|
|
41 |
<ARC to="waitIndex"/> |
|
42 | 42 |
</ARCS> |
43 | 43 |
</NODE> |
44 | 44 |
<NODE name="waitIndex"> |
45 |
<DESCRIPTION />
|
|
46 |
<PARAMETERS />
|
|
45 |
<DESCRIPTION/> |
|
46 |
<PARAMETERS/> |
|
47 | 47 |
<ARCS> |
48 |
<ARC to="prepare" />
|
|
48 |
<ARC to="prepare"/> |
|
49 | 49 |
</ARCS> |
50 | 50 |
</NODE> |
51 | 51 |
<NODE name="prepare" type="PrepareIndexJob" isJoin="true"> |
52 | 52 |
<DESCRIPTION>Prepare indexing</DESCRIPTION> |
53 | 53 |
<PARAMETERS> |
54 |
<PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
|
|
54 |
<PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM> |
|
55 | 55 |
<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM> |
56 |
<PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
|
|
56 |
<PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM> |
|
57 | 57 |
</PARAMETERS> |
58 | 58 |
<ARCS> |
59 |
<ARC to="cleanupRotten" />
|
|
59 |
<ARC to="cleanupRotten"/> |
|
60 | 60 |
</ARCS> |
61 |
</NODE>
|
|
61 |
</NODE> |
|
62 | 62 |
<NODE name="cleanupRotten" type="DeleteHdfsPathJob"> |
63 | 63 |
<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION> |
64 | 64 |
<PARAMETERS> |
65 | 65 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
66 | 66 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
67 |
{
|
|
68 |
'path' : 'rottenRecordsPath'
|
|
67 |
{ |
|
68 |
'path' : 'rottenRecordsPath' |
|
69 | 69 |
} |
70 |
</PARAM>
|
|
70 |
</PARAM> |
|
71 | 71 |
</PARAMETERS> |
72 | 72 |
<ARCS> |
73 |
<ARC to="updateIndex" />
|
|
73 |
<ARC to="updateIndex"/> |
|
74 | 74 |
</ARCS> |
75 |
</NODE>
|
|
75 |
</NODE> |
|
76 | 76 |
<NODE name="updateIndex" type="SubmitHadoopJob"> |
77 | 77 |
<DESCRIPTION>M/O index records</DESCRIPTION> |
78 | 78 |
<PARAMETERS> |
79 | 79 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
80 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>
|
|
80 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM> |
|
81 | 81 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
82 |
{
|
|
83 |
'mapred.input.dir' : 'inputRecordsPath',
|
|
84 |
'mapred.output.dir' : 'rottenRecordsPath',
|
|
85 |
'index.xslt' : 'index.xslt',
|
|
86 |
'index.solr.url' : 'index.solr.url',
|
|
87 |
'index.solr.collection' : 'index.solr.collection',
|
|
88 |
'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
|
|
89 |
'index.shutdown.wait.time' : 'index.shutdown.wait.time',
|
|
90 |
'index.solr.sim.mode' : 'index.solr.sim.mode',
|
|
91 |
'index.feed.timestamp' : 'index.feed.timestamp'
|
|
82 |
{ |
|
83 |
'mapred.input.dir' : 'inputRecordsPath', |
|
84 |
'mapred.output.dir' : 'rottenRecordsPath', |
|
85 |
'index.xslt' : 'index.xslt', |
|
86 |
'index.solr.url' : 'index.solr.url',
|
|
87 |
'index.solr.collection' : 'index.solr.collection',
|
|
88 |
'index.buffer.flush.threshold' : 'index.buffer.flush.threshold', |
|
89 |
'index.shutdown.wait.time' : 'index.shutdown.wait.time', |
|
90 |
'index.solr.sim.mode' : 'index.solr.sim.mode', |
|
91 |
'index.feed.timestamp' : 'index.feed.timestamp' |
|
92 | 92 |
} |
93 | 93 |
</PARAM> |
94 | 94 |
</PARAMETERS> |
95 | 95 |
<ARCS> |
96 |
<ARC to="finalize" />
|
|
96 |
<ARC to="finalize"/> |
|
97 | 97 |
</ARCS> |
98 | 98 |
</NODE> |
99 | 99 |
<NODE name="finalize" type="FinalizeIndexFeeding"> |
100 | 100 |
<DESCRIPTION>commit changes</DESCRIPTION> |
101 |
<PARAMETERS />
|
|
101 |
<PARAMETERS/> |
|
102 | 102 |
<ARCS> |
103 |
<ARC to="success" />
|
|
103 |
<ARC to="success"/> |
|
104 | 104 |
</ARCS> |
105 | 105 |
</NODE> |
106 | 106 |
</CONFIGURATION> |
107 |
<STATUS />
|
|
107 |
<STATUS/> |
|
108 | 108 |
</BODY> |
109 | 109 |
</RESOURCE_PROFILE> |
110 | 110 |
|
Also available in: Unified diff
Integrated (hopefully) all required changes from dnet40.