Project

General

Profile

<?xml version="1.0" encoding="UTF-8"?>
<!--
	Workflow resource profile "ODF to HBase" (WORKFLOW_TYPE "Data Load").

	Flow, as declared by the ARC elements below:
	  * setTable (start)       : goes to mapreduce
	  * prepareImport (start)  : goes to reuseHdfsRecords
	  * reuseHdfsRecords       : arc "true" goes to doneExport, arc "false" goes to exportRecords
	  * exportRecords          : goes to storeHdfsRecords, which goes to doneExport
	  * doneExport             : goes to mapreduce (declared with isJoin="true")
	  * mapreduce              : goes to checkODFResultCount, which goes to success

	NOTE(review): "success" is an ARC target but is not declared as a NODE in this
	profile; presumably it is a terminal node provided by the workflow engine. Confirm.
-->
<RESOURCE_PROFILE>
	<HEADER>
		<RESOURCE_IDENTIFIER
				value="939f1075-70ed-4fbd-9055-ea16e5984531_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
		<RESOURCE_KIND value="WorkflowDSResources"/>
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
	</HEADER>
	<BODY>
		<WORKFLOW_NAME>ODF to HBase</WORKFLOW_NAME>
		<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE>
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
		<CONFIGURATION start="manual">
			<!-- Start node 1 of 2: takes the user-supplied, required "table" parameter. -->
			<NODE name="setTable" type="SetHBaseTable" isStart="true">
				<DESCRIPTION>set hbase table</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="table" managedBy="user"></PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="mapreduce"/>
				</ARCS>
			</NODE>

			<!--
				Start node 2 of 2. The system-managed hdfsPathParam and mappingParam hold the
				values "hdfsPath" and "xslt"; the same two strings appear again in the
				envParams map of the mapreduce node.
			-->
			<NODE name="prepareImport" type="PrepareMDStoreImport" isStart="true">
				<DESCRIPTION>Configure export to HDFS</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="hdfsPathParam" managedBy="system">hdfsPath</PARAM>
					<PARAM required="true" type="string" name="hdfsPath" managedBy="user">/tmp/mdstores_odf-store-cleaned.seq</PARAM>
					<PARAM required="true" type="string" name="mappingParam" managedBy="system">xslt</PARAM>
					<PARAM required="true" type="string" name="mapping" managedBy="user" function="obtainValues('odf2hbaseMappings', {})"></PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="reuseHdfsRecords"/>
				</ARCS>
			</NODE>
			<!--
				Two-way branch with arcs named "true" and "false".
				NOTE(review): presumably the arc is chosen from the reuseMdRecords value
				(false here, so exportRecords would run); confirm against the node implementation.
			-->
			<NODE name="reuseHdfsRecords" type="ReuseHdfsRecords">
				<DESCRIPTION>reuse mdstore records</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="boolean" name="reuseMdRecords" managedBy="user">false</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC name="true" to="doneExport"/>
					<ARC name="false" to="exportRecords"/>
				</ARCS>
			</NODE>
			<!-- outputEprParam here and inputEprParam in storeHdfsRecords both carry the value "records_epr". -->
			<NODE name="exportRecords" type="MDStoreBatchExporter">
				<DESCRIPTION>Fetch mdstore records</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="format" managedBy="system">ODF</PARAM>
					<PARAM required="true" type="string" name="layout" managedBy="system">store</PARAM>
					<PARAM required="true" type="string" name="interpretation" managedBy="system">cleaned</PARAM>
					<PARAM required="true" type="string" name="outputEprParam" managedBy="system">records_epr</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="storeHdfsRecords"/>
				</ARCS>
			</NODE>
			<NODE name="storeHdfsRecords" type="StoreHdfsRecords">
				<DESCRIPTION>Store records to HDFS</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="inputEprParam" managedBy="system">records_epr</PARAM>
					<PARAM required="true" type="string" name="hdfsPathParam" managedBy="system">hdfsPath</PARAM>
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="doneExport"/>
				</ARCS>
			</NODE>
			<!-- Untyped node without parameters: the point where the "true" branch and the export branch meet again. -->
			<NODE name="doneExport">
				<DESCRIPTION></DESCRIPTION>
				<PARAMETERS/>
				<ARCS>
					<ARC to="mapreduce"/>
				</ARCS>
			</NODE>
			<!--
				Declared isJoin="true"; it is the target of arcs from both setTable and doneExport.

				The envParams body is element text, so its line breaks and indentation are
				part of the value; do not reformat it.
				NOTE(review): the right-hand values look like names of workflow attributes.
				"hdfsPath" and "xslt" match the hdfsPathParam and mappingParam values set in
				prepareImport; "hbaseTable" is not set by any PARAM visible in this profile.
				Confirm that it is produced by the SetHBaseTable node.
			-->
			<NODE name="mapreduce" type="SubmitHadoopJob" isJoin="true">
				<DESCRIPTION>Run M/R import Job</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">mdStoreHdfsImportJob</PARAM>
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
					<PARAM required="true" type="string" name="envParams" managedBy="system">
						{
						'mapred.input.dir' : 'hdfsPath',
						'hbase.import.xslt' : 'xslt',
						'hbase.mapred.outputtable' : 'hbaseTable'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="checkODFResultCount"/>
				</ARCS>
			</NODE>
			<!--
				NOTE(review): hdfsCounterParamName refers to storeHdfsRecords, which is
				bypassed when reuseHdfsRecords follows its "true" arc. The parameter is
				required="false"; confirm the check behaves as intended on that path.
			-->
			<NODE name="checkODFResultCount" type="CheckHDFSCounts">
				<DESCRIPTION>Checks if the number of publications stored on HBASE is the same as those in the HDFS file</DESCRIPTION>
				<PARAMETERS>
					<PARAM required="true" type="string" name="numberToVerifyParamName" managedBy="system">mdstore.result</PARAM>
					<PARAM required="false" type="string" name="hdfsCounterParamName" managedBy="system">mainlog:storeHdfsRecords:count</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="success"/>
				</ARCS>
			</NODE>
		</CONFIGURATION>
		<STATUS/>
	</BODY>
</RESOURCE_PROFILE>

    
(6-6/7)