Project

General

Profile

1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
	<!-- Profile header: resource identifier, type, kind, URI (empty in this profile) and creation timestamp.
	     The part of RESOURCE_IDENTIFIER after the underscore is the Base64 encoding of
	     "WorkflowDSResources/WorkflowDSResourceType", i.e. RESOURCE_KIND + "/" + RESOURCE_TYPE. -->
	<HEADER>
4
		<RESOURCE_IDENTIFIER
5
			value="c7d7d775-2db3-474d-85ab-5173a582d515_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
6
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
7
		<RESOURCE_KIND value="WorkflowDSResources" />
8
		<RESOURCE_URI value="" />
9
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" />
10
	</HEADER>
11
	<BODY>
12
		<!-- Workflow-level metadata: display name, type and priority.
		     NOTE(review): the scale and direction of WORKFLOW_PRIORITY are not visible in this file; confirm before changing 30. -->
		<WORKFLOW_NAME>OAF to HBase</WORKFLOW_NAME>
13
		<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE>
14
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
15
		<CONFIGURATION start="manual">
16
			<!-- Start node of the workflow (isStart="true"). Declares two values, each paired with a "...Param"
			     entry that carries a key name:
			       hdfsPathParam = hdfsPath, hdfsPath = the sequence file path under /tmp
			       xsltParam     = xslt,     xslt     = the dmf_2_hbase.xsl resource path
			     The same two key names (hdfsPath, xslt) are referenced by the envParams of the "mapreduce" node.
			     NOTE(review): presumably the node publishes each value under the key named by its "...Param"
			     entry; confirm against the PrepareMDStoreImport implementation.
			     Single unnamed arc: continues to "reuseHdfsRecords". -->
			<NODE name="prepareImport" type="PrepareMDStoreImport" isStart="true">
17
				<DESCRIPTION>Configure export to HDFS</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM required="true" type="string" name="hdfsPathParam" managedBy="system">hdfsPath</PARAM>
20
					<PARAM required="true" type="string" name="hdfsPath" managedBy="system">/tmp/mdstores_oaf-store-cleaned.seq</PARAM>
21
					<PARAM required="true" type="string" name="xsltParam" managedBy="system">xslt</PARAM>
22
					<PARAM required="true" type="string" name="xslt" managedBy="system">/eu/dnetlib/data/transform/dmf_2_hbase.xsl</PARAM>					
23
				</PARAMETERS>
24
				<ARCS>
25
					<ARC to="reuseHdfsRecords" />
26
				</ARCS>
27
			</NODE>
28
			<!-- Branch node with two named arcs:
			       "true"  goes straight to "mapreduce" (skipping "exportRecords" and "storeHdfsRecords")
			       "false" goes to "exportRecords"
			     The only parameter, reuseMdRecords, is user-managed and set to false in this profile.
			     NOTE(review): presumably the arc taken is the one whose name equals the value of reuseMdRecords;
			     confirm against the ReuseHdfsRecords implementation. -->
			<NODE name="reuseHdfsRecords" type="ReuseHdfsRecords">
29
				<DESCRIPTION>reuse mdstore records</DESCRIPTION>
30
				<PARAMETERS>
31
					<PARAM required="true" type="boolean" name="reuseMdRecords" managedBy="user">false</PARAM>
32
				</PARAMETERS>
33
				<ARCS>
34
					<ARC name="true" to="mapreduce" />
35
					<ARC name="false" to="exportRecords" />
36
				</ARCS>
37
			</NODE>
38
			<!-- Reached through the "false" arc of "reuseHdfsRecords". Parameters name a metadata store by
			     format (OAF), layout (store) and interpretation (cleaned). outputEprParam carries the key
			     name records_epr, which is the same name "storeHdfsRecords" lists as its inputEprParam.
			     Single unnamed arc: continues to "storeHdfsRecords". -->
			<NODE name="exportRecords" type="MDStoreBatchExporter">
39
				<DESCRIPTION>Fetch mdstore records</DESCRIPTION>
40
				<PARAMETERS>
41
					<PARAM required="true" type="string" name="format" managedBy="system">OAF</PARAM>
42
					<PARAM required="true" type="string" name="layout" managedBy="system">store</PARAM>
43
					<PARAM required="true" type="string" name="interpretation" managedBy="system">cleaned</PARAM>
44
					<PARAM required="true" type="string" name="outputEprParam" managedBy="system">records_epr</PARAM>
45
				</PARAMETERS>
46
				<ARCS>
47
					<ARC to="storeHdfsRecords" />
48
				</ARCS>
49
			</NODE>
50
			<!-- Consumes the key names produced earlier in the graph: inputEprParam = records_epr (set as
			     outputEprParam by "exportRecords") and hdfsPathParam = hdfsPath (declared in "prepareImport").
			     The cluster value DM is the same one used by the "mapreduce" node.
			     The check node "checkOAFResultCount" refers to this node by name in its hdfsCounterParamName.
			     Single unnamed arc: continues to "mapreduce". -->
			<NODE name="storeHdfsRecords" type="StoreHdfsRecords">
51
				<DESCRIPTION>Store records to HDFS</DESCRIPTION>
52
				<PARAMETERS>
53
					<PARAM required="true" type="string" name="inputEprParam" managedBy="system">records_epr</PARAM>
54
					<PARAM required="true" type="string" name="hdfsPathParam" managedBy="system">hdfsPath</PARAM>
55
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>					
56
				</PARAMETERS>
57
				<ARCS>
58
					<ARC to="mapreduce" />
59
				</ARCS>
60
			</NODE>			
61
			<!-- Submits the Hadoop job named mdStoreHdfsImportJob on cluster DM. Reachable from both
			     "storeHdfsRecords" and the "true" arc of "reuseHdfsRecords".
			     envParams and sysParams hold single-quoted, JSON-like maps as multi-line element text; the
			     surrounding whitespace is part of that text, so do not reformat these two elements.
			       envParams: job property name mapped to a key name (hdfsPath, xslt) declared in "prepareImport"
			       sysParams: job property name mapped to the name hbase.mapred.datatable
			     NOTE(review): presumably envParams values are resolved from the workflow environment and
			     sysParams values from system-level properties; confirm against the SubmitHadoopJob implementation.
			     simulation is the only user-managed parameter and is false in this profile.
			     Single unnamed arc: continues to "checkOAFResultCount". -->
			<NODE name="mapreduce" type="SubmitHadoopJob">
62
				<DESCRIPTION>Run M/R import Job</DESCRIPTION>
63
				<PARAMETERS>
64
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
65
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">mdStoreHdfsImportJob</PARAM>
66
					<PARAM required="true" type="string" name="envParams" managedBy="system">
67
						{ 
68
							'mapred.input.dir' : 'hdfsPath', 
69
							'hbase.import.xslt' : 'xslt'
70
						}
71
					</PARAM>	
72
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
73
						{ 
74
							'hbase.mapred.outputtable' : 'hbase.mapred.datatable'
75
						}
76
					</PARAM>
77
					<PARAM required="true" type="boolean" name="simulation"	managedBy="user">false</PARAM>
78
				</PARAMETERS>
79
				<ARCS>
80
					<ARC to="checkOAFResultCount" />
81
				</ARCS>
82
			</NODE>
83
			<!-- Last node defined in this CONFIGURATION; its arc targets "success", which is not a NODE in
			     this file (presumably a built-in terminal state; confirm).
			     numberToVerifyParamName = mdstore.result; hdfsCounterParamName = mainlog:storeHdfsRecords:count,
			     which is the only parameter in this workflow declared required="false".
			     NOTE(review): the counter name refers to "storeHdfsRecords", but the "true" arc of
			     "reuseHdfsRecords" reaches "mapreduce" without running that node. Confirm how this check
			     behaves when records are reused and no such counter was produced in the current run. -->
			<NODE name="checkOAFResultCount" type="CheckHDFSCount">
84
				<DESCRIPTION>Checks if the number of publications stored on HBASE is the same as those in the HDFS file</DESCRIPTION>
85
				<PARAMETERS>
86
					<PARAM required="true" type="string" name="numberToVerifyParamName" managedBy="system">mdstore.result</PARAM>
87
					<PARAM required="false" type="string" name="hdfsCounterParamName" managedBy="system">mainlog:storeHdfsRecords:count</PARAM>
88
				</PARAMETERS>
89
				<ARCS>
90
					<ARC to="success" />
91
				</ARCS>
92
			</NODE>
93
		</CONFIGURATION>
94
		<STATUS />
95
	</BODY>
96
</RESOURCE_PROFILE>
97

    
98

    
(4-4/7)