Project

General

Profile

1
<RESOURCE_PROFILE>
	<!--
		Workflow profile "HBase to HDFS" (type "Data Provision", manual start).

		Graph declared below:
		  * four start nodes: setInfo, fetchRelClasses, fetchContexts, fetchEntityLinks;
		  * setInfo leads to findIndex, which branches to prepareIndexing directly ("found")
		    or through createIndex ("notFound");
		  * prepareIndexing fans out to cleanupXml and cleanupRotten;
		  * groupEntities (isJoin="true") is the target of both cleanup nodes and of the
		    three fetch* start nodes, and is the last node declared in this profile.
	-->
	<HEADER>
		<!-- The part after "_" is the Base64 encoding of "WorkflowDSResources/WorkflowDSResourceType",
		     i.e. RESOURCE_KIND "/" RESOURCE_TYPE as declared below. -->
		<RESOURCE_IDENTIFIER value="f8d1f8f1-2cf4-4f84-af70-ceb11ec120de_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
		<RESOURCE_KIND value="WorkflowDSResources"/>
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2015-11-12T17:19:38+00:00"/>
	</HEADER>
	<BODY>
		<WORKFLOW_NAME>HBase to HDFS</WORKFLOW_NAME>
		<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
		<CONFIGURATION start="manual">

			<!-- PREPARE NODES: the four nodes flagged isStart="true" -->

			<!-- User-managed format/layout/interpretation values; continues to findIndex. -->
			<NODE isStart="true" name="setInfo" type="SetFormatInfo">
				<DESCRIPTION>set mdformat, layout, interpretation</DESCRIPTION>
				<PARAMETERS>
					<PARAM managedBy="user" name="format" required="true" type="string">TMF</PARAM>
					<PARAM managedBy="user" name="layout" required="true" type="string">index</PARAM>
					<PARAM managedBy="user" name="interpretation" required="true" type="string">openaire</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="findIndex"/>
				</ARCS>
			</NODE>

			<!-- relClassesName ("relClasses") is the same name groupEntities lists in its envParams. -->
			<NODE isStart="true" name="fetchRelClasses" type="FetchRelClasses">
				<DESCRIPTION/>
				<PARAMETERS>
					<PARAM managedBy="system" name="relClassesProperty" required="true" type="string">dnet.openaire.model.relclasses.xquery</PARAM>
					<PARAM managedBy="system" name="relClassesName" required="true" type="string">relClasses</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="groupEntities"/>
				</ARCS>
			</NODE>

			<!-- Parameterless start node feeding the groupEntities join. -->
			<NODE isStart="true" name="fetchContexts" type="LoadContextsJob">
				<DESCRIPTION/>
				<PARAMETERS/>
				<ARCS>
					<ARC to="groupEntities"/>
				</ARCS>
			</NODE>

			<!-- Parameterless start node feeding the groupEntities join. -->
			<NODE isStart="true" name="fetchEntityLinks" type="LoadEntityLinksJob">
				<DESCRIPTION/>
				<PARAMETERS/>
				<ARCS>
					<ARC to="groupEntities"/>
				</ARCS>
			</NODE>

			<!-- UPDATE INDEX: index lookup/creation, HDFS cleanup, then the M/R job -->

			<!-- Two named outcomes: "found" skips createIndex, "notFound" goes through it. -->
			<NODE name="findIndex" type="FindIndex">
				<DESCRIPTION/>
				<PARAMETERS/>
				<ARCS>
					<ARC name="found" to="prepareIndexing"/>
					<ARC name="notFound" to="createIndex"/>
				</ARCS>
			</NODE>

			<!-- Reached only through the "notFound" arc of findIndex; rejoins at prepareIndexing. -->
			<NODE name="createIndex" type="CreateIndex">
				<DESCRIPTION/>
				<PARAMETERS/>
				<ARCS>
					<ARC to="prepareIndexing"/>
				</ARCS>
			</NODE>

			<!-- The two *PathParam values ("hdfsRecordsPath", "rottenRecordsPath") are the names the
			     cleanup nodes below reference in their envParams. The stylesheet path is kept on a
			     single line so that no newline or indentation becomes part of the value. -->
			<NODE name="prepareIndexing" type="PrepareIndexJob">
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
				<PARAMETERS>
					<PARAM managedBy="system" name="outputRecordsPathParam" required="true" type="string">hdfsRecordsPath</PARAM>
					<PARAM managedBy="system" name="rottenRecordsPathParam" required="true" type="string">rottenRecordsPath</PARAM>
					<PARAM managedBy="system" name="layoutToRecordStylesheet" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
					<PARAM managedBy="system" name="oafSchemaLocationProperty" required="true" type="string">oaf.schema.location</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="cleanupXml"/>
					<ARC to="cleanupRotten"/>
				</ARCS>
			</NODE>

			<!-- Deletes the HDFS path named by "hdfsRecordsPath" on cluster DM. -->
			<NODE name="cleanupXml" type="DeleteHdfsPathJob">
				<DESCRIPTION>hdfs cleanup (xml)</DESCRIPTION>
				<PARAMETERS>
					<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
					<PARAM managedBy="system" name="envParams" required="true" type="string">
						{
						'path' : 'hdfsRecordsPath'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="groupEntities"/>
				</ARCS>
			</NODE>

			<!-- Same job type as cleanupXml, pointed at "rottenRecordsPath". -->
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
				<PARAMETERS>
					<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
					<PARAM managedBy="system" name="envParams" required="true" type="string">
						{
						'path' : 'rottenRecordsPath'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="groupEntities"/>
				</ARCS>
			</NODE>

			<!-- Join node: target of fetchRelClasses, fetchContexts, fetchEntityLinks, cleanupXml and
			     cleanupRotten. Submits Hadoop job "prepareIndexDataJob" on cluster DM with
			     'mapred.output.dir' mapped to "hdfsRecordsPath".
			     NOTE(review): "success" is not declared as a NODE in this profile; presumably it is a
			     terminal state provided by the workflow engine. Confirm. -->
			<NODE isJoin="true" name="groupEntities" type="SubmitHadoopJob">
				<DESCRIPTION>M/R group entities</DESCRIPTION>
				<PARAMETERS>
					<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
					<PARAM managedBy="system" name="hadoopJob" required="true" type="string">prepareIndexDataJob</PARAM>
					<PARAM managedBy="system" name="sysParams" required="true" type="string">
						{
						'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
						'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
						}
					</PARAM>
					<PARAM managedBy="system" name="envParams" required="true" type="string">
						{
						'mapred.output.dir' : 'hdfsRecordsPath',
						'index.entity.links' : 'index.entity.links',
						'oaf.schema.location' : 'oaf.schema.location',
						'contextmap' : 'contextmap',
						'relClasses' : 'relClasses'
						}
					</PARAM>
				</PARAMETERS>
				<ARCS>
					<ARC to="success"/>
				</ARCS>
			</NODE>
		</CONFIGURATION>
		<STATUS>
		</STATUS>
	</BODY>
</RESOURCE_PROFILE>
(10-10/16)