Revision 41141
Added by Eri Katsari over 8 years ago
modules/dnet-openaire-lodexport-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/oozie_app/workflow.xml | ||
---|---|---|
1 | 1 |
<workflow-app name="lod_generation" xmlns="uri:oozie:workflow:0.4"> |
2 |
<!-- map reduce job that exports hbase data and prepares them for import
|
|
3 |
to the lod_generation -->
|
|
2 |
<!-- map reduce job that exports hbase data and prepares them for import
|
|
3 |
to the lod_generation -->
|
|
4 | 4 |
|
5 |
<global>
|
|
6 |
<job-tracker>${jobTracker}</job-tracker>
|
|
7 |
<name-node>${nameNode}</name-node>
|
|
8 |
<configuration>
|
|
9 |
<property>
|
|
10 |
<name>mapred.job.queue.name</name>
|
|
11 |
<value>${queueName}</value>
|
|
12 |
</property>
|
|
13 |
<property>
|
|
14 |
<name>oozie.sqoop.log.level</name>
|
|
15 |
<value>DEBUG</value>
|
|
16 |
</property>
|
|
17 |
</configuration>
|
|
18 |
</global>
|
|
5 |
<global>
|
|
6 |
<job-tracker>${jobTracker}</job-tracker>
|
|
7 |
<name-node>${nameNode}</name-node>
|
|
8 |
<configuration>
|
|
9 |
<property>
|
|
10 |
<name>mapred.job.queue.name</name>
|
|
11 |
<value>${queueName}</value>
|
|
12 |
</property>
|
|
13 |
<property>
|
|
14 |
<name>oozie.sqoop.log.level</name>
|
|
15 |
<value>DEBUG</value>
|
|
16 |
</property>
|
|
17 |
</configuration>
|
|
18 |
</global>
|
|
19 | 19 |
|
20 | 20 |
|
21 |
<start to='clearGraph' />
|
|
21 |
<start to='clearGraph'/>
|
|
22 | 22 |
|
23 | 23 |
|
24 |
<action name="csv_export">
|
|
25 |
<map-reduce>
|
|
24 |
<action name="csv_export">
|
|
25 |
<map-reduce>
|
|
26 | 26 |
|
27 |
<prepare>
|
|
28 |
<delete path="${nameNode}${lod_output}" />
|
|
27 |
<prepare>
|
|
28 |
<delete path="${nameNode}${lod_output}"/>
|
|
29 | 29 |
|
30 |
</prepare>
|
|
30 |
</prepare>
|
|
31 | 31 |
|
32 |
<configuration>
|
|
32 |
<configuration>
|
|
33 | 33 |
|
34 |
<property>
|
|
35 |
<name>hbase.mapreduce.scan</name>
|
|
36 |
<value>${wf:actionData('get-scanner')['scan']}</value>
|
|
37 |
</property>
|
|
38 |
<property>
|
|
39 |
<name>hbase.rootdir</name>
|
|
40 |
<value>$nameNode/hbase</value>
|
|
34 |
<property>
|
|
35 |
<name>hbase.mapreduce.scan</name>
|
|
36 |
<value>${wf:actionData('get-scanner')['scan']}</value>
|
|
37 |
</property>
|
|
38 |
<property>
|
|
39 |
<name>hbase.rootdir</name>
|
|
40 |
<value>$nameNode/hbase</value>
|
|
41 | 41 |
|
42 |
</property>
|
|
42 |
</property>
|
|
43 | 43 |
|
44 |
<property>
|
|
45 |
<name>hbase.security.authentication</name>
|
|
46 |
<value>simple</value>
|
|
47 |
</property>
|
|
48 |
<!-- ZOOKEEPER -->
|
|
44 |
<property>
|
|
45 |
<name>hbase.security.authentication</name>
|
|
46 |
<value>simple</value>
|
|
47 |
</property>
|
|
48 |
<!-- ZOOKEEPER -->
|
|
49 | 49 |
|
50 |
<property>
|
|
51 |
<name>hbase.zookeeper.quorum</name>
|
|
52 |
<value>
|
|
53 |
namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu
|
|
54 |
</value>
|
|
55 |
<!-- <value>quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
|
|
56 |
</value> -->
|
|
57 |
</property>
|
|
58 |
<property>
|
|
59 |
<name>zookeeper.znode.rootserver</name>
|
|
60 |
<value>root-region-server</value>
|
|
50 |
<property>
|
|
51 |
<name>hbase.zookeeper.quorum</name>
|
|
52 |
<value>
|
|
53 |
namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu
|
|
54 |
</value>
|
|
55 |
<!-- <value>quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
|
|
56 |
</value> -->
|
|
57 |
</property>
|
|
58 |
<property>
|
|
59 |
<name>zookeeper.znode.rootserver</name>
|
|
60 |
<value>root-region-server</value>
|
|
61 | 61 |
|
62 |
</property>
|
|
63 |
<property>
|
|
64 |
<name>hbase.zookeeper.property.clientPort</name>
|
|
65 |
<value>2181</value>
|
|
66 |
<!--<value>2182</value> -->
|
|
67 |
</property>
|
|
62 |
</property>
|
|
63 |
<property>
|
|
64 |
<name>hbase.zookeeper.property.clientPort</name>
|
|
65 |
<value>2181</value>
|
|
66 |
<!--<value>2182</value> -->
|
|
67 |
</property>
|
|
68 | 68 |
|
69 | 69 |
|
70 |
<!-- MR IO -->
|
|
70 |
<!-- MR IO -->
|
|
71 | 71 |
|
72 |
<property>
|
|
73 |
<name>mapreduce.inputformat.class</name>
|
|
74 |
<value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
|
|
75 |
</property>
|
|
72 |
<property>
|
|
73 |
<name>mapreduce.inputformat.class</name>
|
|
74 |
<value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
|
|
75 |
</property>
|
|
76 | 76 |
|
77 |
<property>
|
|
78 |
<name>mapred.mapoutput.key.class</name>
|
|
79 |
<value>org.apache.hadoop.io.Text</value>
|
|
80 |
</property>
|
|
81 |
<property>
|
|
82 |
<name>mapred.mapoutput.value.class</name>
|
|
83 |
<value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value>
|
|
84 |
</property>
|
|
85 |
<property>
|
|
86 |
<name>mapred.output.key.class</name>
|
|
87 |
<value>org.apache.hadoop.io.Text</value>
|
|
88 |
</property>
|
|
89 |
<property>
|
|
90 |
<name>mapred.output.value.class</name>
|
|
91 |
<value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
|
|
92 |
</property>
|
|
77 |
<property>
|
|
78 |
<name>mapred.mapoutput.key.class</name>
|
|
79 |
<value>org.apache.hadoop.io.Text</value>
|
|
80 |
</property>
|
|
81 |
<property>
|
|
82 |
<name>mapred.mapoutput.value.class</name>
|
|
83 |
<value>org.apache.hadoop.hbase.io.ImmutableBytesWritable</value>
|
|
84 |
</property>
|
|
85 |
<property>
|
|
86 |
<name>mapred.output.key.class</name>
|
|
87 |
<value>org.apache.hadoop.io.Text</value>
|
|
88 |
</property>
|
|
89 |
<property>
|
|
90 |
<name>mapred.output.value.class</name>
|
|
91 |
<value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
|
|
92 |
</property>
|
|
93 | 93 |
|
94 |
<!-- ## This is required for new MapReduce API usage -->
|
|
95 |
<property>
|
|
96 |
<name>mapred.mapper.new-api</name>
|
|
97 |
<value>true</value>
|
|
98 |
</property>
|
|
99 |
<property>
|
|
100 |
<name>mapred.reducer.new-api</name>
|
|
101 |
<value>true</value>
|
|
102 |
</property>
|
|
94 |
<!-- ## This is required for new MapReduce API usage -->
|
|
95 |
<property>
|
|
96 |
<name>mapred.mapper.new-api</name>
|
|
97 |
<value>true</value>
|
|
98 |
</property>
|
|
99 |
<property>
|
|
100 |
<name>mapred.reducer.new-api</name>
|
|
101 |
<value>true</value>
|
|
102 |
</property>
|
|
103 | 103 |
|
104 |
<!-- # Job-specific options -->
|
|
105 |
<property>
|
|
106 |
<name>dfs.blocksize</name>
|
|
107 |
<value>32M</value>
|
|
108 |
</property>
|
|
109 |
<property>
|
|
110 |
<name>mapred.output.compress</name>
|
|
111 |
<value>false</value>
|
|
112 |
</property>
|
|
113 |
<property>
|
|
114 |
<name>mapred.reduce.tasks.speculative.execution</name>
|
|
115 |
<value>false</value>
|
|
116 |
</property>
|
|
117 |
<property>
|
|
118 |
<name>mapred.reduce.tasks.speculative.execution</name>
|
|
119 |
<value>false</value>
|
|
120 |
</property>
|
|
104 |
<!-- # Job-specific options -->
|
|
105 |
<property>
|
|
106 |
<name>dfs.blocksize</name>
|
|
107 |
<value>32M</value>
|
|
108 |
</property>
|
|
109 |
<property>
|
|
110 |
<name>mapred.output.compress</name>
|
|
111 |
<value>false</value>
|
|
112 |
</property>
|
|
113 |
<property>
|
|
114 |
<name>mapred.reduce.tasks.speculative.execution</name>
|
|
115 |
<value>false</value>
|
|
116 |
</property>
|
|
117 |
<property>
|
|
118 |
<name>mapred.reduce.tasks.speculative.execution</name>
|
|
119 |
<value>false</value>
|
|
120 |
</property>
|
|
121 | 121 |
|
122 |
<property>
|
|
123 |
<name>mapreduce.map.speculative</name>
|
|
124 |
<value>false</value>
|
|
125 |
</property>
|
|
122 |
<property>
|
|
123 |
<name>mapreduce.map.speculative</name>
|
|
124 |
<value>false</value>
|
|
125 |
</property>
|
|
126 | 126 |
|
127 |
<!-- I/O FORMAT -->
|
|
128 |
<!-- IMPORTANT: sets default delimiter used by text output writer. Required
|
|
129 |
to fix issue with trailing tab added between id and value in multiple outputs -->
|
|
130 |
<property>
|
|
131 |
<name>mapred.textoutputformat.separator</name>
|
|
132 |
<value>${lod_delim}</value>
|
|
133 |
</property>
|
|
134 |
<!-- ## Names of all output ports -->
|
|
127 |
<!-- I/O FORMAT -->
|
|
128 |
<!-- IMPORTANT: sets default delimiter used by text output writer. Required
|
|
129 |
to fix issue with trailing tab added between id and value in multiple outputs -->
|
|
130 |
<property>
|
|
131 |
<name>mapred.textoutputformat.separator</name>
|
|
132 |
<value>${lod_delim}</value>
|
|
133 |
</property>
|
|
134 |
<!-- ## Names of all output ports -->
|
|
135 | 135 |
|
136 |
<property>
|
|
137 |
<name>mapreduce.multipleoutputs</name>
|
|
138 |
<value>
|
|
139 |
${out1} ${out2}
|
|
140 |
</value>
|
|
136 |
<property>
|
|
137 |
<name>mapreduce.multipleoutputs</name>
|
|
138 |
<value>
|
|
139 |
${out1} ${out2}
|
|
140 |
</value>
|
|
141 | 141 |
|
142 |
</property>
|
|
143 |
<property>
|
|
144 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name>
|
|
145 |
<value>org.apache.hadoop.io.Text</value>
|
|
146 |
</property>
|
|
147 |
<property>
|
|
148 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name>
|
|
149 |
<value>org.apache.hadoop.io.Text</value>
|
|
150 |
</property>
|
|
151 |
<property>
|
|
152 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
|
|
153 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
|
|
154 |
</property>
|
|
155 |
<!-- datasourceLanguage -->
|
|
156 |
<property>
|
|
157 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name>
|
|
158 |
<value>org.apache.hadoop.io.Text</value>
|
|
159 |
</property>
|
|
160 |
<property>
|
|
161 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name>
|
|
162 |
<value>org.apache.hadoop.io.Text</value>
|
|
163 |
</property>
|
|
164 |
<property>
|
|
165 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
|
|
166 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
|
|
167 |
</property>
|
|
142 |
</property>
|
|
143 |
<property>
|
|
144 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name>
|
|
145 |
<value>org.apache.hadoop.io.Text</value>
|
|
146 |
</property>
|
|
147 |
<property>
|
|
148 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name>
|
|
149 |
<value>org.apache.hadoop.io.Text</value>
|
|
150 |
</property>
|
|
151 |
<property>
|
|
152 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
|
|
153 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
|
|
154 |
</property>
|
|
155 |
<!-- datasourceLanguage -->
|
|
156 |
<property>
|
|
157 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name>
|
|
158 |
<value>org.apache.hadoop.io.Text</value>
|
|
159 |
</property>
|
|
160 |
<property>
|
|
161 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name>
|
|
162 |
<value>org.apache.hadoop.io.Text</value>
|
|
163 |
</property>
|
|
164 |
<property>
|
|
165 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
|
|
166 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
|
|
167 |
</property>
|
|
168 | 168 |
|
169 | 169 |
|
170 |
<!-- ## Classes of mapper and reducer -->
|
|
170 |
<!-- ## Classes of mapper and reducer -->
|
|
171 | 171 |
|
172 |
<property>
|
|
173 |
<name>mapreduce.map.class</name>
|
|
174 |
<value>eu.dnetlib.data.mapreduce.hbase.lodExport.LodMapper</value>
|
|
175 |
</property>
|
|
176 |
<property>
|
|
177 |
<name>mapreduce.reduce.class</name>
|
|
178 |
<value>eu.dnetlib.data.mapreduce.hbase.lodExport.LodReducer</value>
|
|
179 |
</property>
|
|
180 |
<property>
|
|
181 |
<name>io.serializations</name>
|
|
182 |
<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
|
|
183 |
</property>
|
|
172 |
<property>
|
|
173 |
<name>mapreduce.map.class</name>
|
|
174 |
<value>eu.dnetlib.data.mapreduce.hbase.lodExport.LodMapper</value>
|
|
175 |
</property>
|
|
176 |
<property>
|
|
177 |
<name>mapreduce.reduce.class</name>
|
|
178 |
<value>eu.dnetlib.data.mapreduce.hbase.lodExport.LodReducer</value>
|
|
179 |
</property>
|
|
180 |
<property>
|
|
181 |
<name>io.serializations</name>
|
|
182 |
<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
|
|
183 |
</property>
|
|
184 | 184 |
|
185 |
<!-- ## Custom config -->
|
|
185 |
<!-- ## Custom config -->
|
|
186 | 186 |
|
187 |
<!--delim character used to separate fields in hdfs dump files <property> -->
|
|
188 |
<property>
|
|
189 |
<name>lod.delim</name>
|
|
190 |
<value>${lod_delim}</value>
|
|
191 |
</property>
|
|
187 |
<!--delim character used to separate fields in hdfs dump files <property> -->
|
|
188 |
<property>
|
|
189 |
<name>lod.delim</name>
|
|
190 |
<value>${lod_delim}</value>
|
|
191 |
</property>
|
|
192 | 192 |
|
193 |
<property>
|
|
194 |
<name>lod.enclosing</name>
|
|
195 |
<value>${lod_enclosing}</value>
|
|
196 |
</property>
|
|
193 |
<property>
|
|
194 |
<name>lod.enclosing</name>
|
|
195 |
<value>${lod_enclosing}</value>
|
|
196 |
</property>
|
|
197 | 197 |
|
198 | 198 |
|
199 |
<property>
|
|
200 |
<name>lod.seperator</name>
|
|
201 |
<value>${lod_seperator}</value>
|
|
202 |
</property>
|
|
199 |
<property>
|
|
200 |
<name>lod.seperator</name>
|
|
201 |
<value>${lod_seperator}</value>
|
|
202 |
</property>
|
|
203 | 203 |
|
204 | 204 |
|
205 |
<!--source hbase table -->
|
|
206 |
<property>
|
|
207 |
<name>hbase.mapreduce.inputtable</name>
|
|
208 |
<value>${lod_hbase_table}</value>
|
|
209 |
</property>
|
|
210 |
<property>
|
|
211 |
<name>hbase.mapred.inputtable</name>
|
|
212 |
<value>${lod_hbase_table}</value>
|
|
213 |
</property>
|
|
205 |
<!--source hbase table -->
|
|
206 |
<property>
|
|
207 |
<name>hbase.mapreduce.inputtable</name>
|
|
208 |
<value>${lod_hbase_table}</value>
|
|
209 |
</property>
|
|
210 |
<property>
|
|
211 |
<name>hbase.mapred.inputtable</name>
|
|
212 |
<value>${lod_hbase_table}</value>
|
|
213 |
</property>
|
|
214 | 214 |
|
215 |
<!-- This directory does not correspond to a data store. In fact, this
|
|
216 |
directory only contains multiple data stores. It has to be set to the name
|
|
217 |
of the workflow node. -->
|
|
218 |
<property>
|
|
219 |
<name>mapred.output.dir</name>
|
|
220 |
<value>${lod_output}</value>
|
|
221 |
</property>
|
|
222 |
<property>
|
|
223 |
<name>index.conf</name>
|
|
224 |
<value>${lod_indexConf}</value>
|
|
225 |
</property>
|
|
215 |
<!-- This directory does not correspond to a data store. In fact, this
|
|
216 |
directory only contains multiple data stores. It has to be set to the name
|
|
217 |
of the workflow node. -->
|
|
218 |
<property>
|
|
219 |
<name>mapred.output.dir</name>
|
|
220 |
<value>${lod_output}</value>
|
|
221 |
</property>
|
|
222 |
<property>
|
|
223 |
<name>index.conf</name>
|
|
224 |
<value>${lod_indexConf}</value>
|
|
225 |
</property>
|
|
226 | 226 |
|
227 |
<property>
|
|
228 |
<name>lod.lastExecutionDate</name>
|
|
229 |
<value>${lod_lastExecutionDate}</value>
|
|
230 |
</property>
|
|
227 |
<property>
|
|
228 |
<name>lod.lastExecutionDate</name>
|
|
229 |
<value>${lod_lastExecutionDate}</value>
|
|
230 |
</property>
|
|
231 | 231 |
|
232 |
<!-- ## Workflow node parameters -->
|
|
233 |
<property>
|
|
234 |
<name>mapred.reduce.tasks</name>
|
|
235 |
<value>${numReducers}</value>
|
|
236 |
</property>
|
|
232 |
<!-- ## Workflow node parameters -->
|
|
233 |
<property>
|
|
234 |
<name>mapred.reduce.tasks</name>
|
|
235 |
<value>${numReducers}</value>
|
|
236 |
</property>
|
|
237 | 237 |
|
238 |
</configuration>
|
|
238 |
</configuration>
|
|
239 | 239 |
|
240 |
</map-reduce>
|
|
241 |
<ok to="end" />
|
|
240 |
</map-reduce>
|
|
241 |
<ok to="end"/>
|
|
242 | 242 |
|
243 |
<error to="fail" />
|
|
244 |
</action>
|
|
243 |
<error to="fail"/>
|
|
244 |
</action>
|
|
245 | 245 |
|
246 | 246 |
|
247 |
<action name='clearGraph'> |
|
248 |
<java> |
|
249 |
<prepare> |
|
250 |
</prepare> |
|
251 |
<configuration> |
|
252 |
<property> |
|
253 |
<name>mapred.job.queue.name</name> |
|
254 |
<value>${queueName}</value> |
|
255 |
</property> |
|
256 |
</configuration> |
|
257 |
<main-class>eu.dnetlib.iis.core.workflows.lodexport.ClearGraph</main-class> |
|
258 |
<arg>${lod_conLine}</arg> |
|
259 |
<arg>${lod_username}</arg> |
|
260 |
<arg>${lod_password}</arg> |
|
261 |
<arg>${lod_minCpart}</arg> |
|
262 |
<arg>${lod_maxCpart}</arg> |
|
263 |
<arg>${lod_part}</arg> |
|
264 |
<arg>${lod_relationsGraph}</arg> |
|
265 |
</java> |
|
266 |
<ok to="rdf_entities_import"/> |
|
247 | 267 |
|
248 |
<action name='clearGraph'> |
|
249 |
<java> |
|
250 |
<prepare> |
|
251 |
</prepare> |
|
252 |
<configuration> |
|
253 |
<property> |
|
254 |
<name>mapred.job.queue.name</name> |
|
255 |
<value>${queueName}</value> |
|
256 |
</property> |
|
257 |
</configuration> |
|
258 |
<main-class>eu.dnetlib.iis.core.workflows.lodexport.ClearGraph</main-class> |
|
259 |
<arg>${lod_conLine}</arg> |
|
260 |
<arg>${lod_username}</arg> |
|
261 |
<arg>${lod_password}</arg> |
|
262 |
<arg>${lod_minCpart}</arg> |
|
263 |
<arg>${lod_maxCpart}</arg> |
|
264 |
<arg>${lod_part}</arg> |
|
265 |
<arg>${lod_relationsGraph}</arg> |
|
266 |
</java> |
|
267 |
<ok to="rdf_entities_import" /> |
|
268 |
<error to="fail"/> |
|
269 |
</action> |
|
268 | 270 |
|
269 |
<error to="fail" /> |
|
270 |
</action> |
|
271 |
<action name="rdf_entities_import"> |
|
271 | 272 |
|
272 |
<action name="rdf_entities_import">
|
|
273 |
<map-reduce>
|
|
273 | 274 |
|
274 |
<map-reduce> |
|
275 |
<prepare> |
|
276 |
<delete path="${nameNode}${lod_output}test"/> |
|
277 |
</prepare> |
|
275 | 278 |
|
276 |
<prepare> |
|
277 |
<delete path="${nameNode}${lod_output}test" /> |
|
278 |
</prepare> |
|
279 |
<configuration> |
|
280 |
<property> |
|
281 |
<name>hbase.security.authentication</name> |
|
282 |
<value>simple</value> |
|
283 |
</property> |
|
279 | 284 |
|
280 |
<configuration> |
|
281 |
<property> |
|
282 |
<name>hbase.security.authentication</name> |
|
283 |
<value>simple</value> |
|
284 |
</property> |
|
285 |
<!-- ZOOKEEPER --> |
|
286 |
<property> |
|
287 |
<name>hbase.zookeeper.quorum</name> |
|
288 |
<!--<value> namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu |
|
289 |
</value> --> |
|
290 |
<value> |
|
291 |
quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu |
|
292 |
</value> |
|
293 |
</property> |
|
294 |
<property> |
|
295 |
<name>zookeeper.znode.rootserver</name> |
|
296 |
<value>root-region-server</value> |
|
297 |
</property> |
|
285 | 298 |
|
286 |
<!-- ZOOKEEPER --> |
|
287 |
<property> |
|
288 |
<name>hbase.zookeeper.quorum</name> |
|
289 |
<!--<value> namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu |
|
290 |
</value> --> |
|
291 |
<value> |
|
292 |
quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu |
|
293 |
</value> |
|
294 |
</property> |
|
295 |
<property> |
|
296 |
<name>zookeeper.znode.rootserver</name> |
|
297 |
<value>root-region-server</value> |
|
298 |
</property> |
|
299 |
<!-- CSV PROPS GO HERE --> |
|
300 |
<property> |
|
301 |
<name>hbase.zookeeper.property.clientPort</name> |
|
302 |
<value>2181</value> |
|
303 |
<!--<value>2182</value> --> |
|
304 |
</property> |
|
299 | 305 |
|
300 |
<!-- CSV PROPS GO HERE --> |
|
301 |
<property> |
|
302 |
<name>hbase.zookeeper.property.clientPort</name> |
|
303 |
<value>2181</value> |
|
304 |
<!--<value>2182</value> --> |
|
305 |
</property> |
|
306 | 306 |
|
307 |
<!-- MR IO --> |
|
307 | 308 |
|
309 |
<property> |
|
310 |
<name>mapred.input.dir</name> |
|
311 |
<value>${lod_EntitiesInputFile}</value> |
|
312 |
</property> |
|
308 | 313 |
|
309 |
<!-- MR IO --> |
|
310 | 314 |
|
311 |
<property>
|
|
312 |
<name>mapred.input.dir</name>
|
|
313 |
<value>${lod_EntitiesInputFile}</value>
|
|
314 |
</property>
|
|
315 |
<property>
|
|
316 |
<name>mapreduce.inputformat.class</name>
|
|
317 |
<value>org.apache.hadoop.mapreduce.lib.input.TextInputFormat</value>
|
|
318 |
</property>
|
|
315 | 319 |
|
320 |
<property> |
|
321 |
<name>mapred.mapoutput.key.class</name> |
|
322 |
<value>org.apache.hadoop.io.Text</value> |
|
323 |
</property> |
|
316 | 324 |
|
325 |
<property> |
|
326 |
<name>mapred.mapoutput.value.class</name> |
|
327 |
<value>org.apache.hadoop.io.Text</value> |
|
328 |
</property> |
|
317 | 329 |
|
318 |
<property>
|
|
319 |
<name>mapreduce.inputformat.class</name>
|
|
320 |
<value>org.apache.hadoop.mapreduce.lib.input.TextInputFormat</value>
|
|
321 |
</property>
|
|
330 |
<property>
|
|
331 |
<name>mapred.output.key.class</name>
|
|
332 |
<value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value>
|
|
333 |
</property>
|
|
322 | 334 |
|
323 |
<property>
|
|
324 |
<name>mapred.mapoutput.key.class</name>
|
|
325 |
<value>org.apache.hadoop.io.Text</value>
|
|
326 |
</property>
|
|
335 |
<property>
|
|
336 |
<name>mapred.output.value.class</name>
|
|
337 |
<value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value>
|
|
338 |
</property>
|
|
327 | 339 |
|
328 |
<property> |
|
329 |
<name>mapred.mapoutput.value.class</name> |
|
330 |
<value>org.apache.hadoop.io.Text</value> |
|
331 |
</property> |
|
340 |
<!-- ## This is required for new MapReduce API usage --> |
|
341 |
<property> |
|
342 |
<name>mapred.mapper.new-api</name> |
|
343 |
<value>true</value> |
|
344 |
</property> |
|
345 |
<property> |
|
346 |
<name>mapred.reducer.new-api</name> |
|
347 |
<value>true</value> |
|
348 |
</property> |
|
332 | 349 |
|
333 |
<property> |
|
334 |
<name>mapred.output.key.class</name> |
|
335 |
<value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value> |
|
336 |
</property> |
|
350 |
<!-- # Job-specific options --> |
|
351 |
<property> |
|
352 |
<name>dfs.blocksize</name> |
|
353 |
<value>32M</value> |
|
354 |
</property> |
|
355 |
<property> |
|
356 |
<name>mapred.output.compress</name> |
|
357 |
<value>false</value> |
|
358 |
</property> |
|
359 |
<property> |
|
360 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
361 |
<value>false</value> |
|
362 |
</property> |
|
363 |
<property> |
|
364 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
365 |
<value>false</value> |
|
366 |
</property> |
|
367 |
<property> |
|
368 |
<name>mapreduce.map.speculative</name> |
|
369 |
<value>false</value> |
|
370 |
</property> |
|
337 | 371 |
|
338 |
<property> |
|
339 |
<name>mapred.output.value.class</name> |
|
340 |
<value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value> |
|
341 |
</property> |
|
342 | 372 |
|
343 |
<!-- ## This is required for new MapReduce API usage --> |
|
344 |
<property> |
|
345 |
<name>mapred.mapper.new-api</name> |
|
346 |
<value>true</value> |
|
347 |
</property> |
|
348 |
<property> |
|
349 |
<name>mapred.reducer.new-api</name> |
|
350 |
<value>true</value> |
|
351 |
</property> |
|
373 |
<property> |
|
374 |
<name>map.output.key.field.separator</name> |
|
375 |
<value>${lod_delim}</value> |
|
376 |
</property> |
|
352 | 377 |
|
353 |
<!-- # Job-specific options --> |
|
354 |
<property> |
|
355 |
<name>dfs.blocksize</name> |
|
356 |
<value>32M</value> |
|
357 |
</property> |
|
358 |
<property> |
|
359 |
<name>mapred.output.compress</name> |
|
360 |
<value>false</value> |
|
361 |
</property> |
|
362 |
<property> |
|
363 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
364 |
<value>false</value> |
|
365 |
</property> |
|
366 |
<property> |
|
367 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
368 |
<value>false</value> |
|
369 |
</property> |
|
370 |
<property> |
|
371 |
<name>mapreduce.map.speculative</name> |
|
372 |
<value>false</value> |
|
373 |
</property> |
|
374 | 378 |
|
379 |
<!-- ## Classes of mapper and reducer --> |
|
380 |
<property> |
|
381 |
<name>mapreduce.map.class</name> |
|
382 |
<value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportMapper</value> |
|
383 |
</property> |
|
375 | 384 |
|
376 |
<property> |
|
377 |
<name>map.output.key.field.separator</name> |
|
378 |
<value>${lod_delim}</value> |
|
379 |
</property> |
|
385 |
<property> |
|
386 |
<name>mapreduce.reduce.class</name> |
|
380 | 387 |
|
388 |
<value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportReducer</value> |
|
389 |
</property> |
|
381 | 390 |
|
382 |
<!-- ## Classes of mapper and reducer --> |
|
383 |
<property> |
|
384 |
<name>mapreduce.map.class</name> |
|
385 |
<value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportMapper</value> |
|
386 |
</property> |
|
391 |
<property> |
|
387 | 392 |
|
388 |
<property> |
|
389 |
<name>mapreduce.reduce.class</name> |
|
393 |
<name>io.serializations</name> |
|
394 |
<value>org.apache.hadoop.io.serializer.WritableSerialization</value> |
|
395 |
</property> |
|
390 | 396 |
|
391 |
<value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportReducer</value>
|
|
392 |
</property>
|
|
397 |
<!-- ## Custom config -->
|
|
398 |
<!--delim character used to separate fields in hdfs dump files <property> -->
|
|
393 | 399 |
|
394 |
<property> |
|
400 |
<property> |
|
401 |
<name>lod.delim</name> |
|
402 |
<value>${lod_delim}</value> |
|
403 |
</property> |
|
395 | 404 |
|
396 |
<name>io.serializations</name> |
|
397 |
<value>org.apache.hadoop.io.serializer.WritableSerialization</value> |
|
398 |
</property> |
|
405 |
<property> |
|
406 |
<name>lod.enclosing</name> |
|
407 |
<value>${lod_enclosing}</value> |
|
408 |
</property> |
|
399 | 409 |
|
400 |
<!-- ## Custom config --> |
|
401 |
<!--delim character used to separate fields in hdfs dump files <property> --> |
|
410 |
<property> |
|
411 |
<name>lod.seperator</name> |
|
412 |
<value>${lod_seperator}</value> |
|
413 |
</property> |
|
402 | 414 |
|
403 |
<property>
|
|
404 |
<name>lod.delim</name>
|
|
405 |
<value>${lod_delim}</value>
|
|
406 |
</property>
|
|
415 |
<property>
|
|
416 |
<name>lod.lastExecutionDate</name>
|
|
417 |
<value>${lod_lastExecutionDate}</value>
|
|
418 |
</property>
|
|
407 | 419 |
|
408 |
<property>
|
|
409 |
<name>lod.enclosing</name>
|
|
410 |
<value>${lod_enclosing}</value>
|
|
411 |
</property>
|
|
420 |
<property>
|
|
421 |
<name>lod.conLine</name>
|
|
422 |
<value>${lod_conLine}</value>
|
|
423 |
</property>
|
|
412 | 424 |
|
413 |
<property>
|
|
414 |
<name>lod.seperator</name>
|
|
415 |
<value>${lod_seperator}</value>
|
|
416 |
</property>
|
|
425 |
<property>
|
|
426 |
<name>lod.username</name>
|
|
427 |
<value>${lod_username}</value>
|
|
428 |
</property>
|
|
417 | 429 |
|
418 |
<property>
|
|
419 |
<name>lod.lastExecutionDate</name>
|
|
420 |
<value>${lod_lastExecutionDate}</value>
|
|
421 |
</property>
|
|
430 |
<property>
|
|
431 |
<name>lod.password</name>
|
|
432 |
<value>${lod_password}</value>
|
|
433 |
</property>
|
|
422 | 434 |
|
423 |
<property>
|
|
424 |
<name>lod.conLine</name>
|
|
425 |
<value>${lod_conLine}</value>
|
|
426 |
</property>
|
|
435 |
<property>
|
|
436 |
<name>lod.minCpart</name>
|
|
437 |
<value>${lod_minCpart}</value>
|
|
438 |
</property>
|
|
427 | 439 |
|
428 |
<property>
|
|
429 |
<name>lod.username</name>
|
|
430 |
<value>${lod_username}</value>
|
|
431 |
</property>
|
|
440 |
<property>
|
|
441 |
<name>lod.maxCpart</name>
|
|
442 |
<value>${lod_maxCpart}</value>
|
|
443 |
</property>
|
|
432 | 444 |
|
433 |
<property>
|
|
434 |
<name>lod.password</name>
|
|
435 |
<value>${lod_password}</value>
|
|
436 |
</property>
|
|
445 |
<property>
|
|
446 |
<name>lod.part</name>
|
|
447 |
<value>${lod_part}</value>
|
|
448 |
</property>
|
|
437 | 449 |
|
438 |
<property>
|
|
439 |
<name>lod.minCpart</name>
|
|
440 |
<value>${lod_minCpart}</value>
|
|
441 |
</property>
|
|
450 |
<property>
|
|
451 |
<name>lod.jsonRels</name>
|
|
452 |
<value>${lod_jsonRels}</value>
|
|
453 |
</property>
|
|
442 | 454 |
|
443 |
<property>
|
|
444 |
<name>lod.maxCpart</name>
|
|
445 |
<value>${lod_maxCpart}</value>
|
|
446 |
</property>
|
|
455 |
<property>
|
|
456 |
<name>lod.jsonEntities</name>
|
|
457 |
<value>${lod_jsonEntities}</value>
|
|
458 |
</property>
|
|
447 | 459 |
|
448 |
<property>
|
|
449 |
<name>lod.part</name>
|
|
450 |
<value>${lod_part}</value>
|
|
451 |
</property>
|
|
460 |
<property>
|
|
461 |
<name>lod.defaultGraph</name>
|
|
462 |
<value>${lod_defaultGraph}</value>
|
|
463 |
</property>
|
|
452 | 464 |
|
453 |
<property> |
|
454 |
<name>lod.jsonRels</name> |
|
455 |
<value>${lod_jsonRels}</value> |
|
456 |
</property> |
|
457 | 465 |
|
458 |
<property>
|
|
459 |
<name>lod.jsonEntities</name>
|
|
460 |
<value>${lod_jsonEntities}</value>
|
|
461 |
</property>
|
|
466 |
<property>
|
|
467 |
<name>lod.relationsGraph</name>
|
|
468 |
<value>${lod_relationsGraph}</value>
|
|
469 |
</property>
|
|
462 | 470 |
|
463 |
<property> |
|
464 |
<name>lod.defaultGraph</name> |
|
465 |
<value>${lod_defaultGraph}</value> |
|
466 |
</property> |
|
467 | 471 |
|
472 |
<property> |
|
473 |
<name>lod.baseURI</name> |
|
474 |
<value>${lod_baseURI}</value> |
|
475 |
</property> |
|
468 | 476 |
|
469 |
<property>
|
|
470 |
<name>lod.relationsGraph</name>
|
|
471 |
<value>${lod_relationsGraph}</value>
|
|
472 |
</property>
|
|
477 |
<property>
|
|
478 |
<name>mapred.reduce.tasks</name>
|
|
479 |
<value>${numReducers}</value>
|
|
480 |
</property>
|
|
473 | 481 |
|
482 |
<property> |
|
483 |
<name>lod.inputFile</name> |
|
484 |
<value>${lod_EntitiesInputFile}</value> |
|
485 |
</property> |
|
474 | 486 |
|
475 |
<property>
|
|
476 |
<name>lod.baseURI</name>
|
|
477 |
<value>${lod_baseURI}</value>
|
|
478 |
</property>
|
|
487 |
<property>
|
|
488 |
<name>mapred.output.dir</name>
|
|
489 |
<value>${lod_output}test</value>
|
|
490 |
</property>
|
|
479 | 491 |
|
480 |
<property> |
|
481 |
<name>mapred.reduce.tasks</name> |
|
482 |
<value>${numReducers}</value> |
|
483 |
</property> |
|
492 |
<property> |
|
493 |
<name>lod.entitiesPerQuery</name> |
|
494 |
<value>${lod_entitiesPerQuery}</value> |
|
495 |
</property> |
|
496 |
<property> |
|
497 |
<name>lod.relationsPerQuery</name> |
|
498 |
<value>${lod_relationsPerQuery}</value> |
|
499 |
</property> |
|
484 | 500 |
|
485 |
<property>
|
|
486 |
<name>lod.inputFile</name>
|
|
487 |
<value>${lod_EntitiesInputFile}</value>
|
|
488 |
</property>
|
|
501 |
<property>
|
|
502 |
<name>lod.dataPath</name>
|
|
503 |
<value>${lod_dataPath}</value>
|
|
504 |
</property>
|
|
489 | 505 |
|
490 |
<property> |
|
491 |
<name>mapred.output.dir</name> |
|
492 |
<value>${lod_output}test</value> |
|
493 |
</property> |
|
494 | 506 |
|
495 |
<property> |
|
496 |
<name>lod.entitiesPerQuery</name> |
|
497 |
<value>${lod_entitiesPerQuery}</value> |
|
498 |
</property> |
|
499 |
<property> |
|
500 |
<name>lod.relationsPerQuery</name> |
|
501 |
<value>${lod_relationsPerQuery}</value> |
|
502 |
</property> |
|
507 |
</configuration> |
|
508 |
</map-reduce> |
|
503 | 509 |
|
504 |
<property> |
|
505 |
<name>lod.dataPath</name> |
|
506 |
<value>${lod_dataPath}</value> |
|
507 |
</property> |
|
510 |
<ok to="rdf_relations_import"/> |
|
508 | 511 |
|
512 |
<error to="fail"/> |
|
513 |
</action> |
|
509 | 514 |
|
510 |
</configuration> |
|
511 |
</map-reduce> |
|
512 | 515 |
|
513 |
<ok to="rdf_relations_import" />
|
|
516 |
<action name="rdf_relations_import">
|
|
514 | 517 |
|
515 |
<error to="fail" /> |
|
516 |
</action> |
|
518 |
<map-reduce> |
|
517 | 519 |
|
520 |
<prepare> |
|
521 |
<delete path="${nameNode}${lod_output}test"/> |
|
522 |
</prepare> |
|
518 | 523 |
|
519 | 524 |
|
525 |
<configuration> |
|
520 | 526 |
|
521 |
<action name="rdf_relations_import"> |
|
527 |
<property> |
|
528 |
<name>hbase.security.authentication</name> |
|
529 |
<value>simple</value> |
|
530 |
</property> |
|
522 | 531 |
|
523 |
<map-reduce> |
|
524 | 532 |
|
525 |
<prepare> |
|
526 |
<delete path="${nameNode}${lod_output}test" /> |
|
527 |
</prepare> |
|
533 |
<!-- ZOOKEEPER --> |
|
534 |
<property> |
|
535 |
<name>hbase.zookeeper.quorum</name> |
|
536 |
<!--<value> namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu |
|
537 |
</value> --> |
|
538 |
<value> |
|
539 |
quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu |
|
540 |
</value> |
|
541 |
</property> |
|
542 |
<property> |
|
543 |
<name>zookeeper.znode.rootserver</name> |
|
544 |
<value>root-region-server</value> |
|
545 |
</property> |
|
528 | 546 |
|
547 |
<!-- CSV PROPS GO HERE --> |
|
548 |
<property> |
|
549 |
<name>hbase.zookeeper.property.clientPort</name> |
|
550 |
<value>2181</value> |
|
551 |
<!--<value>2182</value> --> |
|
552 |
</property> |
|
529 | 553 |
|
530 |
<configuration> |
|
531 | 554 |
|
532 |
<property> |
|
533 |
<name>hbase.security.authentication</name> |
|
534 |
<value>simple</value> |
|
535 |
</property> |
|
555 |
<!-- MR IO --> |
|
536 | 556 |
|
557 |
<property> |
|
558 |
<name>mapred.input.dir</name> |
|
559 |
<value>${lod_RelationsInputFile}</value> |
|
560 |
</property> |
|
537 | 561 |
|
538 |
<!-- ZOOKEEPER --> |
|
539 |
<property> |
|
540 |
<name>hbase.zookeeper.quorum</name> |
|
541 |
<!--<value> namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu |
|
542 |
</value> --> |
|
543 |
<value> |
|
544 |
quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu |
|
545 |
</value> |
|
546 |
</property> |
|
547 |
<property> |
|
548 |
<name>zookeeper.znode.rootserver</name> |
|
549 |
<value>root-region-server</value> |
|
550 |
</property> |
|
562 |
<property> |
|
563 |
<name>mapreduce.inputformat.class</name> |
|
564 |
<value>org.apache.hadoop.mapreduce.lib.input.TextInputFormat</value> |
|
565 |
</property> |
|
551 | 566 |
|
552 |
<!-- CSV PROPS GO HERE --> |
|
553 |
<property> |
|
554 |
<name>hbase.zookeeper.property.clientPort</name> |
|
555 |
<value>2181</value> |
|
556 |
<!--<value>2182</value> --> |
|
557 |
</property> |
|
567 |
<property> |
|
568 |
<name>mapred.mapoutput.key.class</name> |
|
569 |
<value>org.apache.hadoop.io.Text</value> |
|
570 |
</property> |
|
558 | 571 |
|
572 |
<property> |
|
573 |
<name>mapred.mapoutput.value.class</name> |
|
574 |
<value>org.apache.hadoop.io.Text</value> |
|
575 |
</property> |
|
559 | 576 |
|
577 |
<property> |
|
578 |
<name>mapred.output.key.class</name> |
|
579 |
<value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value> |
|
580 |
</property> |
|
560 | 581 |
|
561 |
<!-- MR IO --> |
|
582 |
<property> |
|
583 |
<name>mapred.output.value.class</name> |
|
584 |
<value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value> |
|
585 |
</property> |
|
562 | 586 |
|
563 |
<property> |
|
564 |
<name>mapred.input.dir</name> |
|
565 |
<value>${lod_RelationsInputFile}</value> |
|
566 |
</property> |
|
587 |
<!-- ## This is required for new MapReduce API usage --> |
|
588 |
<property> |
|
589 |
<name>mapred.mapper.new-api</name> |
|
590 |
<value>true</value> |
|
591 |
</property> |
|
592 |
<property> |
|
593 |
<name>mapred.reducer.new-api</name> |
|
594 |
<value>true</value> |
|
595 |
</property> |
|
567 | 596 |
|
568 |
<property> |
|
569 |
<name>mapreduce.inputformat.class</name> |
|
570 |
<value>org.apache.hadoop.mapreduce.lib.input.TextInputFormat</value> |
|
571 |
</property> |
|
597 |
<!-- # Job-specific options --> |
|
598 |
<property> |
|
599 |
<name>dfs.blocksize</name> |
|
600 |
<value>32M</value> |
|
601 |
</property> |
|
602 |
<property> |
|
603 |
<name>mapred.output.compress</name> |
|
604 |
<value>false</value> |
|
605 |
</property> |
|
606 |
<property> |
|
607 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
608 |
<value>false</value> |
|
609 |
</property> |
|
610 |
<property> |
|
611 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
612 |
<value>false</value> |
|
613 |
</property> |
|
614 |
<property> |
|
615 |
<name>mapreduce.map.speculative</name> |
|
616 |
<value>false</value> |
|
617 |
</property> |
|
572 | 618 |
|
573 |
<property> |
|
574 |
<name>mapred.mapoutput.key.class</name> |
|
575 |
<value>org.apache.hadoop.io.Text</value> |
|
576 |
</property> |
|
577 | 619 |
|
578 |
<property>
|
|
579 |
<name>mapred.mapoutput.value.class</name>
|
|
580 |
<value>org.apache.hadoop.io.Text</value>
|
|
581 |
</property>
|
|
620 |
<property>
|
|
621 |
<name>map.output.key.field.separator</name>
|
|
622 |
<value>${lod_delim}</value>
|
|
623 |
</property>
|
|
582 | 624 |
|
583 |
<property> |
|
584 |
<name>mapred.output.key.class</name> |
|
585 |
<value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value> |
|
586 |
</property> |
|
587 | 625 |
|
588 |
<property> |
|
589 |
<name>mapred.output.value.class</name> |
|
590 |
<value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value> |
|
591 |
</property> |
|
626 |
<!-- ## Classes of mapper and reducer --> |
|
627 |
<property> |
|
628 |
<name>mapreduce.map.class</name> |
|
629 |
<value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportMapper</value> |
|
630 |
</property> |
|
592 | 631 |
|
593 |
<!-- ## This is required for new MapReduce API usage --> |
|
594 |
<property> |
|
595 |
<name>mapred.mapper.new-api</name> |
|
596 |
<value>true</value> |
|
597 |
</property> |
|
598 |
<property> |
|
599 |
<name>mapred.reducer.new-api</name> |
|
600 |
<value>true</value> |
|
601 |
</property> |
|
632 |
<property> |
|
633 |
<name>mapreduce.reduce.class</name> |
|
602 | 634 |
|
603 |
<!-- # Job-specific options --> |
|
604 |
<property> |
|
605 |
<name>dfs.blocksize</name> |
|
606 |
<value>32M</value> |
|
607 |
</property> |
|
608 |
<property> |
|
609 |
<name>mapred.output.compress</name> |
|
610 |
<value>false</value> |
|
611 |
</property> |
|
612 |
<property> |
|
613 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
614 |
<value>false</value> |
|
615 |
</property> |
|
616 |
<property> |
|
617 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
618 |
<value>false</value> |
|
619 |
</property> |
|
620 |
<property> |
|
621 |
<name>mapreduce.map.speculative</name> |
|
622 |
<value>false</value> |
|
623 |
</property> |
|
635 |
<value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportReducer</value> |
|
636 |
</property> |
|
624 | 637 |
|
638 |
<property> |
|
625 | 639 |
|
626 |
<property> |
|
627 |
<name>map.output.key.field.separator</name> |
|
628 |
<value>${lod_delim}</value> |
|
629 |
</property> |
|
640 |
<name>io.serializations</name> |
|
641 |
<value>org.apache.hadoop.io.serializer.WritableSerialization</value> |
|
642 |
</property> |
|
630 | 643 |
|
644 |
<!-- ## Custom config --> |
|
645 |
<!--delim character used to separate fields in hdfs dump files <property> --> |
|
631 | 646 |
|
632 |
<!-- ## Classes of mapper and reducer --> |
|
633 |
<property> |
|
634 |
<name>mapreduce.map.class</name> |
|
635 |
<value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportMapper</value> |
|
636 |
</property> |
|
647 |
<property> |
|
648 |
<name>lod.delim</name> |
|
649 |
<value>${lod_delim}</value> |
|
650 |
</property> |
|
637 | 651 |
|
638 |
<property> |
|
639 |
<name>mapreduce.reduce.class</name> |
|
652 |
<property> |
|
653 |
<name>lod.enclosing</name> |
|
654 |
<value>${lod_enclosing}</value> |
|
655 |
</property> |
|
640 | 656 |
|
641 |
<value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportReducer</value> |
|
642 |
</property> |
|
657 |
<property> |
|
658 |
<name>lod.seperator</name> |
|
659 |
<value>${lod_seperator}</value> |
|
660 |
</property> |
|
643 | 661 |
|
644 |
<property> |
|
662 |
<property> |
|
663 |
<name>lod.lastExecutionDate</name> |
|
664 |
<value>${lod_lastExecutionDate}</value> |
|
665 |
</property> |
|
645 | 666 |
|
646 |
<name>io.serializations</name> |
|
647 |
<value>org.apache.hadoop.io.serializer.WritableSerialization</value> |
|
648 |
</property> |
|
667 |
<property> |
|
668 |
<name>lod.conLine</name> |
|
669 |
<value>${lod_conLine}</value> |
|
670 |
</property> |
|
649 | 671 |
|
650 |
<!-- ## Custom config --> |
|
651 |
<!--delim character used to separate fields in hdfs dump files <property> --> |
|
672 |
<property> |
|
673 |
<name>lod.username</name> |
|
674 |
<value>${lod_username}</value> |
|
675 |
</property> |
|
652 | 676 |
|
653 |
<property>
|
|
654 |
<name>lod.delim</name>
|
|
655 |
<value>${lod_delim}</value>
|
|
656 |
</property>
|
|
677 |
<property>
|
|
678 |
<name>lod.password</name>
|
|
679 |
<value>${lod_password}</value>
|
|
680 |
</property>
|
|
657 | 681 |
|
658 |
<property>
|
|
659 |
<name>lod.enclosing</name>
|
|
660 |
<value>${lod_enclosing}</value>
|
|
661 |
</property>
|
|
682 |
<property>
|
|
683 |
<name>lod.minCpart</name>
|
|
684 |
<value>${lod_minCpart}</value>
|
|
685 |
</property>
|
|
662 | 686 |
|
663 |
<property>
|
|
664 |
<name>lod.seperator</name>
|
|
665 |
<value>${lod_seperator}</value>
|
|
666 |
</property>
|
|
687 |
<property>
|
|
688 |
<name>lod.maxCpart</name>
|
|
689 |
<value>${lod_maxCpart}</value>
|
|
690 |
</property>
|
|
667 | 691 |
|
668 |
<property>
|
|
669 |
<name>lod.lastExecutionDate</name>
|
|
670 |
<value>${lod_lastExecutionDate}</value>
|
|
671 |
</property>
|
|
692 |
<property>
|
|
693 |
<name>lod.part</name>
|
|
694 |
<value>${lod_part}</value>
|
|
695 |
</property>
|
|
672 | 696 |
|
673 |
<property>
|
|
674 |
<name>lod.conLine</name>
|
|
675 |
<value>${lod_conLine}</value>
|
|
676 |
</property>
|
|
697 |
<property>
|
|
698 |
<name>lod.jsonRels</name>
|
|
699 |
<value>${lod_jsonRels}</value>
|
|
700 |
</property>
|
|
677 | 701 |
|
678 |
<property>
|
|
679 |
<name>lod.username</name>
|
|
680 |
<value>${lod_username}</value>
|
|
681 |
</property>
|
|
702 |
<property>
|
|
703 |
<name>lod.jsonEntities</name>
|
|
704 |
<value>${lod_jsonEntities}</value>
|
|
705 |
</property>
|
|
682 | 706 |
|
683 |
<property>
|
|
684 |
<name>lod.password</name>
|
|
685 |
<value>${lod_password}</value>
|
|
686 |
</property>
|
|
707 |
<property>
|
|
708 |
<name>lod.defaultGraph</name>
|
|
709 |
<value>${lod_defaultGraph}</value>
|
|
710 |
</property>
|
|
687 | 711 |
|
688 |
<property> |
|
689 |
<name>lod.minCpart</name> |
|
690 |
<value>${lod_minCpart}</value> |
|
691 |
</property> |
|
692 | 712 |
|
693 |
<property>
|
|
694 |
<name>lod.maxCpart</name>
|
|
695 |
<value>${lod_maxCpart}</value>
|
|
696 |
</property>
|
|
713 |
<property>
|
|
714 |
<name>lod.relationsGraph</name>
|
|
715 |
<value>${lod_relationsGraph}</value>
|
|
716 |
</property>
|
|
697 | 717 |
|
698 |
<property>
|
|
699 |
<name>lod.part</name>
|
|
700 |
<value>${lod_part}</value>
|
|
701 |
</property>
|
|
718 |
<property>
|
|
719 |
<name>lod.baseURI</name>
|
|
720 |
<value>${lod_baseURI}</value>
|
|
721 |
</property>
|
|
702 | 722 |
|
703 |
<property>
|
|
704 |
<name>lod.jsonRels</name>
|
|
705 |
<value>${lod_jsonRels}</value>
|
|
706 |
</property>
|
|
723 |
<property>
|
|
724 |
<name>mapred.reduce.tasks</name>
|
|
725 |
<value>${numReducers}</value>
|
|
726 |
</property>
|
|
707 | 727 |
|
708 |
<property>
|
|
709 |
<name>lod.jsonEntities</name>
|
|
710 |
<value>${lod_jsonEntities}</value>
|
|
711 |
</property>
|
|
728 |
<property>
|
|
729 |
<name>lod.inputFile</name>
|
|
730 |
<value>${lod_RelationsInputFile}</value>
|
|
731 |
</property>
|
|
712 | 732 |
|
713 |
<property> |
|
714 |
<name>lod.defaultGraph</name> |
|
715 |
<value>${lod_defaultGraph}</value> |
|
716 |
</property> |
|
717 | 733 |
|
734 |
<property> |
|
735 |
<name>mapred.output.dir</name> |
|
736 |
<value>${lod_output}test</value> |
|
737 |
</property> |
|
718 | 738 |
|
719 |
<property>
|
|
720 |
<name>lod.relationsGraph</name>
|
|
721 |
<value>${lod_relationsGraph}</value>
|
|
722 |
</property>
|
|
739 |
<property>
|
|
740 |
<name>lod.entitiesPerQuery</name>
|
|
741 |
<value>${lod_entitiesPerQuery}</value>
|
|
742 |
</property>
|
|
723 | 743 |
|
724 |
<property> |
|
725 |
<name>lod.baseURI</name> |
|
726 |
<value>${lod_baseURI}</value> |
|
727 |
</property> |
|
744 |
<property> |
|
745 |
<name>lod.relationsPerQuery</name> |
|
746 |
<value>${lod_relationsPerQuery}</value> |
|
747 |
</property> |
|
748 |
<property> |
|
749 |
<name>lod.dataPath</name> |
|
750 |
<value>${lod_dataPath}</value> |
|
751 |
</property> |
|
728 | 752 |
|
729 |
<property> |
|
730 |
<name>mapred.reduce.tasks</name> |
|
731 |
<value>${numReducers}</value> |
|
732 |
</property> |
|
733 | 753 |
|
734 |
<property> |
|
735 |
<name>lod.inputFile</name> |
|
736 |
<value>${lod_RelationsInputFile}</value> |
|
737 |
</property> |
|
754 |
</configuration> |
|
755 |
</map-reduce> |
|
738 | 756 |
|
757 |
<ok to="finalize"/> |
|
739 | 758 |
|
740 |
<property> |
|
741 |
<name>mapred.output.dir</name> |
|
742 |
<value>${lod_output}test</value> |
|
743 |
</property> |
|
759 |
<error to="fail"/> |
|
760 |
</action> |
|
744 | 761 |
|
745 |
<property> |
|
746 |
<name>lod.entitiesPerQuery</name> |
|
747 |
<value>${lod_entitiesPerQuery}</value> |
|
748 |
</property> |
|
749 | 762 |
|
750 |
<property> |
|
751 |
<name>lod.relationsPerQuery</name> |
|
752 |
<value>${lod_relationsPerQuery}</value> |
|
753 |
</property> |
|
754 |
<property> |
|
755 |
<name>lod.dataPath</name> |
|
756 |
<value>${lod_dataPath}</value> |
|
757 |
</property> |
|
763 |
<action name='finalize'> |
|
764 |
<java> |
|
765 |
<prepare> |
|
766 |
</prepare> |
|
758 | 767 |
|
768 |
<configuration> |
|
769 |
<property> |
|
770 |
<name>mapred.job.queue.name</name> |
|
771 |
<value>${queueName}</value> |
|
772 |
</property> |
|
773 |
</configuration> |
|
759 | 774 |
|
760 |
</configuration> |
|
761 |
</map-reduce> |
|
775 |
<main-class>eu.dnetlib.iis.core.workflows.lodexport.Finalize</main-class> |
|
762 | 776 |
|
763 |
<ok to="finalize" /> |
|
777 |
<arg>${lod_conLine}</arg> |
|
778 |
<arg>${lod_username}</arg> |
|
779 |
<arg>${lod_password}</arg> |
|
780 |
<arg>${lod_minCpart}</arg> |
|
781 |
<arg>${lod_maxCpart}</arg> |
|
782 |
<arg>${lod_part}</arg> |
|
783 |
<arg>${lod_relationsGraph}</arg> |
|
784 |
</java> |
|
785 |
<ok to="end"/> |
|
764 | 786 |
|
765 |
<error to="fail" />
|
|
766 |
</action>
|
|
787 |
<error to="fail"/>
|
|
788 |
</action>
|
|
767 | 789 |
|
790 |
<action name="cleanUpHDFS"> |
|
791 |
<fs> |
|
792 |
<delete path="${lod_output}test"/> |
|
793 |
</fs> |
|
768 | 794 |
|
795 |
<ok to="end"/> |
|
796 |
<error to="fail"/> |
|
797 |
</action> |
|
769 | 798 |
|
770 |
<action name='finalize'> |
|
771 |
<java> |
|
772 |
<prepare> |
|
773 |
</prepare> |
|
774 | 799 |
|
775 |
<configuration> |
|
776 |
<property> |
|
777 |
<name>mapred.job.queue.name</name> |
|
778 |
<value>${queueName}</value> |
|
779 |
</property> |
|
780 |
</configuration> |
|
781 |
|
|
782 |
<main-class>eu.dnetlib.iis.core.workflows.lodexport.Finalize</main-class> |
|
783 |
|
|
784 |
<!-- <arg>${lod_conLine}</arg> <arg>${lod_username"}</arg> <arg>${lod_password}</arg> |
|
785 |
<arg>${lod_minCpart}</arg> <arg>${lod_maxCpart}</arg> <arg>${lod_part}</arg> |
|
786 |
<arg>${lod_relationsGraph}</arg> --> |
|
787 |
</java> |
|
788 |
<ok to="end" /> |
|
789 |
|
|
790 |
<error to="fail" /> |
|
791 |
</action> |
|
792 |
|
|
793 |
<action name="cleanUpHDFS"> |
|
794 |
<fs> |
|
795 |
<delete path="${lod_output}test" /> |
|
796 |
</fs> |
|
797 |
|
|
798 |
<ok to="end" /> |
|
799 |
<error to="fail" /> |
|
800 |
</action> |
|
801 |
|
|
802 |
|
|
803 |
<kill name="fail"> |
|
804 |
<message> |
|
805 |
Unfortunately, the process failed -- error message: |
|
806 |
[${wf:errorMessage(wf:lastErrorNode())}] |
|
807 |
</message> |
|
808 |
</kill> |
|
809 |
<end name="end" /> |
|
800 |
<kill name="fail"> |
|
801 |
<message> |
|
802 |
Unfortunately, the process failed -- error message: |
|
803 |
[${wf:errorMessage(wf:lastErrorNode())}] |
|
804 |
</message> |
|
805 |
</kill> |
|
806 |
<end name="end"/> |
|
810 | 807 |
</workflow-app> |
Also available in: Unified diff
fix for missing arguments in finalize