Revision 29365
Added by Eri Katsari over 10 years ago
modules/dnet-openaire-stats-workflow/src/main/resources/eu/dnetlib/iis/core/examples/javamapreduce/stats/job.properties | ||
---|---|---|
7 | 7 |
jobTracker=nmis-hadoop-jt |
8 | 8 |
queueName=default |
9 | 9 |
user.name=eri.katsari |
10 |
numReducers=1
|
|
10 |
numReducers=1 |
|
11 | 11 |
Stats_db_Url = jdbc:postgresql://duffy.di.uoa.gr:5432/test_stats |
12 | 12 |
Stats_db_User = sqoop |
13 | 13 |
Stats_db_Pass = sqoop |
... | ... | |
20 | 20 |
Stats_null_String_Field=NULL |
21 | 21 |
Stats_null_Numeric_Field=-1 |
22 | 22 |
Stats_delim_Character=! |
23 |
Stats_enclosing_Character=# |
|
23 | 24 |
Stats_db_table_map=datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultResult=result_results,resultTopic=result_topics,category=category,claim=claim,concept=concept,datasourceLanguage=datasource_languages,resultLanguage=result_languages,resultDatasource=result_datasources |
24 | 25 |
out1=datasource |
25 | 26 |
out2=project |
modules/dnet-openaire-stats-workflow/src/main/resources/eu/dnetlib/iis/core/examples/javamapreduce/stats/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app name="test-core_examples_javamapreduce_stats" |
|
2 |
xmlns="uri:oozie:workflow:0.4"> |
|
3 |
<!-- map reduce job that exports hbase data and prepares them for import |
|
4 |
to the relational database used for statistics generation --> |
|
1 |
<workflow-app name="test-core_examples_javamapreduce_stats" xmlns="uri:oozie:workflow:0.4"> |
|
2 |
<!-- map reduce job that exports hbase data and prepares them for import to the relational database used for statistics generation --> |
|
5 | 3 |
|
6 | 4 |
<global> |
7 | 5 |
<job-tracker>${jobTracker}</job-tracker> |
... | ... | |
25 | 23 |
<!-- column families: --> |
26 | 24 |
|
27 | 25 |
<arg> |
28 |
-f |
|
29 |
datasource |
|
26 |
<!-- -f datasource --> |
|
30 | 27 |
<!-- , datasourceOrganization_provision_provides ,organization, --> |
31 | 28 |
<!-- projectOrganization_participation_isParticipant, --> |
32 | 29 |
<!-- project --> |
33 | 30 |
<!-- ,projectOrganization_participation_hasParticipant --> |
34 | 31 |
<!-- , --> |
35 |
<!-- result -->
|
|
32 |
result
|
|
36 | 33 |
<!-- , resultProject_outcome_produces, --> |
37 | 34 |
<!-- personResult_authorship_hasAuthor,resultResult_publicationDataset_isRelatedTo --> |
38 | 35 |
</arg> |
... | ... | |
141 | 138 |
</property> |
142 | 139 |
|
143 | 140 |
<!-- I/O FORMAT --> |
144 |
<!-- IMPORTANT: sets default delimiter used by text output writer. Required |
|
145 |
to fix issue with trailing tab added between id and value in multiple outputs --> |
|
141 |
<!-- IMPORTANT: sets default delimiter used by text output writer. Required to fix issue with trailing tab added between id and value in multiple outputs --> |
|
146 | 142 |
<property> |
147 | 143 |
<name>mapred.textoutputformat.separator</name> |
148 | 144 |
<value>${Stats_delim_Character}</value> |
... | ... | |
152 | 148 |
<property> |
153 | 149 |
<name>mapreduce.multipleoutputs</name> |
154 | 150 |
|
155 |
<value>${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} |
|
156 |
${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} |
|
157 |
${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21} |
|
151 |
<value>${out1} ${out2} ${out3} ${out4} ${out5} ${out6} ${out7} ${out8} ${out9} ${out10} ${out11} ${out12} ${out13} ${out14} ${out15} ${out16} ${out17} ${out18} ${out19} ${out20} ${out21} |
|
158 | 152 |
</value> |
159 | 153 |
|
160 | 154 |
</property> |
... | ... | |
504 | 498 |
<name>stats.nullNum</name> |
505 | 499 |
<value>${Stats_null_Numeric_Field}</value> |
506 | 500 |
</property> |
501 |
<property> |
|
502 |
<name>stats.enclChar</name> |
|
503 |
<value>${Stats_enclosing_Character}</value> |
|
504 |
</property> |
|
505 |
|
|
506 |
|
|
507 | 507 |
<!--source hbase table --> |
508 | 508 |
<property> |
509 | 509 |
<name>hbase.mapreduce.inputtable</name> |
... | ... | |
514 | 514 |
<name>stats.dbTablesMap</name> |
515 | 515 |
<value>${Stats_db_table_map}</value> |
516 | 516 |
</property> |
517 |
|
|
518 |
<!-- This directory does not correspond to a data store. In fact, this |
|
519 |
directory only contains multiple data stores. It has to be set to the name |
|
520 |
of the workflow node. --> |
|
517 |
|
|
518 |
<!-- This directory does not correspond to a data store. In fact, this directory only contains multiple data stores. It has to be set to the name of the workflow node. --> |
|
521 | 519 |
<property> |
522 | 520 |
<name>mapred.output.dir</name> |
523 | 521 |
<value>${Stats_output_Path}</value> |
... | ... | |
595 | 593 |
<arg>-PStats_sqoop_StatementPerTrans=${Stats_sqoop_StatementPerTrans} |
596 | 594 |
</arg> |
597 | 595 |
<arg>-PStats_delim_Character=${Stats_delim_Character}</arg> |
598 |
<arg>-PStats_newline_Character=${Stats_newline_Character}</arg> |
|
599 | 596 |
<arg>-PStats_db_table_map=${Stats_db_table_map}</arg> |
600 |
</java> |
|
597 |
<arg>-PStats_enclosing_Character=${Stats_enclosing_Character} </arg> |
|
598 |
|
|
599 |
</java> |
|
601 | 600 |
<ok to="end" /> |
602 | 601 |
<error to="fail" /> |
603 | 602 |
</action> |
... | ... | |
648 | 647 |
-Dsqoop.statements.per.transaction==1000 --connect |
649 | 648 |
jdbc:postgresql://duffy.di.uoa.gr:5432/test_stats --export-dir |
650 | 649 |
/tmp/test_stats/datasource-r-00000 --table datasource --username |
651 |
sqoop --password sqoop --input-fields-terminated-by ! -m 4 |
|
650 |
sqoop --password sqoop |
|
651 |
--input-fields-terminated-by ! -m 4 |
|
652 | 652 |
</command> |
653 | 653 |
|
654 | 654 |
</sqoop> |
... | ... | |
663 | 663 |
</message> |
664 | 664 |
</kill> |
665 | 665 |
<end name="end" /> |
666 |
</workflow-app>
|
|
666 |
</workflow-app> |
modules/dnet-openaire-stats-workflow/pom.xml | ||
---|---|---|
11 | 11 |
<packaging>jar</packaging> |
12 | 12 |
<version>1.0.0-SNAPSHOT</version> |
13 | 13 |
|
14 |
<!-- REMOVE BEFORE DEPLOYMENT --> |
|
14 | 15 |
<properties> |
15 | 16 |
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
16 |
|
|
17 |
<iis.hadoop.frontend.user.name>eri.katsari </iis.hadoop.frontend.user.name> |
|
17 |
<iis.hadoop.frontend.user.name>eri.katsari </iis.hadoop.frontend.user.name> |
|
18 | 18 |
<iis.hadoop.frontend.home.dir>home </iis.hadoop.frontend.home.dir> |
19 | 19 |
<iis.hadoop.frontend.host.name>oozie.t.hadoop.research-infrastructures.eu </iis.hadoop.frontend.host.name> |
20 |
<DjobTracker>nmis-hadoop-jt</DjobTracker> |
|
21 |
<oozieServiceLoc> |
|
22 |
http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie</oozieServiceLoc> |
|
20 |
<oozieServiceLoc>http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie</oozieServiceLoc> |
|
23 | 21 |
<nameNode>hdfs://nmis-hadoop-cluster</nameNode> |
24 | 22 |
<jobTracker> nmis-hadoop-jt</jobTracker> |
25 | 23 |
<queueName>default</queueName> |
26 | 24 |
<user.name>eri.katsari</user.name> |
27 | 25 |
</properties> |
26 |
|
|
28 | 27 |
<dependencies> |
29 | 28 |
|
30 | 29 |
<dependency> |
... | ... | |
158 | 157 |
|
159 | 158 |
<build> |
160 | 159 |
<plugins> |
161 |
|
|
162 |
<!-- <plugin> --> |
|
163 |
<!-- <groupId>org.codehaus.mojo</groupId> --> |
|
164 |
<!-- <artifactId>exec-maven-plugin</artifactId> --> |
|
165 |
<!-- <version>1.2.1</version> --> |
|
166 |
<!-- <configuration> --> |
|
167 |
<!-- <mainClass>eu.dnetlib.iis.core.workflows.stats.SqoopWrapper</mainClass> --> |
|
168 |
<!-- </configuration> --> |
|
169 |
<!-- </plugin> --> |
|
170 |
<!-- <plugin> --> |
|
171 |
<!-- <groupId>org.apache.maven.plugins</groupId> --> |
|
172 |
<!-- <artifactId>maven-shade-plugin</artifactId> --> |
|
173 |
<!-- <version>2.1</version> --> |
|
174 |
<!-- <executions> --> |
|
175 |
<!-- <execution> --> |
|
176 |
<!-- <phase>package</phase> --> |
|
177 |
<!-- <goals> --> |
|
178 |
<!-- <goal>shade</goal> --> |
|
179 |
<!-- </goals> --> |
|
180 |
<!-- <configuration> --> |
|
181 |
<!-- <transformers> --> |
|
182 |
<!-- <transformer --> |
|
183 |
<!-- implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> --> |
|
184 |
<!-- </transformer> --> |
|
185 |
<!-- </transformers> --> |
|
186 |
<!-- </configuration> --> |
|
187 |
<!-- </execution> --> |
|
188 |
<!-- </executions> </plugin> --> |
|
189 |
|
|
190 |
<!-- This plugin makes the Maven->Update Project Configuration not forget --> |
|
191 |
<!-- about the "target/generated-sources/java" source path --> |
|
160 |
|
|
192 | 161 |
<plugin> |
193 | 162 |
<groupId>org.codehaus.mojo</groupId> |
194 | 163 |
<artifactId>build-helper-maven-plugin</artifactId> |
Also available in: Unified diff