Revision 55645
Added by Antonis Lempesis over 5 years ago
modules/dnet-openaire-stats-workflow/trunk/src/main/resources/eu/dnetlib/iis/core/javamapreduce/stats/job.properties | ||
---|---|---|
8 | 8 |
Stats_delim_Character=! |
9 | 9 |
Stats_enclosing_Character=# |
10 | 10 |
Stats_getdeletedbyinference=false |
11 |
Stats_indexConf=index.conf { result { dups = true, links = [ { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking], max=1000 }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype,openairecompatibility] } ]}, organization { dups = true, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, 
expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
|
|
11 |
Stats_indexConf=index.conf { result { dups = true, links = [ { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype,openairecompatibility] } ]}, organization { dups = true, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = 
projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
|
|
12 | 12 |
Stats_null_Numeric_Field=null |
13 | 13 |
Stats_null_String_Field=null |
14 | 14 |
Stats_output_Path=/tmp/tstats/ |
modules/dnet-openaire-stats-workflow/trunk/src/main/resources/eu/dnetlib/iis/core/javamapreduce/stats/oozie_app/workflow.xml | ||
---|---|---|
26 | 26 |
</configuration> |
27 | 27 |
</global> |
28 | 28 |
|
29 |
<start to="mr_export"/> |
|
29 |
<start to="prepareDatabase"/> |
|
30 |
<!--<start to="prepareDatabase"/>--> |
|
30 | 31 |
|
32 |
<action name="prepareDatabase"> |
|
33 |
<java> |
|
34 |
<prepare> |
|
35 |
</prepare> |
|
36 |
<configuration> |
|
37 |
<property> |
|
38 |
<name>mapred.job.queue.name</name> |
|
39 |
<value>${queueName}</value> |
|
40 |
</property> |
|
41 |
</configuration> |
|
42 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
43 |
<arg>-SworkingDir=${workingDir}</arg> |
|
44 |
<arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg> |
|
45 |
<arg>-PStats_db_Url=${Stats_db_Url}</arg> |
|
46 |
<arg>-PStats_db_User=${Stats_db_User}</arg> |
|
47 |
<arg>-PStats_db_Pass=${Stats_db_Pass}</arg> |
|
48 |
<arg>-PStats_db_Driver=${Stats_db_Driver}</arg> |
|
49 |
</java> |
|
50 |
|
|
51 |
<ok to="mr_export"/> |
|
52 |
<error to="fail"/> |
|
53 |
</action> |
|
54 |
|
|
31 | 55 |
<action name="mr_export"> |
32 | 56 |
<map-reduce> |
33 |
|
|
34 | 57 |
<prepare> |
35 | 58 |
<delete path="${nameNode}${Stats_output_Path}"/> |
36 |
|
|
37 | 59 |
</prepare> |
38 | 60 |
<configuration> |
39 | 61 |
<property> |
... | ... | |
43 | 65 |
<property> |
44 | 66 |
<name>hbase.rootdir</name> |
45 | 67 |
<value>$nameNode/hbase</value> |
46 |
|
|
47 | 68 |
</property> |
48 |
|
|
49 | 69 |
<property> |
50 | 70 |
<name>hbase.security.authentication</name> |
51 | 71 |
<value>simple</value> |
52 | 72 |
</property> |
73 |
|
|
53 | 74 |
<!-- ZOOKEEPER --> |
54 |
|
|
55 | 75 |
<property> |
56 | 76 |
<name>hbase.zookeeper.quorum</name> |
57 | 77 |
<value> |
... | ... | |
65 | 85 |
<value>root-region-server</value> |
66 | 86 |
|
67 | 87 |
</property> |
68 |
|
|
69 | 88 |
<property> |
70 | 89 |
<name>hbase.zookeeper.property.clientPort</name> |
71 | 90 |
<value>2181</value> |
72 | 91 |
</property> |
73 | 92 |
|
74 |
|
|
75 | 93 |
<!-- MR IO --> |
76 |
|
|
77 |
|
|
78 | 94 |
<property> |
79 | 95 |
<name>mapreduce.inputformat.class</name> |
80 | 96 |
<value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value> |
81 | 97 |
</property> |
82 |
|
|
83 | 98 |
<property> |
84 | 99 |
<name>mapred.mapoutput.key.class</name> |
85 | 100 |
<value>org.apache.hadoop.io.Text</value> |
... | ... | |
96 | 111 |
<name>mapred.output.value.class</name> |
97 | 112 |
<value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value> |
98 | 113 |
</property> |
99 |
|
|
100 | 114 |
<!-- ## This is required for new MapReduce API usage --> |
101 | 115 |
<property> |
102 | 116 |
<name>mapred.mapper.new-api</name> |
... | ... | |
106 | 120 |
<name>mapred.reducer.new-api</name> |
107 | 121 |
<value>true</value> |
108 | 122 |
</property> |
109 |
|
|
110 | 123 |
<!-- # Job-specific options --> |
111 | 124 |
<property> |
112 | 125 |
<name>dfs.blocksize</name> |
... | ... | |
818 | 831 |
<arg>${Stats_output_Path}</arg> |
819 | 832 |
<arg>${isLookupEndpoint}</arg> |
820 | 833 |
</java> |
821 |
<ok to="prepareDatabase"/>
|
|
834 |
<ok to="sqoopImport"/>
|
|
822 | 835 |
|
823 | 836 |
<error to="fail"/> |
824 | 837 |
</action> |
825 | 838 |
|
826 |
|
|
827 |
<action name="prepareDatabase"> |
|
828 |
<java> |
|
829 |
<prepare> |
|
830 |
</prepare> |
|
831 |
<configuration> |
|
832 |
<property> |
|
833 |
<name>mapred.job.queue.name</name> |
|
834 |
<value>${queueName}</value> |
|
835 |
</property> |
|
836 |
</configuration> |
|
837 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
838 |
<arg>-SworkingDir=${workingDir}</arg> |
|
839 |
<arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg> |
|
840 |
<arg>-PStats_db_Url=${Stats_db_Url}</arg> |
|
841 |
<arg>-PStats_db_User=${Stats_db_User}</arg> |
|
842 |
<arg>-PStats_db_Pass=${Stats_db_Pass}</arg> |
|
843 |
<arg>-PStats_db_Driver=${Stats_db_Driver}</arg> |
|
844 |
</java> |
|
845 |
|
|
846 |
<ok to="sqoopImport"/> |
|
847 |
<error to="fail"/> |
|
848 |
</action> |
|
849 |
|
|
850 | 839 |
<action name="sqoopImport"> |
851 | 840 |
<java> |
852 | 841 |
<prepare> |
modules/dnet-openaire-stats-workflow/trunk/pom.xml | ||
---|---|---|
42 | 42 |
<dependency> |
43 | 43 |
<groupId>eu.dnetlib</groupId> |
44 | 44 |
<artifactId>dnet-openaire-stats</artifactId> |
45 |
<version>3.0.0-SNAPSHOT</version>
|
|
45 |
<version>4.0.0-SNAPSHOT</version>
|
|
46 | 46 |
</dependency> |
47 | 47 |
|
48 | 48 |
<dependency> |
Also available in: Unified diff
Numerous bug fixes and changes to the schema.