Project

General

Profile

« Previous | Next » 

Revision 55645

numerous bug fixes and changes to schema

View differences:

modules/dnet-openaire-stats-workflow/trunk/src/main/resources/eu/dnetlib/iis/core/javamapreduce/stats/job.properties
8 8
Stats_delim_Character=!
9 9
Stats_enclosing_Character=#
10 10
Stats_getdeletedbyinference=false
11
Stats_indexConf=index.conf { result { dups = true, links = [ { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking], max=1000 }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype,openairecompatibility] } ]}, organization { dups = true, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
11
Stats_indexConf=index.conf { result { dups = true, links = [ { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype,openairecompatibility] } ]}, organization { dups = true, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
12 12
Stats_null_Numeric_Field=null
13 13
Stats_null_String_Field=null
14 14
Stats_output_Path=/tmp/tstats/
modules/dnet-openaire-stats-workflow/trunk/src/main/resources/eu/dnetlib/iis/core/javamapreduce/stats/oozie_app/workflow.xml
26 26
        </configuration>
27 27
    </global>
28 28

  
29
    <start to="mr_export"/>
29
    <start to="prepareDatabase"/>
30
    <!--<start to="prepareDatabase"/>-->
30 31

  
32
    <action name="prepareDatabase">
33
        <java>
34
            <prepare>
35
            </prepare>
36
            <configuration>
37
                <property>
38
                    <name>mapred.job.queue.name</name>
39
                    <value>${queueName}</value>
40
                </property>
41
            </configuration>
42
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
43
            <arg>-SworkingDir=${workingDir}</arg>
44
            <arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
45
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
46
            <arg>-PStats_db_User=${Stats_db_User}</arg>
47
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
48
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
49
        </java>
50

  
51
        <ok to="mr_export"/>
52
        <error to="fail"/>
53
    </action>
54

  
31 55
    <action name="mr_export">
32 56
        <map-reduce>
33

  
34 57
            <prepare>
35 58
                <delete path="${nameNode}${Stats_output_Path}"/>
36

  
37 59
            </prepare>
38 60
            <configuration>
39 61
                <property>
......
43 65
                <property>
44 66
                    <name>hbase.rootdir</name>
45 67
                    <value>$nameNode/hbase</value>
46

  
47 68
                </property>
48

  
49 69
                <property>
50 70
                    <name>hbase.security.authentication</name>
51 71
                    <value>simple</value>
52 72
                </property>
73

  
53 74
                <!-- ZOOKEEPER -->
54

  
55 75
                <property>
56 76
                    <name>hbase.zookeeper.quorum</name>
57 77
                    <value>
......
65 85
                    <value>root-region-server</value>
66 86

  
67 87
                </property>
68

  
69 88
                <property>
70 89
                    <name>hbase.zookeeper.property.clientPort</name>
71 90
                    <value>2181</value>
72 91
                </property>
73 92

  
74

  
75 93
                <!-- MR IO -->
76

  
77

  
78 94
                <property>
79 95
                    <name>mapreduce.inputformat.class</name>
80 96
                    <value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
81 97
                </property>
82

  
83 98
                <property>
84 99
                    <name>mapred.mapoutput.key.class</name>
85 100
                    <value>org.apache.hadoop.io.Text</value>
......
96 111
                    <name>mapred.output.value.class</name>
97 112
                    <value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
98 113
                </property>
99

  
100 114
                <!-- ## This is required for new MapReduce API usage -->
101 115
                <property>
102 116
                    <name>mapred.mapper.new-api</name>
......
106 120
                    <name>mapred.reducer.new-api</name>
107 121
                    <value>true</value>
108 122
                </property>
109

  
110 123
                <!-- # Job-specific options -->
111 124
                <property>
112 125
                    <name>dfs.blocksize</name>
......
818 831
            <arg>${Stats_output_Path}</arg>
819 832
            <arg>${isLookupEndpoint}</arg>
820 833
        </java>
821
        <ok to="prepareDatabase"/>
834
        <ok to="sqoopImport"/>
822 835

  
823 836
        <error to="fail"/>
824 837
    </action>
825 838

  
826

  
827
    <action name="prepareDatabase">
828
        <java>
829
            <prepare>
830
            </prepare>
831
            <configuration>
832
                <property>
833
                    <name>mapred.job.queue.name</name>
834
                    <value>${queueName}</value>
835
                </property>
836
            </configuration>
837
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
838
            <arg>-SworkingDir=${workingDir}</arg>
839
            <arg>eu.dnetlib.iis.core.workflows.stats.DBInitWrapper</arg>
840
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
841
            <arg>-PStats_db_User=${Stats_db_User}</arg>
842
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
843
            <arg>-PStats_db_Driver=${Stats_db_Driver}</arg>
844
        </java>
845

  
846
        <ok to="sqoopImport"/>
847
        <error to="fail"/>
848
    </action>
849

  
850 839
    <action name="sqoopImport">
851 840
        <java>
852 841
            <prepare>
modules/dnet-openaire-stats-workflow/trunk/pom.xml
42 42
        <dependency>
43 43
            <groupId>eu.dnetlib</groupId>
44 44
            <artifactId>dnet-openaire-stats</artifactId>
45
            <version>3.0.0-SNAPSHOT</version>
45
            <version>4.0.0-SNAPSHOT</version>
46 46
        </dependency>
47 47

  
48 48
        <dependency>

Also available in: Unified diff