Revision 58373

Removed the usage stats code, moving it into its own project.

View differences:

modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/src/main/java/eu/dnetlib/iis/core/workflows/stats/UsageStatsWrapper.java
package eu.dnetlib.iis.core.workflows.stats;

import eu.dnetlib.iis.core.java.PortBindings;
import eu.dnetlib.iis.core.java.Process;
import eu.dnetlib.iis.core.java.porttype.PortType;
import eu.dnetlib.usagestats.export.UsageStatsExporter;
import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Logger;
import java.util.HashMap;
import java.util.Map;

public class UsageStatsWrapper implements Process {
    private Logger log = Logger.getLogger(this.getClass());
    private UsageStatsExporter usageStatsExporter;

    @Override
    public void run(PortBindings portBindings, Configuration context, Map<String, String> parameters) throws Exception {

        // Wire the workflow parameters into the exporter.
        usageStatsExporter = new UsageStatsExporter();
        usageStatsExporter.setUsername(parameters.get("piwik_username"));
        usageStatsExporter.setPassword(parameters.get("piwik_pass"));
        usageStatsExporter.setHttpProtocol(parameters.get("piwik_httpProtocol"));
        usageStatsExporter.setPiwikUrl(parameters.get("piwik_url"));
        usageStatsExporter.setSiteId(parameters.get("piwik_siteId"));
        usageStatsExporter.setStartDate(parameters.get("piwik_startDate"));
        usageStatsExporter.setFinalDate(parameters.get("piwik_finalDate"));
        usageStatsExporter.setLogsPath(parameters.get("piwik_logsPath"));
        usageStatsExporter.setFilterOffset(parameters.get("piwik_filterOffset"));
        //usageStatsExporter.setDbSchema(parameters.get("piwik_schema"));
        usageStatsExporter.setDbUrl(parameters.get("Stats_db_Url"));
        usageStatsExporter.setDbUsername(parameters.get("Stats_db_User"));
        usageStatsExporter.setDbPassword(parameters.get("Stats_db_Pass"));
        usageStatsExporter.setTokenAuth(parameters.get("token_auth"));

        log.info("Parameters: " + usageStatsExporter.getDbUrl() + usageStatsExporter.getUsername() + usageStatsExporter.getDbPassword());
        log.info("Parameters: " + usageStatsExporter.getDbSchema() + usageStatsExporter.getLogsPath());

        for (Map.Entry<String, String> e : parameters.entrySet()) {
            log.info(e.getKey() + ":" + e.getValue());
        }

        try {
            usageStatsExporter.export();
            log.info("Usage Stats Exported!...");
        } catch (Exception e) {
            log.error("Usage Stats Export failed: ", e);
            throw new Exception("Usage Stats Export failed: " + e.toString(), e);
        }
    }

    @Override
    public Map<String, PortType> getInputPorts() {
        return createInputPorts();
    }

    @Override
    public Map<String, PortType> getOutputPorts() {
        return createOutputPorts();
    }

    // This process reads and writes no Avro ports; both maps are empty.
    private static HashMap<String, PortType> createInputPorts() {
        return new HashMap<String, PortType>();
    }

    private static HashMap<String, PortType> createOutputPorts() {
        return new HashMap<String, PortType>();
    }
}
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/src/main/scripts/run_example_locally.sh
mvn clean package -Poozie,deploy-local -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/cloner
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/src/main/scripts/update_example_workflow_apps_list.py
#!/usr/bin/env python

## Generates a new version of the "generate_example_workflow_apps.properties"
## file that contains paths to all of the example workflows stored in this
## project. This is done by scanning the directory tree and searching for
## directories that look like they contain workflow definitions.

from __future__ import print_function

import os
import os.path

dir_with_examples = "src/test/resources/eu/dnetlib/iis/core/examples"
dirs_to_ignore = [".svn"]
output_file = "src/main/scripts/generate_example_workflow_apps.properties"

def does_contain_example(dir_path):
    return os.path.exists(os.path.join(dir_path, "oozie_app"))

examples = []

for root, dirs, files in os.walk(dir_with_examples):
    for dir_to_ignore in dirs_to_ignore:
        # Guard the removal: not every visited directory contains ".svn".
        if dir_to_ignore in dirs:
            dirs.remove(dir_to_ignore)
    dirs_to_remove = []
    for dir_ in dirs:
        dir_path = os.path.join(root, dir_)
        if does_contain_example(dir_path):
            examples.append(dir_path)
            dirs_to_remove.append(dir_)
    # Do not descend into directories already recognized as examples.
    for dir_to_remove in dirs_to_remove:
        dirs.remove(dir_to_remove)

examples = sorted(examples)
with open(output_file, "w") as f:
    for e in examples:
        print(e, file=f)
    print("# remember to leave '\\n' after the last line\n", file=f)
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/src/main/scripts/generate_example_workflow_apps.properties
eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_with_unicode_escape_codes
eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer
eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer_with_explicit_schema_file
eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer_with_subworkflow
eu/dnetlib/iis/core/examples/hadoopstreaming/wordcount_with_distributed_cache
eu/dnetlib/iis/core/examples/java/cloner
eu/dnetlib/iis/core/examples/java/joiner
eu/dnetlib/iis/core/examples/java/json_based_producer_and_consumer
eu/dnetlib/iis/core/examples/java/json_based_producer_and_consumer-failing
eu/dnetlib/iis/core/examples/java/line_by_line_copier
eu/dnetlib/iis/core/examples/javamapreduce/cloner
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_explicit_schema
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output_with_explicit_schema
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output_without_reducer
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output_without_reducer_with_explicit_schema
eu/dnetlib/iis/core/examples/javamapreduce/cloner_without_reducer
eu/dnetlib/iis/core/examples/javamapreduce/oldapi/cloner
eu/dnetlib/iis/core/examples/javamapreduce/oldapi/cloner_with_explicit_schema
eu/dnetlib/iis/core/examples/javamapreduce/person_by_age_splitter
eu/dnetlib/iis/core/examples/javamapreduce/reverse_relation
eu/dnetlib/iis/core/examples/parallel/parallel_joiner
eu/dnetlib/iis/core/examples/pig/basic
eu/dnetlib/iis/core/examples/pig/joiner
eu/dnetlib/iis/core/examples/pig/joiner_with_explicit_schema
eu/dnetlib/iis/core/examples/pig/person_by_docs_filter
eu/dnetlib/iis/core/examples/pig/person_by_docs_filter_with_subworkflow
eu/dnetlib/iis/core/examples/protobuf/java/cloner
eu/dnetlib/iis/core/examples/protobuf/java/joiner
eu/dnetlib/iis/core/examples/protobuf/java/line_by_line_copier
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/cloner
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/cloner_with_multiple_output
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/cloner_without_reducer
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/person_by_age_splitter
eu/dnetlib/iis/core/examples/protobuf/subworkflow/cloners
eu/dnetlib/iis/core/examples/subworkflow/cloners
# remember to leave '\n' after the last line
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/src/main/scripts/README.markdown
It is assumed that the scripts in this directory are executed with the current directory set to the main directory of the project, i.e. the one that contains the `pom.xml` file.

The scripts with the `run_locally` prefix generate example Oozie workflow applications that are meant to be run on a local installation of Hadoop (in standalone or pseudo-distributed mode). Scripts with the `run_on_cluster` prefix, on the other hand, generate example Oozie workflow applications that are meant to be run on the OpenAIRE+ Hadoop cluster.
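For concreteness, a minimal sketch of a typical session with the scripts from this directory; the `sh` invocation and the relative paths are assumptions, not part of the scripts themselves:

```bash
# Run from the project main directory, i.e. the one containing pom.xml.
sh src/main/scripts/run_example_locally.sh       # builds an example app for a local Hadoop installation
sh src/main/scripts/run_example_on_cluster.sh    # builds an example app for the OpenAIRE+ cluster
```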
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/src/main/scripts/run_example_test_on_cluster.sh
mvn clean package -Pattach-test-resources,oozie,deploy -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/javamapreduce/reverse_relation
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/src/main/scripts/run_example_on_cluster.sh
mvn clean package -Poozie,deploy -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/cloner
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/src/main/scripts/run_example_test_locally.sh
mvn clean package -Pattach-test-resources,oozie,deploy-local -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/javamapreduce/reverse_relation
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/src/main/resources/eu/dnetlib/iis/core/javamapreduce/stats/job.properties
Stats_db_Driver=org.postgresql.Driver
Stats_db_Pass=sqoop
Stats_db_Url=jdbc:postgresql://vatopedi.di.uoa.gr:5432/stats
Stats_db_User=sqoop
Stats_db_table_map=datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultTopic=result_topics,category=category,claim=claim,concept=concept,context=context,resultDatasource=result_datasources,resultDescription=result_descriptions,datasourceStats=datasource_stats,organizationStats=organization_stats,projectStats=project_stats,resultStats=result_stats,resultOid=result_oids,projectOid=project_oids,datasourceOid=datasource_oids,organizationOid=organization_oids,resultPid=result_pids,resultCitation=result_citations
Stats_delim_Character=!
Stats_enclosing_Character=#
Stats_output_Path=/tmp/tstats/
Stats_sqoop_RecsPerStatement=10000
Stats_sqoop_ReducersCount=8
Stats_sqoop_StatementPerTrans=1000000
dfs.client.failover.proxy.provider.dm-cluster-nn=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
dfs.ha.namenodes.dm-cluster-nn=nn1,nn2
dfs.namenode.http-address.dm-cluster-nn.nn2=namenode2.hadoop.dm.openaire.eu:50070
dfs.namenode.rpc-address.dm-cluster-nn.nn2=namenode2.hadoop.dm.openaire.eu:8020
dfs.replication=2
fs.defaultFS=hdfs://dm-cluster-nn
hadoop.rpc.socket.factory.class.default=org.apache.hadoop.net.StandardSocketFactory
hadoop.security.auth_to_local=DEFAULT
hadoop.security.authentication=simple
hbase.rootdir=hdfs://dm-cluster-nn/hbase
hbase.security.authentication=simple
hbase.zookeeper.client.port=2181
hbase.zookeeper.property.clientPort=2181
hbase.zookeeper.quorum=namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu
mapred.client.failover.proxy.provider.dm-cluster-jt=org.apache.hadoop.mapred.ConfiguredFailoverProxyProvider
mapred.job.tracker=dm-cluster-jt
mapred.jobtrackers.dm-cluster-jt=jt1,jt2
mapred.mapper.new-api=true
mapred.reducer.new-api=true
oozie.service.loc=http://oozie.hadoop.dm.openaire.eu:11000/oozie
oozie.wf.application.path=hdfs://dm-cluster-nn/user/tsampikos.livisianos/core/javamapreduce/stats/oozie_app
piwik_filterOffset=5
piwik_finalDate=2016-01-03
piwik_httpProtocol=https
piwik_logsPath=/tmp/tpiwikLogs_update/
piwik_pass=eiLae9EiyahXea9aemiesiiJ
piwik_siteId=5
piwik_startDate=2016-01-01
piwik_url=analytics.openaire.eu
piwik_username=usage_openaire
token_auth=32846584f571be9b57488bf4088f30ea
workingDir=/user/tsampikos.livisianos/core/javamapreduce/stats/working_dir
zookeeper.znode.parent=/hbase
zookeeper.znode.rootserver=root-region-server
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/src/main/resources/eu/dnetlib/iis/core/javamapreduce/stats/oozie_app/workflow.xml
<workflow-app name="usage-stats-export"
              xmlns="uri:oozie:workflow:0.4">
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapred.job.queue.name</name>
                <value>${queueName}</value>
            </property>
            <property>
                <name>oozie.sqoop.log.level</name>
                <value>DEBUG</value>
            </property>
            <property>
                <name>oozie.launcher.mapred.job.priority</name>
                <value>NORMAL</value>
            </property>
            <property>
                <name>mapred.job.priority</name>
                <value>NORMAL</value>
            </property>
        </configuration>
    </global>

    <start to="exportUsageStats"/>

    <action name="exportUsageStats">
        <java>
            <prepare>
                <delete path="${nameNode}${piwik_logsPath}"/>
                <mkdir path="${nameNode}${piwik_logsPath}"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>oozie.sqoop.log.level</name>
                    <value>DEBUG</value>
                </property>
            </configuration>

            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>-SworkingDir=${workingDir}</arg>
            <arg>eu.dnetlib.iis.core.workflows.stats.UsageStatsWrapper</arg>
            <arg>-Ppiwik_username=${piwik_username}</arg>
            <arg>-Ppiwik_pass=${piwik_pass}</arg>
            <arg>-Ppiwik_httpProtocol=${piwik_httpProtocol}</arg>
            <arg>-Ppiwik_url=${piwik_url}</arg>
            <arg>-Ppiwik_siteId=${piwik_siteId}</arg>
            <arg>-Ppiwik_startDate=${piwik_startDate}</arg>
            <arg>-Ppiwik_finalDate=${piwik_finalDate}</arg>
            <arg>-Ppiwik_logsPath=${piwik_logsPath}</arg>
            <arg>-Ppiwik_filterOffset=${piwik_filterOffset}</arg>
            <arg>-Ppiwiki_schema=${piwiki_schema}</arg>
            <arg>-PStats_db_Url=${Stats_db_Url}</arg>
            <arg>-PStats_db_User=${Stats_db_User}</arg>
            <arg>-PStats_db_Pass=${Stats_db_Pass}</arg>
            <arg>-Ptoken_auth=${token_auth}</arg>
        </java>

        <ok to="cleanUpHDFS"/>
        <error to="fail"/>
    </action>
    <action name="cleanUpHDFS">
        <fs>
            <delete path="${nameNode}${piwik_logsPath}"/>
        </fs>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail">
        <message>
            Unfortunately, the process failed -- error message: [${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>
    <end name="end"/>
</workflow-app>
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/src/main/resources/oozie-log4j.properties
log4j.rootLogger=DEBUG, oozie

log4j.appender.none=org.apache.log4j.varia.NullAppender

# Direct log messages to a log file
log4j.appender.oozie=org.apache.log4j.RollingFileAppender
log4j.appender.oozie.File=/tmp/statsExportLog
log4j.appender.oozie.MaxFileSize=1MB
log4j.appender.oozie.MaxBackupIndex=1
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
log4j.appender.oozie.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n

log4j.logger.org.apache.hadoop=DEBUG, oozie
log4j.logger.eu.dnetlib.iis=DEBUG, oozie
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/core/src/test/resources/hsqldb-oozie-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Copyright (c) 2010 Yahoo! Inc. All rights reserved.
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
    <property>
        <name>oozie.service.JPAService.jdbc.driver</name>
        <value>org.hsqldb.jdbcDriver</value>
    </property>
    <property>
        <name>oozie.service.JPAService.jdbc.url</name>
        <value>jdbc:hsqldb:mem:oozie-db;create=true</value>
    </property>
</configuration>
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/core/src/test/resources/mysql-oozie-site.xml
<?xml version="1.0"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>oozie.service.JPAService.jdbc.driver</name>
        <value>com.mysql.jdbc.Driver</value>
        <description>JDBC driver class.</description>
    </property>
    <property>
        <name>oozie.test.db.port</name>
        <value>3306</value>
    </property>
    <property>
        <name>oozie.service.JPAService.jdbc.url</name>
        <value>jdbc:mysql://${oozie.test.db.host}:${oozie.test.db.port}/oozie</value>
        <description>JDBC URL.</description>
    </property>
    <property>
        <name>oozie.service.JPAService.jdbc.username</name>
        <value>oozie</value>
        <description>DB user name.</description>
    </property>
    <property>
        <name>oozie.service.JPAService.jdbc.password</name>
        <value>oozie</value>
        <description>
            DB user password. IMPORTANT: if the password is empty, leave a 1-space string; the service trims the
            value, and if it is empty, Configuration assumes it is NULL.
        </description>
    </property>
</configuration>
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/core/src/test/resources/oracle-oozie-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Copyright (c) 2010 Yahoo! Inc. All rights reserved.
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
    <property>
        <name>oozie.service.JPAService.jdbc.driver</name>
        <value>oracle.jdbc.driver.OracleDriver</value>
    </property>
    <property>
        <name>oozie.test.db.port</name>
        <value>1521</value>
    </property>
    <property>
        <name>oozie.test.db.name</name>
        <value>xe</value>
    </property>
    <property>
        <name>oozie.service.JPAService.jdbc.url</name>
        <value>jdbc:oracle:thin:@//${oozie.test.db.host}:${oozie.test.db.port}/${oozie.test.db.name}</value>
    </property>
    <property>
        <name>oozie.service.JPAService.jdbc.username</name>
        <value>oozie</value>
    </property>
    <property>
        <name>oozie.service.JPAService.jdbc.password</name>
        <value>oozie</value>
    </property>
</configuration>
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/core/src/test/resources/postgres-oozie-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Copyright (c) 2010 Yahoo! Inc. All rights reserved.
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
    <property>
        <name>oozie.service.JPAService.jdbc.driver</name>
        <value>org.postgresql.Driver</value>
    </property>
    <property>
        <name>oozie.test.db.port</name>
        <value>5432</value>
    </property>
    <property>
        <name>oozie.test.db.name</name>
        <value>oozie</value>
    </property>
    <property>
        <name>oozie.service.JPAService.jdbc.url</name>
        <value>jdbc:postgresql://${oozie.test.db.host}:${oozie.test.db.port}/${oozie.test.db.name}</value>
    </property>
    <property>
        <name>oozie.service.JPAService.jdbc.username</name>
        <value>oozie</value>
    </property>
    <property>
        <name>oozie.service.JPAService.jdbc.password</name>
        <value>oozie</value>
    </property>
</configuration>
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/core/src/test/resources/test-oozie-log4j.properties
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#

log4j.appender.oozie=org.apache.log4j.ConsoleAppender
log4j.appender.oozie.Target=System.out
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
log4j.appender.oozie.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n

log4j.appender.null=org.apache.log4j.varia.NullAppender

log4j.logger.org.apache=INFO, oozie
log4j.logger.org.mortbay=WARN, oozie
log4j.logger.org.hsqldb=WARN, oozie

log4j.logger.opslog=NONE, null
log4j.logger.applog=NONE, null
log4j.logger.instrument=NONE, null

log4j.logger.a=ALL, null
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/core/src/test/resources/hadoop-config.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>

    <property>
        <name>fs.default.name</name>
        <value>hdfs://nmis-hadoop-cluster</value>
    </property>

    <property>
        <name>mapreduce.jobtracker.kerberos.principal</name>
        <value>mapred/_HOST@LOCALREALM</value>
    </property>

    <property>
        <name>dfs.namenode.kerberos.principal</name>
        <value>hdfs/_HOST@LOCALREALM</value>
    </property>

    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>

</configuration>
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/core/src/test/resources/test-custom-log4j.properties
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#

log4j.appender.oozie=org.apache.log4j.ConsoleAppender
log4j.appender.oozie.Target=System.out
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
log4j.appender.oozie.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n

log4j.appender.null=org.apache.log4j.varia.NullAppender

log4j.logger.org.apache=INFO, oozie
log4j.logger.org.mortbay=WARN, oozie
log4j.logger.org.hsqldb=WARN, oozie

log4j.logger.opslog=NONE, null
log4j.logger.applog=NONE, null
log4j.logger.instrument=NONE, null

log4j.logger.a=NONE, null
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/core/README.md
ATTENTION: Please **do not touch this directory** nor its contents, since doing so might break the integration tests.

This directory, its subdirectories, and the files they contain are here as a hack to make the Oozie integration unit tests work.

Details
-------
Oozie tests assume that they are placed inside the directory tree of the Oozie source code; see the source code of the `XTestCase` class, which is an ancestor of the `MiniOozieTestCase` class which, in turn, should be inherited by your test case class.

How to get the source code of the `XTestCase` class (see the shell sketch below):

- Download the source code of Ubuntu's `oozie` package prepared by Cloudera (`apt-get source oozie`). It is version 3.1.3+155 of this package.
- Open the file `oozie-3.1.3+155/src/core/src/test/java/org/apache/oozie/test/XTestCase.java` and look at lines 93-105.
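A minimal shell sketch of the steps above, assuming an Ubuntu system with a `deb-src` entry for the Cloudera repository already configured:

```bash
# Fetch the source of the Cloudera-prepared oozie package (version 3.1.3+155).
apt-get source oozie

# Print the directory-layout assumptions made by the tests (lines 93-105 of XTestCase).
sed -n '93,105p' oozie-3.1.3+155/src/core/src/test/java/org/apache/oozie/test/XTestCase.java
```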
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <parent>
        <groupId>eu.dnetlib</groupId>
        <artifactId>icm-iis-parent-container</artifactId>
        <version>1.0.3</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>dnet-openaire-usage-stats-workflow</artifactId>
    <packaging>jar</packaging>
    <version>2.0.0</version>

    <!-- REMOVE BEFORE DEPLOYMENT -->
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <iis.hadoop.frontend.home.dir>home</iis.hadoop.frontend.home.dir>

        <queueName>default</queueName>

        <iis.hadoop.frontend.host.name>gateway.hadoop.dm.openaire.eu</iis.hadoop.frontend.host.name>
        <oozieServiceLoc>http://oozie.hadoop.dm.openaire.eu:11000/oozie</oozieServiceLoc>
        <nameNode>hdfs://dm-cluster-nn</nameNode>
        <jobTracker>dm-cluster-jt</jobTracker>
    </properties>
    <dependencies>
        <dependency>
            <groupId>eu.dnetlib</groupId>
            <artifactId>icm-iis-core</artifactId>
            <version>1.0.2</version>
        </dependency>

        <dependency>
            <groupId>eu.dnetlib</groupId>
            <artifactId>dnet-openaire-usage-stats</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
    </dependencies>
    <repositories>
        <repository>
            <id>cloudera</id>
            <name>Cloudera Repository</name>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>
</project>
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/readme.markdown
Execute the scripts in the following order (sketched as a shell session below):

1. `upload_workflow.sh`
2. `run_workflow.sh`
3. `print_working_dir.sh` or `get_working_dir.sh`
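The same sequence as a shell session; this is a sketch only, and what each step does is inferred from the script names rather than stated in this readme:

```bash
./upload_workflow.sh       # 1. upload the workflow application
./run_workflow.sh          # 2. run it
./print_working_dir.sh     # 3a. print the working directory
./get_working_dir.sh       # 3b. ...or fetch it instead
```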
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/install.sh
#!/bin/bash

cd ~/workspace/DNET45/dnet-openaire-stats/trunk
mvn clean install -Dmaven.test.skip=true

cd ~/workspace/DNET45/dnet-openaire-usage-stats-export/trunk
mvn clean install -Dmaven.test.skip=true

cd ~/workspace/DNET45/dnet-openaire-stats-workflow/trunk
mvn clean package -Dworkflow.source.dir=eu/dnetlib/iis/core/javamapreduce/stats -Poozie-package -Diis.hadoop.frontend.home.dir=/home -Duser.name=tsampikos.livisianos

scp ./target/oozie-package.tar.gz tsampikos.livisianos@gateway.hadoop.dm.openaire.eu:~/dnet45
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-workflow/dnet-openaire-usage-stats-workflow.iml
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
    <output url="file://$MODULE_DIR$/target/classes" />
    <output-test url="file://$MODULE_DIR$/target/test-classes" />
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
      <excludeFolder url="file://$MODULE_DIR$/target" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
    <orderEntry type="library" name="Maven: eu.dnetlib:icm-iis-core:1.0.2" level="project" />
    <orderEntry type="library" name="Maven: eu.dnetlib:icm-iis-3rdparty-avrojsoncoders:1.0.3" level="project" />
    <orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.1" level="project" />
    <orderEntry type="library" name="Maven: commons-cli:commons-cli:1.2" level="project" />
    <orderEntry type="library" name="Maven: org.apache.avro:avro:1.7.4" level="project" />
    <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-core-asl:1.8.8" level="project" />
    <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-mapper-asl:1.8.8" level="project" />
    <orderEntry type="library" name="Maven: com.thoughtworks.paranamer:paranamer:2.3" level="project" />
    <orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.0.4.1" level="project" />
    <orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.4.1" level="project" />
    <orderEntry type="library" name="Maven: org.tukaani:xz:1.0" level="project" />
    <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.6.4" level="project" />
    <orderEntry type="library" name="Maven: org.apache.avro:avro-mapred:hadoop2:1.7.4" level="project" />
    <orderEntry type="library" name="Maven: org.apache.avro:avro-ipc:1.7.4" level="project" />
    <orderEntry type="library" name="Maven: io.netty:netty:3.4.0.Final" level="project" />
    <orderEntry type="library" name="Maven: org.apache.velocity:velocity:1.7" level="project" />
    <orderEntry type="library" name="Maven: org.mortbay.jetty:servlet-api:2.5-20081211" level="project" />
    <orderEntry type="library" name="Maven: org.apache.avro:avro-ipc:tests:1.7.4" level="project" />
    <orderEntry type="library" name="Maven: com.google.code.gson:gson:2.2.4" level="project" />
    <orderEntry type="module" module-name="dnet-openaire-usage-stats" />
    <orderEntry type="library" name="Maven: com.googlecode.json-simple:json-simple:1.1.1" level="project" />
    <orderEntry type="library" name="Maven: junit:junit:4.10" level="project" />
    <orderEntry type="library" name="Maven: org.hamcrest:hamcrest-core:1.1" level="project" />
    <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-common:2.0.0-cdh4.7.0" level="project" />
    <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-annotations:2.0.0-cdh4.7.0" level="project" />
    <orderEntry type="library" name="Maven: com.google.guava:guava:11.0.2" level="project" />
    <orderEntry type="library" name="Maven: org.apache.commons:commons-math:2.1" level="project" />
    <orderEntry type="library" name="Maven: xmlenc:xmlenc:0.52" level="project" />
    <orderEntry type="library" name="Maven: commons-httpclient:commons-httpclient:3.1" level="project" />
    <orderEntry type="library" name="Maven: commons-codec:commons-codec:1.4" level="project" />
    <orderEntry type="library" name="Maven: commons-io:commons-io:2.1" level="project" />
    <orderEntry type="library" name="Maven: commons-net:commons-net:3.1" level="project" />
    <orderEntry type="library" name="Maven: javax.servlet:servlet-api:2.5" level="project" />
    <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty:6.1.26.cloudera.2" level="project" />
    <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty-util:6.1.26.cloudera.2" level="project" />
    <orderEntry type="library" name="Maven: com.sun.jersey:jersey-core:1.8" level="project" />
    <orderEntry type="library" name="Maven: com.sun.jersey:jersey-json:1.8" level="project" />
    <orderEntry type="library" name="Maven: org.codehaus.jettison:jettison:1.1" level="project" />
    <orderEntry type="library" name="Maven: stax:stax-api:1.0.1" level="project" />
    <orderEntry type="library" name="Maven: com.sun.xml.bind:jaxb-impl:2.2.3-1" level="project" />
    <orderEntry type="library" name="Maven: javax.xml.bind:jaxb-api:2.2.2" level="project" />
    <orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" />
    <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-jaxrs:1.7.1" level="project" />
    <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-xc:1.7.1" level="project" />
    <orderEntry type="library" name="Maven: com.sun.jersey:jersey-server:1.8" level="project" />
    <orderEntry type="library" name="Maven: asm:asm:3.1" level="project" />
    <orderEntry type="library" scope="RUNTIME" name="Maven: tomcat:jasper-compiler:5.5.23" level="project" />
    <orderEntry type="library" scope="RUNTIME" name="Maven: tomcat:jasper-runtime:5.5.23" level="project" />
    <orderEntry type="library" scope="RUNTIME" name="Maven: javax.servlet.jsp:jsp-api:2.1" level="project" />
    <orderEntry type="library" scope="RUNTIME" name="Maven: commons-el:commons-el:1.0" level="project" />
    <orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.1" level="project" />
    <orderEntry type="library" name="Maven: org.apache.hadoop:cloudera-jets3t:2.0.0-cdh4.7.0" level="project" />
    <orderEntry type="library" name="Maven: net.java.dev.jets3t:jets3t:0.6.1" level="project" />
    <orderEntry type="library" name="Maven: commons-lang:commons-lang:2.5" level="project" />
    <orderEntry type="library" name="Maven: commons-configuration:commons-configuration:1.6" level="project" />
    <orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.1" level="project" />
    <orderEntry type="library" name="Maven: commons-digester:commons-digester:1.8" level="project" />
    <orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils:1.7.0" level="project" />
    <orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils-core:1.8.0" level="project" />
    <orderEntry type="library" scope="RUNTIME" name="Maven: org.slf4j:slf4j-log4j12:1.6.1" level="project" />
    <orderEntry type="library" name="Maven: org.mockito:mockito-all:1.8.5" level="project" />
    <orderEntry type="library" name="Maven: net.sf.kosmosfs:kfs:0.3" level="project" />
    <orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:2.4.0a" level="project" />
    <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-auth:2.0.0-cdh4.7.0" level="project" />
    <orderEntry type="library" name="Maven: com.jcraft:jsch:0.1.42" level="project" />
    <orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.5-cdh4.7.0" level="project" />
    <orderEntry type="library" name="Maven: jline:jline:0.9.94" level="project" />
    <orderEntry type="library" name="Maven: eu.dnetlib:icm-iis-assembly-resources:1.0.1" level="project" />
    <orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
  </component>
</module>
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export/UsageStatsExporter.java
package eu.dnetlib.usagestats.export;

import org.apache.log4j.Logger;

public class UsageStatsExporter {

    private String username;
    private String password;
    private String httpProtocol;
    private String piwikUrl;
    private String siteId;
    private String startDate;
    private String finalDate;
    private String logsPath;
    private String filterOffset;

    private String tokenAuth;
    private String irusBaseURL;
    private String irusReportType;
    private String irusRelease;
    private String irusRequestorID;
    private String irusRepositoryIdentifier;
    private String irusPeriodGranularity;
    private String irusPeriod;

    private String dbUrl;
    private String dbSchema;
    private String dbUsername;
    private String dbPassword;

    private Logger log = Logger.getLogger(this.getClass());

    public UsageStatsExporter() {}

    public void export() throws Exception {

        PiwikDownloadLogs piwd = new PiwikDownloadLogs(username, password, tokenAuth, httpProtocol, piwikUrl, startDate, logsPath, dbUrl, dbUsername, dbPassword);
        piwd.getPiwikLogs();

        /*
         *  Create DB tables, insert/update statistics
         */
        PiwikStatsDB piwikstats = new PiwikStatsDB(dbUrl, dbUsername, dbPassword, logsPath);
        piwikstats.processLogs();

        //Create Usage Stats tables from Downloaded Logs
        piwikstats.usageStats();

        IrusStats irusStats = new IrusStats(dbUrl, dbUsername, dbPassword);
        irusStats.processIrusRRReport();
        irusStats.irusStats();

        SarcStats sarcStats = new SarcStats(dbUrl, dbUsername, dbPassword);
        sarcStats.processSarc();
        sarcStats.sarcStats();

        //finalize stats
        piwikstats.finalizeStats();
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public String getPassword() {
        return password;
    }

    public void setPassword(String password) {
        this.password = password;
    }

    public String getHttpProtocol() {
        return httpProtocol;
    }

    public void setHttpProtocol(String httpProtocol) {
        this.httpProtocol = httpProtocol;
    }

    public String getPiwikUrl() {
        return piwikUrl;
    }

    public void setPiwikUrl(String piwikUrl) {
        this.piwikUrl = piwikUrl;
    }

    public String getSiteId() {
        return siteId;
    }

    public void setSiteId(String siteId) {
        this.siteId = siteId;
    }

    public String getStartDate() {
        return startDate;
    }

    public void setStartDate(String startDate) {
        this.startDate = startDate;
    }

    public String getFinalDate() {
        return finalDate;
    }

    public void setFinalDate(String finalDate) {
        this.finalDate = finalDate;
    }

    public String getLogsPath() {
        return logsPath;
    }

    public void setLogsPath(String logsPath) {
        this.logsPath = logsPath;
    }

    public String getFilterOffset() {
        return filterOffset;
    }

    public void setFilterOffset(String filterOffset) {
        this.filterOffset = filterOffset;
    }

    public String getDbUrl() {
        return dbUrl;
    }

    public void setDbUrl(String dbUrl) {
        this.dbUrl = dbUrl;
    }

    public String getDbSchema() {
        return dbSchema;
    }

    public void setDbSchema(String dbSchema) {
        this.dbSchema = dbSchema;
    }

    public String getDbUsername() {
        return dbUsername;
    }

    public void setDbUsername(String dbUsername) {
        this.dbUsername = dbUsername;
    }

    public String getDbPassword() {
        return dbPassword;
    }

    public void setDbPassword(String dbPassword) {
        this.dbPassword = dbPassword;
    }

    public String getTokenAuth() {
        return tokenAuth;
    }

    public void setTokenAuth(String tokenAuth) {
        this.tokenAuth = tokenAuth;
    }

    public String getIrusBaseURL() {
        return irusBaseURL;
    }

    public void setIrusBaseURL(String irusBaseURL) {
        this.irusBaseURL = irusBaseURL;
    }

    public String getIrusReportType() {
        return irusReportType;
    }

    public void setIrusReportType(String irusReportType) {
        this.irusReportType = irusReportType;
    }

    public String getIrusRelease() {
        return irusRelease;
    }

    public void setIrusRelease(String irusRelease) {
        this.irusRelease = irusRelease;
    }

    public String getIrusRequestorID() {
        return irusRequestorID;
    }

    public void setIrusRequestorID(String irusRequestorID) {
        this.irusRequestorID = irusRequestorID;
    }

    public String getIrusRepositoryIdentifier() {
        return irusRepositoryIdentifier;
    }

    public void setIrusRepositoryIdentifier(String irusRepositoryIdentifier) {
        this.irusRepositoryIdentifier = irusRepositoryIdentifier;
    }

    public String getIrusPeriodGranularity() {
        return irusPeriodGranularity;
    }

    public void setIrusPeriodGranularity(String irusPeriodGranularity) {
        this.irusPeriodGranularity = irusPeriodGranularity;
    }

    public String getIrusPeriod() {
        return irusPeriod;
    }

    public void setIrusPeriod(String irusPeriod) {
        this.irusPeriod = irusPeriod;
    }
}
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export/PiwikDownloadLogs.java
1
package eu.dnetlib.usagestats.export;
2

  
3
import org.apache.hadoop.conf.Configuration;
4
import org.apache.hadoop.fs.FSDataOutputStream;
5
import org.apache.hadoop.fs.Path;
6
import org.apache.hadoop.fs.FileSystem;
7
import org.apache.log4j.Logger;
8

  
9
import java.io.*;
10
import java.net.URL;
11
import java.net.URLConnection;
12
import java.sql.Connection;
13
import java.sql.DriverManager;
14
import java.sql.PreparedStatement;
15
import java.sql.ResultSet;
16
import java.sql.Statement;
17
import java.text.SimpleDateFormat;
18
import java.util.Date;
19
import java.util.Calendar;
20

  
21
public class PiwikDownloadLogs {
22

  
23
    private final String piwikUsername;
24
    private final String piwikPassword;
25
    private final String httpProtocol;
26
    private final String piwikUrl;
27
    private final Date startDate;
28
    private final String tokenAuth;
29
    private final String logsPath;
30

  
31
    private final String dbUrl;
32
    private final String dbUserName;
33
    private final String dbPassword;
34

  
35
    /*
36
       The Piwik's API method 
37
    */
38
    private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
39
    private final String format = "&format=json";
40

  
41
    private final Logger log = Logger.getLogger(this.getClass());
42

  
43

  
44
    public PiwikDownloadLogs(String username, String password, String tokenAuth, String httpProtocol, String piwikURl, String sDate, String logsPath, String dbUrl, String dbUsername, String dbPassword) throws Exception{
45
        this.piwikUsername = username;
46
        this.piwikPassword = password;
47
        this.httpProtocol = httpProtocol;
48
        this.piwikUrl = piwikURl;
49

  
50
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
51
        this.startDate = sdf.parse(sDate);
52

  
53
        this.tokenAuth = tokenAuth;
54
        this.logsPath = logsPath;
55
        this.dbUrl = dbUrl;
56
        this.dbUserName = dbUsername;
57
        this.dbPassword = dbPassword;
58
    }
59

  
60
    private String getPiwikLogUrl(){
61
        return httpProtocol + "://" + piwikUrl + "/";
62
    }
63

  
64
    private String getJson(String url,String username, String password) throws Exception {
65
        //String cred=username+":"+password;
66
        //String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
67
        try {
68
            URL website = new URL(url);
69
            URLConnection connection = website.openConnection();
70

  
71
            //connection.setRequestProperty ("Authorization", "Basic "+encoded);
72
            StringBuilder response;
73
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
74
                response = new StringBuilder();
75
                String inputLine;
76
                while ((inputLine = in.readLine()) != null) {
77
                    response.append(inputLine);
78
                    response.append("\n");
79
                }
80
            }
81
            return response.toString();
82
        }catch (Exception e){
83
            log.error("Failed to get URL: " + e);
84
            throw new Exception("Failed to get URL: " + e.toString(), e);
85
        }
86
    }
87

  
88
    public void getPiwikLogs() throws Exception{
89
        GetPortalLogs();
90
        GetRepositoriesLogs();
91
    }
92

  
93
    private void GetPortalLogs() throws Exception{
94

  
95
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
96
        Calendar start = Calendar.getInstance();
97
        start.setTime(startDate);
98
        Calendar end = Calendar.getInstance();
99
        end.add(Calendar.DAY_OF_MONTH, -1);
100
        //end.setTime(getFinalDate());
101

  
102
        try{
103
            log.info("downloading logs for site with piwik_id: 5");
104
            Class.forName("org.postgresql.Driver");
105
            Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
106
            PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source='5' HAVING max(timestamp) is not null;");
107
            ResultSet rs_date = st.executeQuery();
108

  
109
            while(rs_date.next()){
110
                start.setTime(sdf.parse(rs_date.getString(1)));
111
            }
112
            rs_date.close();
113
            conn.close();
114

  
115
            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
116

  
117
                String period="&period=day&date="+sdf.format(date);
118
                log.info("Downloading logs for " + sdf.format(date));
119

  
120

  
121
                FileSystem fs = FileSystem.get(new Configuration());
122
                FSDataOutputStream fin = fs.create(new Path(logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json"), true);
123
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=5" + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
124
                String content = "";
125

  
126
                int i=0;
127

  
128
                while(!content.equals("[]\n")) {
129
                    String apiUrl = baseApiUrl;
130

  
131
                    if (i > 0)
132
                        apiUrl += "&filter_offset=" + (i*1000);
133

  
134
                    content = getJson(apiUrl, piwikUsername, piwikPassword);
135

  
136
                    fin.write(content.getBytes());
137

  
138
                    i++;
139
                }
140
                fin.close();
141
//
142
//
143
//
144
//
145
//
146
//
147
//                String apiUrl=getPiwikLogUrl()+APImethod+"&idSite=5"+period+format+"&expanded=5&filter_limit=1000&token_auth="+tokenAuth;
148
//                String content = getJson(apiUrl,piwikUsername,piwikPassword);
149
//
150
//                //for (int i=1;i<10;i++){
151
//                int i = 1;
152
//                while(true) {
153
//                    String apiUrlnew=apiUrl+"&filter_offset="+i*1000;
154
//                    String contentNew = getJson(apiUrlnew,piwikUsername,piwikUsername);
155
//                    content += contentNew;
156
//                    i++;
157
//                    if(contentNew.equals("[]\n")){
158
//                        break;
159
//                    }
160
//                }
161
//                flushString(content, logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json");
162
            }
163
        } catch (Exception e) {
164
            log.error("Failed to get portal logs", e);
165
            throw new Exception("Failed to get portal logs: " + e.toString(), e);
166
        }
167
    }

    private void GetRepositoriesLogs() throws Exception {

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        Calendar start = Calendar.getInstance();
        start.setTime(startDate);
        Calendar end = Calendar.getInstance();
        end.add(Calendar.DAY_OF_MONTH, -1);
        //end.setTime(getFinalDate());

        Class.forName("org.postgresql.Driver");
        Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
        Statement statement = conn.createStatement();
        ResultSet rs = statement.executeQuery("SELECT DISTINCT piwik_id FROM shadow.datasource WHERE piwik_id IS NOT NULL AND piwik_id != '5' ORDER BY piwik_id;");
        while (rs.next()) {
            int siteId = rs.getInt(1);
            PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source=?;");

            start.setTime(startDate);

            log.info("Downloading logs for site with piwik_id: " + siteId);

            st.setInt(1, siteId);
            ResultSet rs_date = st.executeQuery();

            while (rs_date.next()) {
                if (rs_date.getString(1) == null || rs_date.getString(1).equals("null") || rs_date.getString(1).equals("")) {
                    // No rows harvested yet for this source: keep this.startDate.
                } else {
                    // Resume from the last date already present in public.piwiklog.
                    start.setTime(sdf.parse(rs_date.getString(1)));
                }
            }
            rs_date.close();
            st.close();

            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {

                log.info("Downloading logs for " + sdf.format(date));

                String period = "&period=day&date=" + sdf.format(date);
                FileSystem fs = FileSystem.get(new Configuration());
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
                String content = "";

                int i = 0;

                while (!content.equals("[]\n")) {
                    // Unlike the portal export, each 1000-row page goes to its own file.
                    FSDataOutputStream fin = fs.create(new Path(logsPath + "repolog/" + siteId + "_Piwiklog" + sdf.format(date) + "_" + i + ".json"), true);
                    String apiUrl = baseApiUrl;

                    if (i > 0)
                        apiUrl += "&filter_offset=" + (i * 1000);

                    content = getJson(apiUrl, piwikUsername, piwikPassword);

                    fin.write(content.getBytes());
                    fin.close();

                    i++;
                }
            }
        }
        rs.close();
        conn.close();
    }
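
The for-loop header above is this revision's date-walking idiom: the start calendar is mutated in the update clause while date is re-bound to each day's value. A self-contained sketch of just that idiom (the dates here are examples, not values from the workflow):

import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

class DateWalk {
    public static void main(String[] args) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        Calendar start = Calendar.getInstance();
        start.add(Calendar.DATE, -7);       // example resume point: one week ago
        Calendar end = Calendar.getInstance();
        end.add(Calendar.DAY_OF_MONTH, -1); // stop at yesterday, as above
        // Same loop shape as GetRepositoriesLogs(): advance the calendar and
        // re-bind date each iteration until start reaches end.
        for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
            System.out.println("would download logs for " + sdf.format(date));
        }
    }
}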

//    private void flushString(String data, String destination) throws Exception {
//        FSDataOutputStream fin;
//        try {
//            FileSystem fs = FileSystem.get(new Configuration());
//            fin = fs.create(new Path(destination), true);
//            fin.write(data.getBytes());
//            fin.close();
//        } catch (Exception e) {
//            log.error("Failed to write exported data to a file: ", e);
//            throw new Exception("Failed to write exported data to a file: " + e.toString(), e);
//        }
//    }
}
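
The commented-out flushString helper above captures the HDFS write pattern used throughout both export methods. A runnable sketch of the same pattern (the output path is a placeholder, and a Hadoop client must be on the classpath):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class FlushStringDemo {
    public static void main(String[] args) throws Exception {
        String data = "[]\n";
        FileSystem fs = FileSystem.get(new Configuration());
        // Second argument true = overwrite an existing file, as in the exports above.
        FSDataOutputStream fin = fs.create(new Path("/tmp/flush-string-demo.json"), true);
        fin.write(data.getBytes());
        fin.close();
    }
}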
modules/dnet-openaire-stats-export-wf/branches/prod/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export/IrusStats.java
package eu.dnetlib.usagestats.export;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

import org.apache.log4j.Logger;

/**
 * Created by tsampikos on 28/3/2017.
 */
public class IrusStats {
    private final String dbUrl;
    private final String dbSchema;
    private final String dbUserName;
    private final String dbPassword;

    private Connection conn = null;
    private Statement stmt = null;

    private final Logger log = Logger.getLogger(this.getClass());

    public IrusStats(String dbUrl, String dbUsername, String dbPassword) throws Exception {
        this.dbUrl = dbUrl;
        this.dbSchema = "shadow";
        this.dbUserName = dbUsername;
        this.dbPassword = dbPassword;

        connectDB();
        createTables();
    }

    private void connectDB() throws Exception {
        try {
            Class.forName("org.postgresql.Driver");
            conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);

            stmt = conn.createStatement();
            String sqlSetSearchPath = "SET search_path TO " + dbSchema + ";";
            stmt.executeUpdate(sqlSetSearchPath);

            log.info("Opened database successfully");

        } catch (Exception e) {
            log.error("Connect to db failed: " + e);
            throw new Exception("Failed to connect to db: " + e.toString(), e);
        }
    }

    private void createTables() throws Exception {
        try {

            stmt = conn.createStatement();
            String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
            stmt.executeUpdate(sqlCreateTableSushiLog);

            stmt.executeUpdate("CREATE TABLE IF NOT EXISTS public.sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));");

            String sqlCopyPublicSushiLog = "INSERT INTO sushilog SELECT * FROM public.sushilog;";
            stmt.executeUpdate(sqlCopyPublicSushiLog);

            // Silently drop rows that would violate the primary key instead of
            // failing the whole bulk insert.
            String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " +
                " ON INSERT TO sushilog " +
                " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository, " +
                "sushilog.rid, sushilog.date " +
                "FROM sushilog " +
                "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
            stmt.executeUpdate(sqlcreateRuleSushiLog);

            stmt.close();
            conn.close();
            log.info("Sushi Tables Created");
        } catch (Exception e) {
            log.error("Failed to create tables: " + e);
            throw new Exception("Failed to create tables: " + e.toString(), e);
        }
    }
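
The ignore_duplicate_inserts rule turns primary-key collisions on sushilog into silent no-ops, which is what lets a re-run of the export replay already-harvested rows without failing. A sketch of that behaviour from JDBC (the connection string and values are placeholders, not this workflow's configuration):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

class RuleDemo {
    public static void main(String[] args) throws Exception {
        Connection c = DriverManager.getConnection("jdbc:postgresql://localhost/stats", "user", "pass");
        PreparedStatement ps = c.prepareStatement("INSERT INTO sushilog VALUES (?, ?, ?, ?, ?, ?);");
        // Insert the same (source, repository, rid, date, metric_type) twice:
        // the first attempt inserts 1 row, the second is rewritten to nothing
        // by the rule and reports 0 rows instead of raising a PK violation.
        for (int attempt = 0; attempt < 2; attempt++) {
            ps.setString(1, "IRUS-UK");
            ps.setString(2, "repo:1");
            ps.setString(3, "oai:example:42");
            ps.setString(4, "2017-03-01");
            ps.setString(5, "ft_total");
            ps.setInt(6, 7);
            System.out.println("attempt " + attempt + " inserted " + ps.executeUpdate() + " row(s)");
        }
        c.close();
    }
}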

    public void irusStats() throws Exception {
        if (conn.isClosed())
            connectDB();

        stmt = conn.createStatement();
        conn.setAutoCommit(false);

        //String sql = "INSERT INTO sushi_result_downloads SELECT s.source, d.id AS repository, ro.id, s.date, s.count FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total'";
        //String sql = "SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count INTO downloads_stats FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total'";
        //String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total';";
        // Map repository/result OAI identifiers to internal ids and bucket the
        // IRUS-UK full-text download counts ('ft_total') by 'YYYY/MM'.
        String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.rid=ro.orid AND metric_type='ft_total' AND s.source='IRUS-UK';";
        stmt.executeUpdate(sql);

        stmt.close();
        conn.commit();
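
The SQL above derives its monthly bucket with extract and LPAD; the same 'YYYY/MM' key in plain Java, for comparison (sample date only):

import java.text.SimpleDateFormat;
import java.util.Date;

class MonthBucket {
    public static void main(String[] args) throws Exception {
        // extract('year' ...) || '/' || LPAD(extract('month' ...), 2, '0')
        // produces keys like "2017/03"; SimpleDateFormat does the same in Java.
        Date d = new SimpleDateFormat("yyyy-MM-dd").parse("2017-03-28");
        System.out.println(new SimpleDateFormat("yyyy/MM").format(d)); // 2017/03
    }
}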
... This diff was truncated because it exceeds the maximum size that can be displayed.
