Project

General

Profile

« Previous | Next » 

Revision 58819

Added by Dimitris Pierrakos almost 4 years ago

Handle large JSON files

View differences:

modules/dnet-openaire-usage-stats-export-wf/branches/usage_stats_export_v2/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export/PiwikDownloadLogs.java
103 103
                    outFolder = repoLogsPath;
104 104
                }
105 105
                FileSystem fs = FileSystem.get(new Configuration());
106
                FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true);
106
                //FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true);
107 107

  
108 108
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
109 109
                String content = "";
......
111 111
                int i = 0;
112 112

  
113 113
                while (!content.equals("[]\n")) {
114
                    FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) +  "_"+i+".json"), true);
114 115
                    String apiUrl = baseApiUrl;
115 116

  
116 117
                    if (i > 0) {
......
122 123
                    fin.write(content.getBytes());
123 124

  
124 125
                    i++;
126
                    fin.close();
125 127
                }
126
                fin.close();
128
                //fin.close();
127 129

  
128 130
            }
129 131

  
130 132
        }
133
    }
131 134
}
modules/dnet-openaire-usage-stats-export-wf/branches/usage_stats_export_v2/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export/PiwikStatsDB.java
404 404
//        sql = "SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views INTO usage_stats FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
405 405
        sql = "CREATE TABLE IF NOT EXISTS usage_stats AS SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
406 406
        stmt.executeUpdate(sql);
407
        
408
        sql = "INSERT INTO usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats_tmp AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
409
        stmt.executeUpdate(sql);
407 410

  
408 411
        sql = "CREATE INDEX IF NOT EXISTS usage_stats_source ON usage_stats USING btree(source);";
409 412
        stmt.executeUpdate(sql);
modules/dnet-openaire-usage-stats-export-wf/branches/usage_stats_export_v2/dnet-openaire-usage-stats-export/pom.xml
26 26
        <dependency>
27 27
            <groupId>org.apache.hadoop</groupId>
28 28
            <artifactId>hadoop-common</artifactId>
29
            <version>${hadoop.common.version}</version>
29
            <version>2.2.0</version>
30 30
            <type>jar</type>
31 31
        </dependency>
32 32
        <dependency>

Also available in: Unified diff