Revision 58819
Added by Dimitris Pierrakos almost 4 years ago
modules/dnet-openaire-usage-stats-export-wf/branches/usage_stats_export_v2/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export/PiwikDownloadLogs.java | ||
---|---|---|
103 | 103 |
outFolder = repoLogsPath; |
104 | 104 |
} |
105 | 105 |
FileSystem fs = FileSystem.get(new Configuration()); |
106 |
FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true); |
|
106 |
//FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true);
|
|
107 | 107 |
|
108 | 108 |
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth; |
109 | 109 |
String content = ""; |
... | ... | |
111 | 111 |
int i = 0; |
112 | 112 |
|
113 | 113 |
while (!content.equals("[]\n")) { |
114 |
FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + "_"+i+".json"), true); |
|
114 | 115 |
String apiUrl = baseApiUrl; |
115 | 116 |
|
116 | 117 |
if (i > 0) { |
... | ... | |
122 | 123 |
fin.write(content.getBytes()); |
123 | 124 |
|
124 | 125 |
i++; |
126 |
fin.close(); |
|
125 | 127 |
} |
126 |
fin.close(); |
|
128 |
//fin.close();
|
|
127 | 129 |
|
128 | 130 |
} |
129 | 131 |
|
130 | 132 |
} |
133 |
} |
|
131 | 134 |
} |
modules/dnet-openaire-usage-stats-export-wf/branches/usage_stats_export_v2/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export/PiwikStatsDB.java | ||
---|---|---|
404 | 404 |
// sql = "SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views INTO usage_stats FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;"; |
405 | 405 |
sql = "CREATE TABLE IF NOT EXISTS usage_stats AS SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;"; |
406 | 406 |
stmt.executeUpdate(sql); |
407 |
|
|
408 |
sql = "INSERT INTO usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats_tmp AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;"; |
|
409 |
stmt.executeUpdate(sql); |
|
407 | 410 |
|
408 | 411 |
sql = "CREATE INDEX IF NOT EXISTS usage_stats_source ON usage_stats USING btree(source);"; |
409 | 412 |
stmt.executeUpdate(sql); |
modules/dnet-openaire-usage-stats-export-wf/branches/usage_stats_export_v2/dnet-openaire-usage-stats-export/pom.xml | ||
---|---|---|
26 | 26 |
<dependency> |
27 | 27 |
<groupId>org.apache.hadoop</groupId> |
28 | 28 |
<artifactId>hadoop-common</artifactId> |
29 |
<version>${hadoop.common.version}</version>
|
|
29 |
<version>2.2.0</version>
|
|
30 | 30 |
<type>jar</type> |
31 | 31 |
</dependency> |
32 | 32 |
<dependency> |
Also available in: Unified diff
Handle large JSON files