Revision 59067
Added by Dimitris Pierrakos almost 4 years ago
PiwikStatsDB.java | ||
---|---|---|
25 | 25 |
|
26 | 26 |
public class PiwikStatsDB { |
27 | 27 |
|
28 |
private String logPath; |
|
29 | 28 |
private String logRepoPath; |
30 | 29 |
private String logPortalPath; |
31 | 30 |
|
... | ... | |
35 | 34 |
private String CounterRobotsURL; |
36 | 35 |
private ArrayList robotsList; |
37 | 36 |
|
38 |
|
|
39 | 37 |
public PiwikStatsDB(String logRepoPath, String logPortalPath) throws Exception { |
40 | 38 |
this.logRepoPath = logRepoPath; |
41 | 39 |
this.logPortalPath = logPortalPath; |
... | ... | |
44 | 42 |
} |
45 | 43 |
|
46 | 44 |
public void foo() { |
47 |
Stream<String> s = Arrays.stream(new String[] {"a", "b", "c", "d"});
|
|
45 |
Stream<String> s = Arrays.stream(new String[]{"a", "b", "c", "d"}); |
|
48 | 46 |
|
49 | 47 |
System.out.println(s.parallel().count()); |
50 | 48 |
} |
... | ... | |
143 | 141 |
this.robotsList = counterRobots.getRobotsPatterns(); |
144 | 142 |
|
145 | 143 |
processRepositoryLog(); |
146 |
log.info("repository process done"); |
|
144 |
log.info("OpenAIRE repository process done");
|
|
147 | 145 |
removeDoubleClicks(); |
148 |
log.info("removing double clicks done"); |
|
149 |
cleanOAI(); |
|
150 |
log.info("cleaning oai done"); |
|
146 |
log.info("OpenAIRE removing double clicks done"); |
|
151 | 147 |
|
152 |
viewsStats(); |
|
153 |
downloadsStats(); |
|
154 |
|
|
155 | 148 |
processPortalLog(); |
156 | 149 |
log.info("portal process done"); |
157 |
|
|
150 |
|
|
158 | 151 |
portalStats(); |
159 | 152 |
log.info("portal usagestats done"); |
160 | 153 |
|
154 |
cleanOAI(); |
|
155 |
log.info("OpenAIREcleaning oai done"); |
|
156 |
|
|
157 |
viewsStats(); |
|
158 |
log.info("OpenAIRE views stats done"); |
|
159 |
|
|
160 |
downloadsStats(); |
|
161 |
log.info("OpenAIRE downloads stats done"); |
|
162 |
|
|
161 | 163 |
updateProdTables(); |
162 | 164 |
log.info("updateProdTables done"); |
163 | 165 |
|
... | ... | |
177 | 179 |
// throw new Exception("Failed to create usage usagestats: " + e.toString(), e); |
178 | 180 |
// } |
179 | 181 |
// } |
180 |
|
|
181 | 182 |
public void processRepositoryLog() throws Exception { |
182 | 183 |
Statement stmt = ConnectDB.getConnection().createStatement(); |
183 | 184 |
ConnectDB.getConnection().setAutoCommit(false); |
... | ... | |
294 | 295 |
|
295 | 296 |
sql = "CREATE TABLE IF NOT EXISTS pageviews_stats (like pageviews_stats_tmp including all)"; |
296 | 297 |
stmt.executeUpdate(sql); |
297 |
|
|
298 |
|
|
298 | 299 |
stmt.close(); |
299 | 300 |
ConnectDB.getConnection().commit(); |
300 | 301 |
ConnectDB.getConnection().close(); |
... | ... | |
323 | 324 |
// ConnectDB.getConnection().commit(); |
324 | 325 |
// ConnectDB.getConnection().close(); |
325 | 326 |
// } |
326 |
|
|
327 |
private void downloadsStats() throws Exception { |
|
327 |
private void downloadsStats() throws Exception { |
|
328 | 328 |
Statement stmt = ConnectDB.getConnection().createStatement(); |
329 | 329 |
ConnectDB.getConnection().setAutoCommit(false); |
330 | 330 |
|
331 |
//String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
|
|
331 |
//String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;"; |
|
332 | 332 |
String sql = "CREATE OR REPLACE VIEW result_downloads_monthly_tmp as select entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklogtmp where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;"; |
333 | 333 |
stmt.executeUpdate(sql); |
334 | 334 |
|
... | ... | |
336 | 336 |
// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; |
337 | 337 |
sql = "CREATE TABLE IF NOT EXISTS downloads_stats_tmp AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire FROM result_downloads_monthly_tmp p, public.datasource d, public.result_oids ro where p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; |
338 | 338 |
stmt.executeUpdate(sql); |
339 |
|
|
339 |
|
|
340 | 340 |
sql = "CREATE TABLE IF NOT EXISTS downloads_stats (like downloads_stats_tmp including all)"; |
341 | 341 |
stmt.executeUpdate(sql); |
342 |
|
|
343 |
sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp;"; |
|
344 |
stmt.executeUpdate(sql); |
|
345 | 342 |
|
343 |
//sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp;"; |
|
344 |
//stmt.executeUpdate(sql); |
|
345 |
|
|
346 | 346 |
stmt.close(); |
347 | 347 |
ConnectDB.getConnection().commit(); |
348 | 348 |
ConnectDB.getConnection().close(); |
... | ... | |
359 | 359 |
int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH); |
360 | 360 |
|
361 | 361 |
// String sql = "SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date INTO full_dates FROM generate_series(0, " + diffMonth + ", 1) AS offs;"; |
362 |
String sql = "CREATE TABLE IF NOT EXISTS full_dates AS SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date FROM generate_series(0, " + diffMonth + ", 1) AS offs;";
|
|
362 |
String sql = "CREATE TABLE IF NOT EXISTS full_dates(full_date TEXT)";
|
|
363 | 363 |
stmt.executeUpdate(sql); |
364 |
|
|
365 |
sql = "INSERT INTO full_dates SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date FROM generate_series(0, " + diffMonth + ", 1) AS offs;"; |
|
366 |
stmt.executeUpdate(sql); |
|
364 | 367 |
|
368 |
//sql = "INSERT INTO public.full_dates SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date FROM generate_series(0, " + diffMonth + ", 1) AS offs;"; |
|
369 |
//stmt.executeUpdate(sql); |
|
370 |
|
|
365 | 371 |
sql = "CREATE INDEX IF NOT EXISTS full_dates_full_date ON full_dates USING btree(full_date);"; |
366 | 372 |
stmt.executeUpdate(sql); |
367 | 373 |
|
... | ... | |
404 | 410 |
// sql = "SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views INTO usage_stats FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;"; |
405 | 411 |
sql = "CREATE TABLE IF NOT EXISTS usage_stats AS SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;"; |
406 | 412 |
stmt.executeUpdate(sql); |
413 |
|
|
414 |
sql = "INSERT INTO usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats_tmp AS ds FULL OUTER JOIN views_stats_tmp AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;"; |
|
415 |
stmt.executeUpdate(sql); |
|
416 |
|
|
417 |
sql = "INSERT INTO usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM la_downloads_stats_tmp AS ds FULL OUTER JOIN la_views_stats_tmp AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;"; |
|
418 |
stmt.executeUpdate(sql); |
|
407 | 419 |
|
408 |
sql = "INSERT INTO usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats_tmp AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
|
|
420 |
sql = "INSERT INTO public.usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats_tmp AS ds FULL OUTER JOIN views_stats_tmp AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
|
|
409 | 421 |
stmt.executeUpdate(sql); |
410 | 422 |
|
423 |
sql = "INSERT INTO public.usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM la_downloads_stats_tmp AS ds FULL OUTER JOIN la_views_stats_tmp AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;"; |
|
424 |
stmt.executeUpdate(sql); |
|
425 |
|
|
411 | 426 |
sql = "CREATE INDEX IF NOT EXISTS usage_stats_source ON usage_stats USING btree(source);"; |
412 | 427 |
stmt.executeUpdate(sql); |
413 | 428 |
|
... | ... | |
419 | 434 |
|
420 | 435 |
sql = "CREATE INDEX IF NOT EXISTS usage_stats_date ON usage_stats USING btree(date);"; |
421 | 436 |
stmt.executeUpdate(sql); |
422 |
|
|
423 |
sql = "DROP TABLE IF EXISTS process_portal_log_tmp;";
|
|
437 |
|
|
438 |
sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp";
|
|
424 | 439 |
stmt.executeUpdate(sql); |
425 | 440 |
|
426 |
sql = "DROP TABLE IF EXISTS pageviews_stats_tmp;";
|
|
441 |
sql = "DROP VIEW IF EXISTS result_views_monthly_tmp";
|
|
427 | 442 |
stmt.executeUpdate(sql); |
428 | 443 |
|
429 |
sql = "DROP VIEW IF EXISTS result_views_monthly_tmp"; |
|
444 |
|
|
445 |
sql = "DROP TABLE IF EXISTS views_stats_tmp;"; |
|
430 | 446 |
stmt.executeUpdate(sql); |
431 | 447 |
|
432 |
sql = "DROP TABLE IF EXISTS views_stats_tmp;"; |
|
433 |
stmt.executeUpdate(sql); |
|
434 |
|
|
435 | 448 |
sql = "DROP TABLE IF EXISTS downloads_stats_tmp;"; |
436 | 449 |
stmt.executeUpdate(sql); |
437 |
|
|
450 |
|
|
451 |
sql = "DROP TABLE IF EXISTS pageviews_stats_tmp;"; |
|
452 |
stmt.executeUpdate(sql); |
|
453 |
|
|
438 | 454 |
sql = "DROP TABLE IF EXISTS process_portal_log_tmp;"; |
439 | 455 |
stmt.executeUpdate(sql); |
440 | 456 |
|
... | ... | |
444 | 460 |
sql = "DROP TABLE IF EXISTS sushilogtmp;"; |
445 | 461 |
stmt.executeUpdate(sql); |
446 | 462 |
|
463 |
sql = "DROP VIEW IF EXISTS la_result_views_monthly_tmp;"; |
|
464 |
stmt.executeUpdate(sql); |
|
465 |
|
|
466 |
sql = "DROP VIEW IF EXISTS la_result_downloads_monthly_tmp;"; |
|
467 |
stmt.executeUpdate(sql); |
|
468 |
|
|
469 |
sql = "DROP TABLE IF EXISTS la_downloads_stats_tmp;"; |
|
470 |
stmt.executeUpdate(sql); |
|
471 |
|
|
472 |
sql = "DROP TABLE IF EXISTS la_views_stats_tmp;"; |
|
473 |
stmt.executeUpdate(sql); |
|
474 |
|
|
475 |
|
|
476 |
sql = "DROP TABLE IF EXISTS lareferencialogtmp;"; |
|
477 |
stmt.executeUpdate(sql); |
|
478 |
|
|
447 | 479 |
stmt.close(); |
448 | 480 |
ConnectDB.getConnection().commit(); |
449 | 481 |
ConnectDB.getConnection().close(); |
... | ... | |
644 | 676 |
stmt.close(); |
645 | 677 |
ConnectDB.getConnection().close(); |
646 | 678 |
} |
647 |
|
|
679 |
|
|
648 | 680 |
public void portalStats() throws SQLException { |
649 | 681 |
Connection con = ConnectDB.getConnection(); |
650 | 682 |
Statement stmt = con.createStatement(); |
... | ... | |
670 | 702 |
stmt = con.createStatement(); |
671 | 703 |
sql = "INSERT INTO piwiklogtmp SELECT DISTINCT source, id_visit, country, action, url, roid.orid, \'project\', timestamp, referrer_name, agent FROM process_portal_log_tmp, public.project_oids roid WHERE entity_id IS NOT null AND entity_id=roid.orid AND roid.orid IS NOT null;"; |
672 | 704 |
stmt.executeUpdate(sql); |
673 |
// stmt.close(); |
|
705 |
// stmt.close();
|
|
674 | 706 |
// con.commit(); |
675 | 707 |
stmt.close(); |
676 | 708 |
ConnectDB.getConnection().commit(); |
... | ... | |
893 | 925 |
Statement stmt = ConnectDB.getConnection().createStatement(); |
894 | 926 |
ConnectDB.getConnection().setAutoCommit(false); |
895 | 927 |
|
896 |
String sql = "insert into piwiklog select * from piwiklogtmp;"; |
|
897 |
stmt.executeUpdate(sql); |
|
898 |
|
|
928 |
String sql = "insert into piwiklog select * from piwiklogtmp;";
|
|
929 |
stmt.executeUpdate(sql);
|
|
930 |
|
|
899 | 931 |
sql = "insert into views_stats select * from views_stats_tmp;"; |
900 | 932 |
stmt.executeUpdate(sql); |
901 |
|
|
933 |
|
|
934 |
sql = "insert into public.views_stats select * from views_stats_tmp;"; |
|
935 |
stmt.executeUpdate(sql); |
|
936 |
|
|
902 | 937 |
sql = "insert into downloads_stats select * from downloads_stats_tmp;"; |
903 | 938 |
stmt.executeUpdate(sql); |
904 | 939 |
|
940 |
sql = "insert into public.downloads_stats select * from downloads_stats_tmp;"; |
|
941 |
stmt.executeUpdate(sql); |
|
942 |
|
|
905 | 943 |
sql = "insert into pageviews_stats select * from pageviews_stats_tmp;"; |
906 | 944 |
stmt.executeUpdate(sql); |
907 | 945 |
|
946 |
sql = "insert into public.pageviews_stats select * from pageviews_stats_tmp;"; |
|
947 |
stmt.executeUpdate(sql); |
|
948 |
|
|
908 | 949 |
stmt.close(); |
909 | 950 |
ConnectDB.getConnection().commit(); |
910 | 951 |
ConnectDB.getConnection().close(); |
... | ... | |
928 | 969 |
|
929 | 970 |
hdfs.close(); |
930 | 971 |
} catch (Exception e) { |
931 |
log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logPath)); |
|
932 |
throw new Exception("HDFS file path with exported data does not exist : " + logPath, e); |
|
972 |
log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logRepoPath));
|
|
973 |
throw new Exception("HDFS file path with exported data does not exist : " + logRepoPath, e);
|
|
933 | 974 |
} |
934 | 975 |
|
935 | 976 |
return fileNames; |
Also available in: Unified diff
Added LaReferencia Repos to WF