Project

General

Profile

« Previous | Next » 

Revision 59067

Added by Dimitris Pierrakos almost 4 years ago

Added LaReferencia Repos to WF

View differences:

PiwikStatsDB.java
25 25

  
26 26
public class PiwikStatsDB {
27 27

  
28
    private String logPath;
29 28
    private String logRepoPath;
30 29
    private String logPortalPath;
31 30

  
......
35 34
    private String CounterRobotsURL;
36 35
    private ArrayList robotsList;
37 36

  
38

  
39 37
    public PiwikStatsDB(String logRepoPath, String logPortalPath) throws Exception {
40 38
        this.logRepoPath = logRepoPath;
41 39
        this.logPortalPath = logPortalPath;
......
44 42
    }
45 43

  
46 44
    public void foo() {
47
        Stream<String> s = Arrays.stream(new String[] {"a", "b", "c", "d"});
45
        Stream<String> s = Arrays.stream(new String[]{"a", "b", "c", "d"});
48 46

  
49 47
        System.out.println(s.parallel().count());
50 48
    }
......
143 141
            this.robotsList = counterRobots.getRobotsPatterns();
144 142

  
145 143
            processRepositoryLog();
146
            log.info("repository process done");
144
            log.info("OpenAIRE repository process done");
147 145
            removeDoubleClicks();
148
            log.info("removing double clicks done");
149
            cleanOAI();
150
            log.info("cleaning oai done");
146
            log.info("OpenAIRE removing double clicks done");
151 147

  
152
            viewsStats();
153
            downloadsStats();
154

  
155 148
            processPortalLog();
156 149
            log.info("portal process done");
157
            
150

  
158 151
            portalStats();
159 152
            log.info("portal usagestats done");
160 153

  
154
            cleanOAI();
155
            log.info("OpenAIREcleaning oai done");
156

  
157
            viewsStats();
158
            log.info("OpenAIRE views stats done");
159

  
160
            downloadsStats();
161
            log.info("OpenAIRE downloads stats done");
162

  
161 163
            updateProdTables();
162 164
            log.info("updateProdTables done");
163 165

  
......
177 179
//            throw new Exception("Failed to create usage usagestats: " + e.toString(), e);
178 180
//        }
179 181
//    }
180

  
181 182
    public void processRepositoryLog() throws Exception {
182 183
        Statement stmt = ConnectDB.getConnection().createStatement();
183 184
        ConnectDB.getConnection().setAutoCommit(false);
......
294 295

  
295 296
        sql = "CREATE TABLE IF NOT EXISTS pageviews_stats (like pageviews_stats_tmp including all)";
296 297
        stmt.executeUpdate(sql);
297
        
298

  
298 299
        stmt.close();
299 300
        ConnectDB.getConnection().commit();
300 301
        ConnectDB.getConnection().close();
......
323 324
//        ConnectDB.getConnection().commit();
324 325
//        ConnectDB.getConnection().close();
325 326
//    }
326

  
327
     private void downloadsStats() throws Exception {
327
    private void downloadsStats() throws Exception {
328 328
        Statement stmt = ConnectDB.getConnection().createStatement();
329 329
        ConnectDB.getConnection().setAutoCommit(false);
330 330

  
331
         //String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
331
        //String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
332 332
        String sql = "CREATE OR REPLACE VIEW result_downloads_monthly_tmp as select entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklogtmp where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
333 333
        stmt.executeUpdate(sql);
334 334

  
......
336 336
//        sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
337 337
        sql = "CREATE TABLE IF NOT EXISTS downloads_stats_tmp AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire FROM result_downloads_monthly_tmp p, public.datasource d, public.result_oids ro where p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
338 338
        stmt.executeUpdate(sql);
339
        
339

  
340 340
        sql = "CREATE TABLE IF NOT EXISTS downloads_stats (like downloads_stats_tmp including all)";
341 341
        stmt.executeUpdate(sql);
342
        
343
        sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp;";
344
        stmt.executeUpdate(sql);
345 342

  
343
        //sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp;";
344
        //stmt.executeUpdate(sql);
345

  
346 346
        stmt.close();
347 347
        ConnectDB.getConnection().commit();
348 348
        ConnectDB.getConnection().close();
......
359 359
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
360 360

  
361 361
//        String sql = "SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date INTO full_dates FROM generate_series(0, " + diffMonth + ", 1) AS offs;";
362
        String sql = "CREATE TABLE IF NOT EXISTS full_dates AS SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date FROM generate_series(0, " + diffMonth + ", 1) AS offs;";
362
        String sql = "CREATE TABLE IF NOT EXISTS full_dates(full_date TEXT)";
363 363
        stmt.executeUpdate(sql);
364
        
365
        sql = "INSERT INTO full_dates SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date FROM generate_series(0, " + diffMonth + ", 1) AS offs;";
366
        stmt.executeUpdate(sql);
364 367

  
368
        //sql = "INSERT INTO public.full_dates SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date FROM generate_series(0, " + diffMonth + ", 1) AS offs;";
369
        //stmt.executeUpdate(sql);
370
        
365 371
        sql = "CREATE INDEX IF NOT EXISTS full_dates_full_date ON full_dates USING btree(full_date);";
366 372
        stmt.executeUpdate(sql);
367 373

  
......
404 410
//        sql = "SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views INTO usage_stats FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
405 411
        sql = "CREATE TABLE IF NOT EXISTS usage_stats AS SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
406 412
        stmt.executeUpdate(sql);
413

  
414
        sql = "INSERT INTO usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats_tmp AS ds FULL OUTER JOIN views_stats_tmp AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
415
        stmt.executeUpdate(sql);
416

  
417
        sql = "INSERT INTO usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM la_downloads_stats_tmp AS ds FULL OUTER JOIN la_views_stats_tmp AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
418
        stmt.executeUpdate(sql);
407 419
        
408
        sql = "INSERT INTO usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats_tmp AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
420
        sql = "INSERT INTO public.usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats_tmp AS ds FULL OUTER JOIN views_stats_tmp AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
409 421
        stmt.executeUpdate(sql);
410 422

  
423
        sql = "INSERT INTO public.usage_stats SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM la_downloads_stats_tmp AS ds FULL OUTER JOIN la_views_stats_tmp AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
424
        stmt.executeUpdate(sql);
425
        
411 426
        sql = "CREATE INDEX IF NOT EXISTS usage_stats_source ON usage_stats USING btree(source);";
412 427
        stmt.executeUpdate(sql);
413 428

  
......
419 434

  
420 435
        sql = "CREATE INDEX IF NOT EXISTS usage_stats_date ON usage_stats USING btree(date);";
421 436
        stmt.executeUpdate(sql);
422
       
423
        sql = "DROP TABLE IF EXISTS process_portal_log_tmp;";
437

  
438
        sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp";
424 439
        stmt.executeUpdate(sql);
425 440

  
426
        sql = "DROP TABLE IF EXISTS pageviews_stats_tmp;";
441
        sql = "DROP VIEW IF EXISTS  result_views_monthly_tmp";
427 442
        stmt.executeUpdate(sql);
428 443
        
429
        sql = "DROP VIEW IF EXISTS  result_views_monthly_tmp";
444
        
445
        sql = "DROP TABLE IF EXISTS views_stats_tmp;";
430 446
        stmt.executeUpdate(sql);
431 447

  
432
        sql = "DROP TABLE IF EXISTS views_stats_tmp;";
433
        stmt.executeUpdate(sql);
434
        
435 448
        sql = "DROP TABLE IF EXISTS downloads_stats_tmp;";
436 449
        stmt.executeUpdate(sql);
437
        
450
       
451
        sql = "DROP TABLE IF EXISTS pageviews_stats_tmp;";
452
        stmt.executeUpdate(sql);
453

  
438 454
        sql = "DROP TABLE IF EXISTS process_portal_log_tmp;";
439 455
        stmt.executeUpdate(sql);
440 456

  
......
444 460
        sql = "DROP TABLE IF EXISTS sushilogtmp;";
445 461
        stmt.executeUpdate(sql);
446 462

  
463
        sql = "DROP VIEW IF EXISTS la_result_views_monthly_tmp;";
464
        stmt.executeUpdate(sql);
465

  
466
        sql = "DROP VIEW IF EXISTS la_result_downloads_monthly_tmp;";
467
        stmt.executeUpdate(sql);
468

  
469
        sql = "DROP TABLE IF EXISTS la_downloads_stats_tmp;";
470
        stmt.executeUpdate(sql);
471
        
472
        sql = "DROP TABLE IF EXISTS la_views_stats_tmp;";
473
        stmt.executeUpdate(sql);
474

  
475
        
476
        sql = "DROP TABLE IF EXISTS lareferencialogtmp;";
477
        stmt.executeUpdate(sql);
478

  
447 479
        stmt.close();
448 480
        ConnectDB.getConnection().commit();
449 481
        ConnectDB.getConnection().close();
......
644 676
        stmt.close();
645 677
        ConnectDB.getConnection().close();
646 678
    }
647
 
679

  
648 680
    public void portalStats() throws SQLException {
649 681
        Connection con = ConnectDB.getConnection();
650 682
        Statement stmt = con.createStatement();
......
670 702
        stmt = con.createStatement();
671 703
        sql = "INSERT INTO piwiklogtmp SELECT DISTINCT source, id_visit, country, action, url, roid.orid, \'project\', timestamp, referrer_name, agent FROM process_portal_log_tmp, public.project_oids roid WHERE entity_id IS NOT null AND entity_id=roid.orid AND roid.orid IS NOT null;";
672 704
        stmt.executeUpdate(sql);
673
 //       stmt.close();
705
        //       stmt.close();
674 706
//        con.commit();
675 707
        stmt.close();
676 708
        ConnectDB.getConnection().commit();
......
893 925
        Statement stmt = ConnectDB.getConnection().createStatement();
894 926
        ConnectDB.getConnection().setAutoCommit(false);
895 927

  
896
       String sql = "insert into piwiklog select * from piwiklogtmp;";
897
       stmt.executeUpdate(sql);
898
        
928
        String sql = "insert into piwiklog select * from piwiklogtmp;";
929
        stmt.executeUpdate(sql);
930

  
899 931
        sql = "insert into views_stats select * from views_stats_tmp;";
900 932
        stmt.executeUpdate(sql);
901
        
933

  
934
        sql = "insert into public.views_stats select * from views_stats_tmp;";
935
        stmt.executeUpdate(sql);
936

  
902 937
        sql = "insert into downloads_stats select * from downloads_stats_tmp;";
903 938
        stmt.executeUpdate(sql);
904 939
        
940
        sql = "insert into public.downloads_stats select * from downloads_stats_tmp;";
941
        stmt.executeUpdate(sql);
942

  
905 943
        sql = "insert into pageviews_stats select * from pageviews_stats_tmp;";
906 944
        stmt.executeUpdate(sql);
907 945

  
946
        sql = "insert into public.pageviews_stats select * from pageviews_stats_tmp;";
947
        stmt.executeUpdate(sql);
948

  
908 949
        stmt.close();
909 950
        ConnectDB.getConnection().commit();
910 951
        ConnectDB.getConnection().close();
......
928 969

  
929 970
            hdfs.close();
930 971
        } catch (Exception e) {
931
            log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logPath));
932
            throw new Exception("HDFS file path with exported data does not exist :   " + logPath, e);
972
            log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logRepoPath));
973
            throw new Exception("HDFS file path with exported data does not exist :   " + logRepoPath, e);
933 974
        }
934 975

  
935 976
        return fileNames;

Also available in: Unified diff