Project

General

Profile

1
package eu.dnetlib.usagestats.export;
2

    
3
import org.apache.hadoop.conf.Configuration;
4
import org.apache.hadoop.fs.FSDataOutputStream;
5
import org.apache.hadoop.fs.Path;
6
import org.apache.hadoop.fs.FileSystem;
7
import org.apache.log4j.Logger;
8

    
9
import java.io.*;
10
import java.net.URL;
11
import java.net.URLConnection;
12
import java.sql.PreparedStatement;
13
import java.sql.ResultSet;
14
import java.sql.Statement;
15
import java.text.SimpleDateFormat;
16
import java.util.Date;
17
import java.util.Calendar;
18
import org.json.simple.JSONArray;
19
import org.json.simple.JSONObject;
20
import org.json.simple.parser.JSONParser;
21

    
22
public class LaReferenciaDownloadLogs {
23

    
24
    private final String piwikUrl;
25
    private Date startDate;
26
    private final String tokenAuth;
27

    
28
    /*
29
       The Piwik's API method 
30
     */
31
    private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
32
    private final String format = "&format=json";
33
    private final String ApimethodGetAllSites = "?module=API&method=SitesManager.getSitesWithViewAccess";
34

    
35
    private final Logger log = Logger.getLogger(this.getClass());
36

    
37
    public LaReferenciaDownloadLogs(String piwikUrl, String tokenAuth) throws Exception {
38
        this.piwikUrl = piwikUrl;
39
        this.tokenAuth = tokenAuth;
40
        this.createTables();
41
        this.createTmpTables();
42
    }
43
    private void createTables() throws Exception {
44
        try {
45
            Statement stmt = ConnectDB.getConnection().createStatement();
46
            String sqlCreateTableLareferenciaLog = "CREATE TABLE IF NOT EXISTS lareferencialog(matomoid INTEGER, source TEXT, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
47
            String sqlcreateRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
48
                    + " ON INSERT TO lareferencialog "
49
                    + " WHERE (EXISTS ( SELECT lareferencialog.matomoid, lareferencialog.source, lareferencialog.id_visit,"
50
                    + "lareferencialog.action, lareferencialog.\"timestamp\", lareferencialog.entity_id "
51
                    + "FROM lareferencialog "
52
                    + "WHERE lareferencialog.matomoid=new.matomoid AND lareferencialog.source = new.source AND lareferencialog.id_visit = new.id_visit AND lareferencialog.action = new.action AND lareferencialog.entity_id = new.entity_id AND lareferencialog.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
53
            String sqlCreateRuleIndexLaReferenciaLog = "create index if not exists lareferencialog_rule on lareferencialog(matomoid, source, id_visit, action, entity_id, \"timestamp\");";
54
            stmt.executeUpdate(sqlCreateTableLareferenciaLog);
55
            stmt.executeUpdate(sqlcreateRuleLaReferenciaLog);
56
            stmt.executeUpdate(sqlCreateRuleIndexLaReferenciaLog);
57

    
58
            stmt.close();
59
            ConnectDB.getConnection().close();
60
            log.info("Lareferencia Tables Created");
61

    
62
        } catch (Exception e) {
63
            log.error("Failed to create tables: " + e);
64
            throw new Exception("Failed to create tables: " + e.toString(), e);
65
            //System.exit(0);
66
        }
67
    }
68

    
69
    private void createTmpTables() throws Exception {
70

    
71
        try {
72
            Statement stmt = ConnectDB.getConnection().createStatement();
73
            String sqlCreateTmpTableLaReferenciaLog = "CREATE TABLE IF NOT EXISTS lareferencialogtmp(matomoid INTEGER, source TEXT, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
74
            String sqlcreateTmpRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
75
                    + " ON INSERT TO lareferencialogtmp "
76
                    + " WHERE (EXISTS ( SELECT lareferencialogtmp.matomoid, lareferencialogtmp.source, lareferencialogtmp.id_visit,"
77
                    + "lareferencialogtmp.action, lareferencialogtmp.\"timestamp\", lareferencialogtmp.entity_id "
78
                    + "FROM lareferencialogtmp "
79
                    + "WHERE lareferencialogtmp.matomoid=new.matomoid AND lareferencialogtmp.source = new.source AND lareferencialogtmp.id_visit = new.id_visit AND lareferencialogtmp.action = new.action AND lareferencialogtmp.entity_id = new.entity_id AND lareferencialogtmp.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
80
            stmt.executeUpdate(sqlCreateTmpTableLaReferenciaLog);
81
            stmt.executeUpdate(sqlcreateTmpRuleLaReferenciaLog);
82

    
83
            stmt.close();
84
            log.info("Lareferencia Tmp Tables Created");
85

    
86
        } catch (Exception e) {
87
            log.error("Failed to create tmptables: " + e);
88
            throw new Exception("Failed to create tmp tables: " + e.toString(), e);
89
            //System.exit(0);
90
        }
91
    }
92
    private String getPiwikLogUrl() {
93
        return piwikUrl + "/";
94
    }
95

    
96
    private String getJson(String url) throws Exception {
97
        try {
98
            URL website = new URL(url);
99
            URLConnection connection = website.openConnection();
100

    
101
            StringBuilder response;
102
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
103
                response = new StringBuilder();
104
                String inputLine;
105
                while ((inputLine = in.readLine()) != null) {
106
                    response.append(inputLine);
107
                    response.append("\n");
108
                }
109
            }
110
            return response.toString();
111
        } catch (Exception e) {
112
            log.error("Failed to get URL: " + e);
113
            throw new Exception("Failed to get URL: " + e.toString(), e);
114
        }
115
    }
116

    
117
    public void GetLaReferenciaRepos(String repoLogsPath) throws Exception {
118

    
119
        String baseApiUrl = getPiwikLogUrl() + ApimethodGetAllSites + format + "&token_auth=" + this.tokenAuth;
120
        String content = "";
121

    
122
        content = getJson(baseApiUrl);
123
        JSONParser parser = new JSONParser();
124
        JSONArray jsonArray = (JSONArray) parser.parse(content);
125
        for (Object aJsonArray : jsonArray) {
126
            JSONObject jsonObjectRow = (JSONObject) aJsonArray;
127
            int idSite = Integer.parseInt(jsonObjectRow.get("idsite").toString());
128
            this.GetLaReFerenciaLogs(repoLogsPath, idSite);
129
        }   
130
    }
131

    
132
    public void GetLaReFerenciaLogs(String repoLogsPath,
133
            int laReferencialMatomoID) throws Exception {
134

    
135
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
136

    
137
        Calendar start = Calendar.getInstance();
138
        start.set(Calendar.YEAR, 2020);
139
        start.set(Calendar.MONTH, Calendar.JANUARY);
140
        start.set(Calendar.DAY_OF_MONTH, 1);
141

    
142
        Calendar end = Calendar.getInstance();
143
        end.add(Calendar.DAY_OF_MONTH, -1);
144

    
145
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
146
        PreparedStatement st = ConnectDB.getConnection().prepareStatement("SELECT max(timestamp) FROM lareferencialog WHERE matomoid=? HAVING max(timestamp) is not null;");
147
        st.setInt(1, laReferencialMatomoID);
148

    
149
        ResultSet rs_date = st.executeQuery();
150
        while (rs_date.next()) {
151
            if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
152
                start.setTime(sdf.parse(rs_date.getString(1)));
153
            }
154
        }
155
        rs_date.close();
156

    
157
        for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
158
            log.info("Downloading logs for LaReferencia repoid " + laReferencialMatomoID + " and for " + sdf.format(date));
159

    
160
            String period = "&period=day&date=" + sdf.format(date);
161
            String outFolder = "";
162
            outFolder = repoLogsPath;
163

    
164
            FileSystem fs = FileSystem.get(new Configuration());
165

    
166
            String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + laReferencialMatomoID + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
167
            String content = "";
168

    
169
            int i = 0;
170

    
171
            while (!content.equals("[]\n")) {
172

    
173
                FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + laReferencialMatomoID + "_LaRefPiwiklog" + sdf.format((date)) + "_" + i + ".json"), true);
174
                String apiUrl = baseApiUrl;
175

    
176
                if (i > 0) {
177
                    apiUrl += "&filter_offset=" + (i * 1000);
178
                }
179

    
180
                content = getJson(apiUrl);
181

    
182
                fin.write(content.getBytes());
183

    
184
                i++;
185
                fin.close();
186
            }
187
            //fin.close();
188
            //out.close();
189

    
190
        }
191

    
192
        // }
193
    }
194
}
(4-4/11)