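// NOTE: this file appears to have been extracted from an annotated (blame) source view.
// Page residue preserved from the extraction: Project / General / Profile;
// first annotation: revision 45524, author "tsampikos."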
package eu.dnetlib.usagestats.export;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

public class PiwikDownloadLogs {
22
23 45950 tsampikos.
    private final String piwikUsername;
24
    private final String piwikPassword;
25
    private final String httpProtocol;
26
    private final String piwikUrl;
27
    private final Date startDate;
28
    private final String tokenAuth;
29
    private final String logsPath;
30 45524 tsampikos.
31 45950 tsampikos.
    private final String dbUrl;
32
    private final String dbUserName;
33
    private final String dbPassword;
34 45524 tsampikos.
35
    /*
36
       The Piwik's API method
37
    */
38
    private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
39
    private final String format = "&format=json";
40
41 45950 tsampikos.
    private final Logger log = Logger.getLogger(this.getClass());
42 45524 tsampikos.
43
44 45950 tsampikos.
    public PiwikDownloadLogs(String username, String password, String tokenAuth, String httpProtocol, String piwikURl, String sDate, String logsPath, String dbUrl, String dbUsername, String dbPassword) throws Exception{
45
        this.piwikUsername = username;
46
        this.piwikPassword = password;
47
        this.httpProtocol = httpProtocol;
48
        this.piwikUrl = piwikURl;
49 45524 tsampikos.
50 45950 tsampikos.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
51
        this.startDate = sdf.parse(sDate);
52 45524 tsampikos.
53 45950 tsampikos.
        this.tokenAuth = tokenAuth;
54
        this.logsPath = logsPath;
55
        this.dbUrl = dbUrl;
56
        this.dbUserName = dbUsername;
57
        this.dbPassword = dbPassword;
58 45524 tsampikos.
    }
59
60
    private String getPiwikLogUrl(){
61 45950 tsampikos.
        return httpProtocol + "://" + piwikUrl + "/";
62 45524 tsampikos.
    }
63
64 45950 tsampikos.
    private String getJson(String url,String username, String password) throws Exception {
65 45524 tsampikos.
        //String cred=username+":"+password;
66
        //String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
67
        try {
68
            URL website = new URL(url);
69
            URLConnection connection = website.openConnection();
70
71
            //connection.setRequestProperty ("Authorization", "Basic "+encoded);
72
            StringBuilder response;
73 45950 tsampikos.
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
74 45524 tsampikos.
                response = new StringBuilder();
75
                String inputLine;
76
                while ((inputLine = in.readLine()) != null) {
77
                    response.append(inputLine);
78
                    response.append("\n");
79
                }
80
            }
81
            return response.toString();
82
        }catch (Exception e){
83
            log.error("Failed to get URL: " + e);
84 45950 tsampikos.
            throw new Exception("Failed to get URL: " + e.toString(), e);
85 45524 tsampikos.
        }
86
    }
87
88
    public void getPiwikLogs() throws Exception{
89 45950 tsampikos.
        GetPortalLogs();
90 45524 tsampikos.
        GetRepositoriesLogs();
91
    }
92
93 45950 tsampikos.
    private void GetPortalLogs() throws Exception{
94 45524 tsampikos.
95
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
96
        Calendar start = Calendar.getInstance();
97 45950 tsampikos.
        start.setTime(startDate);
98 45524 tsampikos.
        Calendar end = Calendar.getInstance();
99
        end.add(Calendar.DAY_OF_MONTH, -1);
100
        //end.setTime(getFinalDate());
101
102
        try{
103
            log.info("downloading logs for site with piwik_id: 5");
104
            Class.forName("org.postgresql.Driver");
105 45950 tsampikos.
            Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
106 55646 antonis.le
            PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source='5' HAVING max(timestamp) is not null;");
107 45524 tsampikos.
            ResultSet rs_date = st.executeQuery();
108 55646 antonis.le
109 45524 tsampikos.
            while(rs_date.next()){
110
                start.setTime(sdf.parse(rs_date.getString(1)));
111
            }
112
            rs_date.close();
113
            conn.close();
114 55646 antonis.le
115 45524 tsampikos.
            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
116
117
                String period="&period=day&date="+sdf.format(date);
118 55646 antonis.le
                log.info("Downloading logs for " + sdf.format(date));
119 45524 tsampikos.
120
121 55646 antonis.le
                FileSystem fs = FileSystem.get(new Configuration());
122
                FSDataOutputStream fin = fs.create(new Path(logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json"), true);
123
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=5" + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
124
                String content = "";
125
126
                int i=0;
127
128
                while(!content.equals("[]\n")) {
129
                    String apiUrl = baseApiUrl;
130
131
                    if (i > 0)
132
                        apiUrl += "&filter_offset=" + (i*1000);
133
134
                    content = getJson(apiUrl, piwikUsername, piwikPassword);
135
136
                    fin.write(content.getBytes());
137
138 45950 tsampikos.
                    i++;
139 45524 tsampikos.
                }
140 55646 antonis.le
                fin.close();
141
//
142
//
143
//
144
//
145
//
146
//
147
//                String apiUrl=getPiwikLogUrl()+APImethod+"&idSite=5"+period+format+"&expanded=5&filter_limit=1000&token_auth="+tokenAuth;
148
//                String content = getJson(apiUrl,piwikUsername,piwikPassword);
149
//
150
//                //for (int i=1;i<10;i++){
151
//                int i = 1;
152
//                while(true) {
153
//                    String apiUrlnew=apiUrl+"&filter_offset="+i*1000;
154
//                    String contentNew = getJson(apiUrlnew,piwikUsername,piwikUsername);
155
//                    content += contentNew;
156
//                    i++;
157
//                    if(contentNew.equals("[]\n")){
158
//                        break;
159
//                    }
160
//                }
161
//                flushString(content, logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json");
162 45524 tsampikos.
            }
163
        } catch (Exception e) {
164 55646 antonis.le
            log.error("Failed to get portal logs", e);
165 45524 tsampikos.
            throw new Exception("Failed to get portal logs: " + e.toString(), e);
166
        }
167
    }
168
169 45950 tsampikos.
    private void GetRepositoriesLogs() throws Exception{
170 45524 tsampikos.
171
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
172
        Calendar start = Calendar.getInstance();
173 45950 tsampikos.
        start.setTime(startDate);
174 45524 tsampikos.
        Calendar end = Calendar.getInstance();
175
        end.add(Calendar.DAY_OF_MONTH, -1);
176
        //end.setTime(getFinalDate());
177
178 55646 antonis.le
        Class.forName("org.postgresql.Driver");
179
        Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
180
        Statement statement = conn.createStatement();
181
        ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from shadow.datasource where piwik_id is not null and piwik_id!='5' order by piwik_id;");
182
        while(rs.next()){
183
            int siteId = rs.getInt(1);
184
            PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source=?;");
185
186
            start.setTime(startDate);
187
188
            log.info("downloading logs for site with piwik_id: " + siteId);
189
190
            st.setInt(1, siteId);
191
            ResultSet rs_date = st.executeQuery();
192
193
            while(rs_date.next()){
194
                //log.info("source: " + siteId + " - date: " + rs_date.getString(1));
195
                if(rs_date.getString(1) == null || rs_date.getString(1).equals("null") || rs_date.getString(1).equals("")) {
196
//                        start = Calendar.getInstance();
197
//                        start.add(Calendar.MONTH, -1);
198
                    // DO NOTHING USE this.startDate!!!
199 45524 tsampikos.
                }
200 55646 antonis.le
                else {
201
                    start.setTime(sdf.parse(rs_date.getString(1)));
202
                }
203
            }
204
            rs_date.close();
205 45524 tsampikos.
206 55646 antonis.le
            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
207 45524 tsampikos.
208 55646 antonis.le
                log.info("Downloading logs for " + sdf.format(date));
209 45524 tsampikos.
210 55646 antonis.le
                String period="&period=day&date="+sdf.format(date);
211
                FileSystem fs = FileSystem.get(new Configuration());
212
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
213
                String content = "";
214 45524 tsampikos.
215 55646 antonis.le
                int i=0;
216 45524 tsampikos.
217 55646 antonis.le
                while(!content.equals("[]\n")) {
218 56964 antonis.le
                    FSDataOutputStream fin = fs.create(new Path(logsPath + "repolog/" + siteId + "_Piwiklog"+sdf.format((date)) + "_" + i + ".json"), true);
219 55646 antonis.le
                    String apiUrl = baseApiUrl;
220
221
                    if (i > 0)
222
                        apiUrl += "&filter_offset=" + (i*1000);
223
224
                    content = getJson(apiUrl, piwikUsername, piwikPassword);
225
226
                    fin.write(content.getBytes());
227 56964 antonis.le
                    fin.close();
228 55646 antonis.le
229
                    i++;
230 45524 tsampikos.
                }
231
            }
232
        }
233 55646 antonis.le
        rs.close();
234
        conn.close();
235 45524 tsampikos.
    }
236
237 55646 antonis.le
//    private void flushString(String data, String destination) throws Exception {
238
//        FSDataOutputStream fin;
239
//        try {
240
//            FileSystem fs = FileSystem.get(new Configuration());
241
//            fin = fs.create(new Path(destination), true);
242
//            fin.write(data.getBytes());
243
//            fin.close();
244
//        } catch (Exception e) {
245
//            log.error("Failed  to write exported data to a file : ", e);
246
//            throw new Exception("Failed  to write exported data to a file : " + e.toString(), e);
247
//        }
248
//    }
249 45524 tsampikos.
}