Project

General

Profile

1
package eu.dnetlib.usagestats.export;
2

    
3
import org.apache.hadoop.conf.Configuration;
4
import org.apache.hadoop.fs.FSDataOutputStream;
5
import org.apache.hadoop.fs.Path;
6
import org.apache.hadoop.fs.FileSystem;
7
import org.apache.log4j.Logger;
8

    
9
import java.io.*;
10
import java.net.URL;
11
import java.net.URLConnection;
12
import java.sql.Connection;
13
import java.sql.DriverManager;
14
import java.sql.PreparedStatement;
15
import java.sql.ResultSet;
16
import java.sql.Statement;
17
import java.text.SimpleDateFormat;
18
import java.util.Date;
19
import java.util.Calendar;
20

    
21
public class PiwikDownloadLogs {
22

    
23
    private final String piwikUsername;
24
    private final String piwikPassword;
25
    private final String httpProtocol;
26
    private final String piwikUrl;
27
    private final Date startDate;
28
    private final String tokenAuth;
29
    private final String logsPath;
30

    
31
    private final String dbUrl;
32
    private final String dbUserName;
33
    private final String dbPassword;
34

    
35
    /*
36
       The Piwik's API method 
37
    */
38
    private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
39
    private final String format = "&format=json";
40

    
41
    private final Logger log = Logger.getLogger(this.getClass());
42

    
43

    
44
    public PiwikDownloadLogs(String username, String password, String tokenAuth, String httpProtocol, String piwikURl, String sDate, String logsPath, String dbUrl, String dbUsername, String dbPassword) throws Exception{
45
        this.piwikUsername = username;
46
        this.piwikPassword = password;
47
        this.httpProtocol = httpProtocol;
48
        this.piwikUrl = piwikURl;
49

    
50
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
51
        this.startDate = sdf.parse(sDate);
52

    
53
        this.tokenAuth = tokenAuth;
54
        this.logsPath = logsPath;
55
        this.dbUrl = dbUrl;
56
        this.dbUserName = dbUsername;
57
        this.dbPassword = dbPassword;
58
    }
59

    
60
    private String getPiwikLogUrl(){
61
        return httpProtocol + "://" + piwikUrl + "/";
62
    }
63

    
64
    private String getJson(String url,String username, String password) throws Exception {
65
        //String cred=username+":"+password;
66
        //String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
67
        try {
68
            URL website = new URL(url);
69
            URLConnection connection = website.openConnection();
70

    
71
            //connection.setRequestProperty ("Authorization", "Basic "+encoded);
72
            StringBuilder response;
73
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
74
                response = new StringBuilder();
75
                String inputLine;
76
                while ((inputLine = in.readLine()) != null) {
77
                    response.append(inputLine);
78
                    response.append("\n");
79
                }
80
            }
81
            return response.toString();
82
        }catch (Exception e){
83
            log.error("Failed to get URL: " + e);
84
            throw new Exception("Failed to get URL: " + e.toString(), e);
85
        }
86
    }
87

    
88
    public void getPiwikLogs() throws Exception{
89
        GetPortalLogs();
90
        GetRepositoriesLogs();
91
    }
92

    
93
    private void GetPortalLogs() throws Exception{
94

    
95
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
96
        Calendar start = Calendar.getInstance();
97
        start.setTime(startDate);
98
        Calendar end = Calendar.getInstance();
99
        end.add(Calendar.DAY_OF_MONTH, -1);
100
        //end.setTime(getFinalDate());
101

    
102
        try{
103
            log.info("downloading logs for site with piwik_id: 5");
104
            Class.forName("org.postgresql.Driver");
105
            Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
106
            PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source='5' HAVING max(timestamp) is not null;");
107
            ResultSet rs_date = st.executeQuery();
108

    
109
            while(rs_date.next()){
110
                start.setTime(sdf.parse(rs_date.getString(1)));
111
            }
112
            rs_date.close();
113
            conn.close();
114

    
115
            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
116

    
117
                String period="&period=day&date="+sdf.format(date);
118
                log.info("Downloading logs for " + sdf.format(date));
119

    
120

    
121
                FileSystem fs = FileSystem.get(new Configuration());
122
                FSDataOutputStream fin = fs.create(new Path(logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json"), true);
123
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=5" + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
124
                String content = "";
125

    
126
                int i=0;
127

    
128
                while(!content.equals("[]\n")) {
129
                    String apiUrl = baseApiUrl;
130

    
131
                    if (i > 0)
132
                        apiUrl += "&filter_offset=" + (i*1000);
133

    
134
                    content = getJson(apiUrl, piwikUsername, piwikPassword);
135

    
136
                    fin.write(content.getBytes());
137

    
138
                    i++;
139
                }
140
                fin.close();
141
//
142
//
143
//
144
//
145
//
146
//
147
//                String apiUrl=getPiwikLogUrl()+APImethod+"&idSite=5"+period+format+"&expanded=5&filter_limit=1000&token_auth="+tokenAuth;
148
//                String content = getJson(apiUrl,piwikUsername,piwikPassword);
149
//
150
//                //for (int i=1;i<10;i++){
151
//                int i = 1;
152
//                while(true) {
153
//                    String apiUrlnew=apiUrl+"&filter_offset="+i*1000;
154
//                    String contentNew = getJson(apiUrlnew,piwikUsername,piwikUsername);
155
//                    content += contentNew;
156
//                    i++;
157
//                    if(contentNew.equals("[]\n")){
158
//                        break;
159
//                    }
160
//                }
161
//                flushString(content, logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json");
162
            }
163
        } catch (Exception e) {
164
            log.error("Failed to get portal logs", e);
165
            throw new Exception("Failed to get portal logs: " + e.toString(), e);
166
        }
167
    }
168

    
169
    private void GetRepositoriesLogs() throws Exception{
170

    
171
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
172
        Calendar start = Calendar.getInstance();
173
        start.setTime(startDate);
174
        Calendar end = Calendar.getInstance();
175
        end.add(Calendar.DAY_OF_MONTH, -1);
176
        //end.setTime(getFinalDate());
177

    
178
        Class.forName("org.postgresql.Driver");
179
        Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
180
        Statement statement = conn.createStatement();
181
        ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from shadow.datasource where piwik_id is not null and piwik_id!='5' order by piwik_id;");
182
        while(rs.next()){
183
            int siteId = rs.getInt(1);
184
            PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source=?;");
185

    
186
            start.setTime(startDate);
187

    
188
            log.info("downloading logs for site with piwik_id: " + siteId);
189

    
190
            st.setInt(1, siteId);
191
            ResultSet rs_date = st.executeQuery();
192

    
193
            while(rs_date.next()){
194
                //log.info("source: " + siteId + " - date: " + rs_date.getString(1));
195
                if(rs_date.getString(1) == null || rs_date.getString(1).equals("null") || rs_date.getString(1).equals("")) {
196
//                        start = Calendar.getInstance();
197
//                        start.add(Calendar.MONTH, -1);
198
                    // DO NOTHING USE this.startDate!!!
199
                }
200
                else {
201
                    start.setTime(sdf.parse(rs_date.getString(1)));
202
                }
203
            }
204
            rs_date.close();
205

    
206
            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
207

    
208
                log.info("Downloading logs for " + sdf.format(date));
209

    
210
                String period="&period=day&date="+sdf.format(date);
211
                FileSystem fs = FileSystem.get(new Configuration());
212
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
213
                String content = "";
214

    
215
                int i=0;
216

    
217
                while(!content.equals("[]\n")) {
218
                    FSDataOutputStream fin = fs.create(new Path(logsPath + "repolog/" + siteId + "_Piwiklog"+sdf.format((date)) + "_" + i + ".json"), true);
219
                    String apiUrl = baseApiUrl;
220

    
221
                    if (i > 0)
222
                        apiUrl += "&filter_offset=" + (i*1000);
223

    
224
                    content = getJson(apiUrl, piwikUsername, piwikPassword);
225

    
226
                    fin.write(content.getBytes());
227
                    fin.close();
228

    
229
                    i++;
230
                }
231
            }
232
        }
233
        rs.close();
234
        conn.close();
235
    }
236

    
237
//    private void flushString(String data, String destination) throws Exception {
238
//        FSDataOutputStream fin;
239
//        try {
240
//            FileSystem fs = FileSystem.get(new Configuration());
241
//            fin = fs.create(new Path(destination), true);
242
//            fin.write(data.getBytes());
243
//            fin.close();
244
//        } catch (Exception e) {
245
//            log.error("Failed  to write exported data to a file : ", e);
246
//            throw new Exception("Failed  to write exported data to a file : " + e.toString(), e);
247
//        }
248
//    }
249
}
(2-2/5)