Project

General

Profile

1
package eu.dnetlib.usagestats.export;
2

    
3
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
20

    
21
public class PiwikDownloadLogs {
22

    
23
    private final String piwikUsername;
24
    private final String piwikPassword;
25
    private final String httpProtocol;
26
    private final String piwikUrl;
27
    private final Date startDate;
28
    private final String tokenAuth;
29
    private final String logsPath;
30

    
31
    private final String dbUrl;
32
    private final String dbUserName;
33
    private final String dbPassword;
34

    
35
    /*
36
       The Piwik's API method 
37
    */
38
    private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
39
    private final String format = "&format=json";
40

    
41
    private final Logger log = Logger.getLogger(this.getClass());
42

    
43

    
44
    public PiwikDownloadLogs(String username, String password, String tokenAuth, String httpProtocol, String piwikURl, String sDate, String logsPath, String dbUrl, String dbUsername, String dbPassword) throws Exception{
45
        this.piwikUsername = username;
46
        this.piwikPassword = password;
47
        this.httpProtocol = httpProtocol;
48
        this.piwikUrl = piwikURl;
49

    
50
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
51
        this.startDate = sdf.parse(sDate);
52

    
53
        this.tokenAuth = tokenAuth;
54
        this.logsPath = logsPath;
55
        this.dbUrl = dbUrl;
56
        this.dbUserName = dbUsername;
57
        this.dbPassword = dbPassword;
58
    }
59

    
60
    private String getPiwikLogUrl(){
61
        return httpProtocol + "://" + piwikUrl + "/";
62
    }
63

    
64
    private String getJson(String url,String username, String password) throws Exception {
65
        //String cred=username+":"+password;
66
        //String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
67
        try {
68
            URL website = new URL(url);
69
            URLConnection connection = website.openConnection();
70

    
71
            //connection.setRequestProperty ("Authorization", "Basic "+encoded);
72
            StringBuilder response;
73
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
74
                response = new StringBuilder();
75
                String inputLine;
76
                while ((inputLine = in.readLine()) != null) {
77
                    response.append(inputLine);
78
                    response.append("\n");
79
                }
80
            }
81
            return response.toString();
82
        }catch (Exception e){
83
            log.error("Failed to get URL: " + e);
84
            throw new Exception("Failed to get URL: " + e.toString(), e);
85
        }
86
    }
87

    
88
    public void getPiwikLogs() throws Exception{
89
        GetPortalLogs();
90
        GetRepositoriesLogs();
91
    }
92

    
93
    private void GetPortalLogs() throws Exception{
94

    
95
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
96
        Calendar start = Calendar.getInstance();
97
        start.setTime(startDate);
98
        Calendar end = Calendar.getInstance();
99
        end.add(Calendar.DAY_OF_MONTH, -1);
100
        //end.setTime(getFinalDate());
101

    
102
        try{
103
            log.info("downloading logs for site with piwik_id: 5");
104
            Class.forName("org.postgresql.Driver");
105
            Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
106
            PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source='5';");
107
            ResultSet rs_date = st.executeQuery();
108
            while(rs_date.next()){
109
                start.setTime(sdf.parse(rs_date.getString(1)));
110
            }
111
            rs_date.close();
112
            conn.close();
113
            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
114

    
115
                String period="&period=day&date="+sdf.format(date);
116

    
117
                String apiUrl=getPiwikLogUrl()+APImethod+"&idSite=5"+period+format+"&expanded=5&filter_limit=1000&token_auth="+tokenAuth;
118
                String content = getJson(apiUrl,piwikUsername,piwikPassword);
119

    
120
                //for (int i=1;i<10;i++){
121
                int i = 1;
122
                while(true) {
123
                    String apiUrlnew=apiUrl+"&filter_offset="+i*1000;
124
                    String contentNew = getJson(apiUrlnew,piwikUsername,piwikUsername);
125
                    content += contentNew;
126
                    i++;
127
                    if(contentNew.equals("[]\n")){
128
                        break;
129
                    }
130
                }
131
                flushString(content, logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json");
132
            }
133
        } catch (Exception e) {
134
            log.error(e);
135
            throw new Exception("Failed to get portal logs: " + e.toString(), e);
136
        }
137
    }
138

    
139
    private void GetRepositoriesLogs() throws Exception{
140

    
141
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
142
        Calendar start = Calendar.getInstance();
143
        start.setTime(startDate);
144
        Calendar end = Calendar.getInstance();
145
        end.add(Calendar.DAY_OF_MONTH, -1);
146
        //end.setTime(getFinalDate());
147

    
148
        try{
149
            Class.forName("org.postgresql.Driver");
150
            Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
151
            Statement statement = conn.createStatement();
152
            ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from shadow.datasource where piwik_id is not null and piwik_id!='5' order by piwik_id;");
153
            while(rs.next()){
154
                start.setTime(startDate);
155
                int siteId = rs.getInt(1);
156
                log.info("downloading logs for site with piwik_id: " + siteId);
157
                PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source=?;");
158
                st.setInt(1, siteId);
159
                ResultSet rs_date = st.executeQuery();
160
                while(rs_date.next()){
161
                    //log.info("source: " + siteId + " - date: " + rs_date.getString(1));
162
                    if(rs_date.getString(1) == null || rs_date.getString(1).equals("null") || rs_date.getString(1).equals("")) {
163
                        start = Calendar.getInstance();
164
                        start.add(Calendar.MONTH, -6);
165
                    }
166
                    else {
167
                        start.setTime(sdf.parse(rs_date.getString(1)));
168
                    }
169
                }
170
                rs_date.close();
171

    
172
                for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
173

    
174
                    String period="&period=day&date="+sdf.format(date);
175

    
176
                    String apiUrl=getPiwikLogUrl()+APImethod+"&idSite="+siteId+period+format+"&expanded=5&filter_limit=1000&token_auth="+tokenAuth;
177
                    String content = getJson(apiUrl,piwikUsername,piwikPassword);
178

    
179
                    //for (int i=1;i<10;i++){
180
                    int i=1;
181
                    while(true) {
182
                        String apiUrlnew=apiUrl+"&filter_offset="+i*1000;
183
                        String contentNew = getJson(apiUrlnew,piwikUsername,piwikPassword);
184
                        content += contentNew;
185
                        i++;
186
                        if(contentNew.equals("[]\n")){
187
                            break;
188
                        }
189
                    }
190
                    flushString(content, logsPath + "repolog/" + siteId + "_Piwiklog"+sdf.format((date))+".json");
191

    
192
                }
193
            }
194
            rs.close();
195
            conn.close();
196
        } catch (Exception e) {
197
            log.error(e);
198
            throw new Exception("Failed to get repository logs: " + e.toString(), e);
199
        }
200
    }
201

    
202
    private void flushString(String data, String destination) throws Exception {
203
        FSDataOutputStream fin;
204
        try {
205
            FileSystem fs = FileSystem.get(new Configuration());
206
            fin = fs.create(new Path(destination), true);
207
            fin.write(data.getBytes());
208
            fin.close();
209
        } catch (Exception e) {
210
            log.error("Failed  to write exported data to a file : ", e);
211
            throw new Exception("Failed  to write exported data to a file : " + e.toString(), e);
212
        }
213
    }
214
}
(2-2/5)