1
|
package eu.dnetlib.usagestats.export;
|
2
|
|
3
|
import org.apache.hadoop.conf.Configuration;
|
4
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
5
|
import org.apache.hadoop.fs.Path;
|
6
|
import org.apache.hadoop.fs.FileSystem;
|
7
|
import org.apache.log4j.Logger;
|
8
|
|
9
|
import java.io.*;
|
10
|
import java.net.URL;
|
11
|
import java.net.URLConnection;
|
12
|
import java.sql.Connection;
|
13
|
import java.sql.DriverManager;
|
14
|
import java.sql.PreparedStatement;
|
15
|
import java.sql.ResultSet;
|
16
|
import java.sql.Statement;
|
17
|
import java.text.SimpleDateFormat;
|
18
|
import java.util.Date;
|
19
|
import java.util.Calendar;
|
20
|
|
21
|
public class PiwikDownloadLogs {
|
22
|
|
23
|
private final String piwikUsername;
|
24
|
private final String piwikPassword;
|
25
|
private final String httpProtocol;
|
26
|
private final String piwikUrl;
|
27
|
private final Date startDate;
|
28
|
private final String tokenAuth;
|
29
|
private final String logsPath;
|
30
|
|
31
|
private final String dbUrl;
|
32
|
private final String dbUserName;
|
33
|
private final String dbPassword;
|
34
|
|
35
|
/*
|
36
|
The Piwik's API method
|
37
|
*/
|
38
|
private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
|
39
|
private final String format = "&format=json";
|
40
|
|
41
|
private final Logger log = Logger.getLogger(this.getClass());
|
42
|
|
43
|
|
44
|
public PiwikDownloadLogs(String username, String password, String tokenAuth, String httpProtocol, String piwikURl, String sDate, String logsPath, String dbUrl, String dbUsername, String dbPassword) throws Exception{
|
45
|
this.piwikUsername = username;
|
46
|
this.piwikPassword = password;
|
47
|
this.httpProtocol = httpProtocol;
|
48
|
this.piwikUrl = piwikURl;
|
49
|
|
50
|
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
51
|
this.startDate = sdf.parse(sDate);
|
52
|
|
53
|
this.tokenAuth = tokenAuth;
|
54
|
this.logsPath = logsPath;
|
55
|
this.dbUrl = dbUrl;
|
56
|
this.dbUserName = dbUsername;
|
57
|
this.dbPassword = dbPassword;
|
58
|
}
|
59
|
|
60
|
private String getPiwikLogUrl(){
|
61
|
return httpProtocol + "://" + piwikUrl + "/";
|
62
|
}
|
63
|
|
64
|
private String getJson(String url,String username, String password) throws Exception {
|
65
|
//String cred=username+":"+password;
|
66
|
//String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
|
67
|
try {
|
68
|
URL website = new URL(url);
|
69
|
URLConnection connection = website.openConnection();
|
70
|
|
71
|
//connection.setRequestProperty ("Authorization", "Basic "+encoded);
|
72
|
StringBuilder response;
|
73
|
try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
|
74
|
response = new StringBuilder();
|
75
|
String inputLine;
|
76
|
while ((inputLine = in.readLine()) != null) {
|
77
|
response.append(inputLine);
|
78
|
response.append("\n");
|
79
|
}
|
80
|
}
|
81
|
return response.toString();
|
82
|
}catch (Exception e){
|
83
|
log.error("Failed to get URL: " + e);
|
84
|
throw new Exception("Failed to get URL: " + e.toString(), e);
|
85
|
}
|
86
|
}
|
87
|
|
88
|
public void getPiwikLogs() throws Exception{
|
89
|
GetPortalLogs();
|
90
|
GetRepositoriesLogs();
|
91
|
}
|
92
|
|
93
|
private void GetPortalLogs() throws Exception{
|
94
|
|
95
|
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
96
|
Calendar start = Calendar.getInstance();
|
97
|
start.setTime(startDate);
|
98
|
Calendar end = Calendar.getInstance();
|
99
|
end.add(Calendar.DAY_OF_MONTH, -1);
|
100
|
//end.setTime(getFinalDate());
|
101
|
|
102
|
try{
|
103
|
log.info("downloading logs for site with piwik_id: 5");
|
104
|
Class.forName("org.postgresql.Driver");
|
105
|
Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
|
106
|
PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source='5' HAVING max(timestamp) is not null;");
|
107
|
ResultSet rs_date = st.executeQuery();
|
108
|
|
109
|
while(rs_date.next()){
|
110
|
start.setTime(sdf.parse(rs_date.getString(1)));
|
111
|
}
|
112
|
rs_date.close();
|
113
|
conn.close();
|
114
|
|
115
|
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
|
116
|
|
117
|
String period="&period=day&date="+sdf.format(date);
|
118
|
log.info("Downloading logs for " + sdf.format(date));
|
119
|
|
120
|
|
121
|
FileSystem fs = FileSystem.get(new Configuration());
|
122
|
FSDataOutputStream fin = fs.create(new Path(logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json"), true);
|
123
|
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=5" + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
|
124
|
String content = "";
|
125
|
|
126
|
int i=0;
|
127
|
|
128
|
while(!content.equals("[]\n")) {
|
129
|
String apiUrl = baseApiUrl;
|
130
|
|
131
|
if (i > 0)
|
132
|
apiUrl += "&filter_offset=" + (i*1000);
|
133
|
|
134
|
content = getJson(apiUrl, piwikUsername, piwikPassword);
|
135
|
|
136
|
fin.write(content.getBytes());
|
137
|
|
138
|
i++;
|
139
|
}
|
140
|
fin.close();
|
141
|
//
|
142
|
//
|
143
|
//
|
144
|
//
|
145
|
//
|
146
|
//
|
147
|
// String apiUrl=getPiwikLogUrl()+APImethod+"&idSite=5"+period+format+"&expanded=5&filter_limit=1000&token_auth="+tokenAuth;
|
148
|
// String content = getJson(apiUrl,piwikUsername,piwikPassword);
|
149
|
//
|
150
|
// //for (int i=1;i<10;i++){
|
151
|
// int i = 1;
|
152
|
// while(true) {
|
153
|
// String apiUrlnew=apiUrl+"&filter_offset="+i*1000;
|
154
|
// String contentNew = getJson(apiUrlnew,piwikUsername,piwikUsername);
|
155
|
// content += contentNew;
|
156
|
// i++;
|
157
|
// if(contentNew.equals("[]\n")){
|
158
|
// break;
|
159
|
// }
|
160
|
// }
|
161
|
// flushString(content, logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json");
|
162
|
}
|
163
|
} catch (Exception e) {
|
164
|
log.error("Failed to get portal logs", e);
|
165
|
throw new Exception("Failed to get portal logs: " + e.toString(), e);
|
166
|
}
|
167
|
}
|
168
|
|
169
|
private void GetRepositoriesLogs() throws Exception{
|
170
|
|
171
|
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
172
|
Calendar start = Calendar.getInstance();
|
173
|
start.setTime(startDate);
|
174
|
Calendar end = Calendar.getInstance();
|
175
|
end.add(Calendar.DAY_OF_MONTH, -1);
|
176
|
//end.setTime(getFinalDate());
|
177
|
|
178
|
Class.forName("org.postgresql.Driver");
|
179
|
Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
|
180
|
Statement statement = conn.createStatement();
|
181
|
ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from shadow.datasource where piwik_id is not null and piwik_id!='5' order by piwik_id;");
|
182
|
while(rs.next()){
|
183
|
int siteId = rs.getInt(1);
|
184
|
PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source=?;");
|
185
|
|
186
|
start.setTime(startDate);
|
187
|
|
188
|
log.info("downloading logs for site with piwik_id: " + siteId);
|
189
|
|
190
|
st.setInt(1, siteId);
|
191
|
ResultSet rs_date = st.executeQuery();
|
192
|
|
193
|
while(rs_date.next()){
|
194
|
//log.info("source: " + siteId + " - date: " + rs_date.getString(1));
|
195
|
if(rs_date.getString(1) == null || rs_date.getString(1).equals("null") || rs_date.getString(1).equals("")) {
|
196
|
// start = Calendar.getInstance();
|
197
|
// start.add(Calendar.MONTH, -1);
|
198
|
// DO NOTHING USE this.startDate!!!
|
199
|
}
|
200
|
else {
|
201
|
start.setTime(sdf.parse(rs_date.getString(1)));
|
202
|
}
|
203
|
}
|
204
|
rs_date.close();
|
205
|
|
206
|
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
|
207
|
|
208
|
log.info("Downloading logs for " + sdf.format(date));
|
209
|
|
210
|
String period="&period=day&date="+sdf.format(date);
|
211
|
FileSystem fs = FileSystem.get(new Configuration());
|
212
|
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
|
213
|
String content = "";
|
214
|
|
215
|
int i=0;
|
216
|
|
217
|
while(!content.equals("[]\n")) {
|
218
|
FSDataOutputStream fin = fs.create(new Path(logsPath + "repolog/" + siteId + "_Piwiklog"+sdf.format((date)) + "_" + i + ".json"), true);
|
219
|
String apiUrl = baseApiUrl;
|
220
|
|
221
|
if (i > 0)
|
222
|
apiUrl += "&filter_offset=" + (i*1000);
|
223
|
|
224
|
content = getJson(apiUrl, piwikUsername, piwikPassword);
|
225
|
|
226
|
fin.write(content.getBytes());
|
227
|
fin.close();
|
228
|
|
229
|
i++;
|
230
|
}
|
231
|
}
|
232
|
}
|
233
|
rs.close();
|
234
|
conn.close();
|
235
|
}
|
236
|
|
237
|
// private void flushString(String data, String destination) throws Exception {
|
238
|
// FSDataOutputStream fin;
|
239
|
// try {
|
240
|
// FileSystem fs = FileSystem.get(new Configuration());
|
241
|
// fin = fs.create(new Path(destination), true);
|
242
|
// fin.write(data.getBytes());
|
243
|
// fin.close();
|
244
|
// } catch (Exception e) {
|
245
|
// log.error("Failed to write exported data to a file : ", e);
|
246
|
// throw new Exception("Failed to write exported data to a file : " + e.toString(), e);
|
247
|
// }
|
248
|
// }
|
249
|
}
|