1 |
45524
|
tsampikos.
|
package eu.dnetlib.usagestats.export;
|
2 |
|
|
|
3 |
|
|
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
|
20 |
|
|
|
21 |
|
|
public class PiwikDownloadLogs {
|
22 |
|
|
|
23 |
45950
|
tsampikos.
|
private final String piwikUsername;
|
24 |
|
|
private final String piwikPassword;
|
25 |
|
|
private final String httpProtocol;
|
26 |
|
|
private final String piwikUrl;
|
27 |
|
|
private final Date startDate;
|
28 |
|
|
private final String tokenAuth;
|
29 |
|
|
private final String logsPath;
|
30 |
45524
|
tsampikos.
|
|
31 |
45950
|
tsampikos.
|
private final String dbUrl;
|
32 |
|
|
private final String dbUserName;
|
33 |
|
|
private final String dbPassword;
|
34 |
45524
|
tsampikos.
|
|
35 |
|
|
/*
|
36 |
|
|
The Piwik's API method
|
37 |
|
|
*/
|
38 |
|
|
private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
|
39 |
|
|
private final String format = "&format=json";
|
40 |
|
|
|
41 |
45950
|
tsampikos.
|
private final Logger log = Logger.getLogger(this.getClass());
|
42 |
45524
|
tsampikos.
|
|
43 |
|
|
|
44 |
45950
|
tsampikos.
|
public PiwikDownloadLogs(String username, String password, String tokenAuth, String httpProtocol, String piwikURl, String sDate, String logsPath, String dbUrl, String dbUsername, String dbPassword) throws Exception{
|
45 |
|
|
this.piwikUsername = username;
|
46 |
|
|
this.piwikPassword = password;
|
47 |
|
|
this.httpProtocol = httpProtocol;
|
48 |
|
|
this.piwikUrl = piwikURl;
|
49 |
45524
|
tsampikos.
|
|
50 |
45950
|
tsampikos.
|
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
51 |
|
|
this.startDate = sdf.parse(sDate);
|
52 |
45524
|
tsampikos.
|
|
53 |
45950
|
tsampikos.
|
this.tokenAuth = tokenAuth;
|
54 |
|
|
this.logsPath = logsPath;
|
55 |
|
|
this.dbUrl = dbUrl;
|
56 |
|
|
this.dbUserName = dbUsername;
|
57 |
|
|
this.dbPassword = dbPassword;
|
58 |
45524
|
tsampikos.
|
}
|
59 |
|
|
|
60 |
|
|
private String getPiwikLogUrl(){
|
61 |
45950
|
tsampikos.
|
return httpProtocol + "://" + piwikUrl + "/";
|
62 |
45524
|
tsampikos.
|
}
|
63 |
|
|
|
64 |
45950
|
tsampikos.
|
private String getJson(String url,String username, String password) throws Exception {
|
65 |
45524
|
tsampikos.
|
//String cred=username+":"+password;
|
66 |
|
|
//String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
|
67 |
|
|
try {
|
68 |
|
|
URL website = new URL(url);
|
69 |
|
|
URLConnection connection = website.openConnection();
|
70 |
|
|
|
71 |
|
|
//connection.setRequestProperty ("Authorization", "Basic "+encoded);
|
72 |
|
|
StringBuilder response;
|
73 |
45950
|
tsampikos.
|
try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
|
74 |
45524
|
tsampikos.
|
response = new StringBuilder();
|
75 |
|
|
String inputLine;
|
76 |
|
|
while ((inputLine = in.readLine()) != null) {
|
77 |
|
|
response.append(inputLine);
|
78 |
|
|
response.append("\n");
|
79 |
|
|
}
|
80 |
|
|
}
|
81 |
|
|
return response.toString();
|
82 |
|
|
}catch (Exception e){
|
83 |
|
|
log.error("Failed to get URL: " + e);
|
84 |
45950
|
tsampikos.
|
throw new Exception("Failed to get URL: " + e.toString(), e);
|
85 |
45524
|
tsampikos.
|
}
|
86 |
|
|
}
|
87 |
|
|
|
88 |
|
|
public void getPiwikLogs() throws Exception{
|
89 |
45950
|
tsampikos.
|
GetPortalLogs();
|
90 |
45524
|
tsampikos.
|
GetRepositoriesLogs();
|
91 |
|
|
}
|
92 |
|
|
|
93 |
45950
|
tsampikos.
|
private void GetPortalLogs() throws Exception{
|
94 |
45524
|
tsampikos.
|
|
95 |
|
|
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
96 |
|
|
Calendar start = Calendar.getInstance();
|
97 |
45950
|
tsampikos.
|
start.setTime(startDate);
|
98 |
45524
|
tsampikos.
|
Calendar end = Calendar.getInstance();
|
99 |
|
|
end.add(Calendar.DAY_OF_MONTH, -1);
|
100 |
|
|
//end.setTime(getFinalDate());
|
101 |
|
|
|
102 |
|
|
try{
|
103 |
|
|
log.info("downloading logs for site with piwik_id: 5");
|
104 |
|
|
Class.forName("org.postgresql.Driver");
|
105 |
45950
|
tsampikos.
|
Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
|
106 |
55646
|
antonis.le
|
PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source='5' HAVING max(timestamp) is not null;");
|
107 |
45524
|
tsampikos.
|
ResultSet rs_date = st.executeQuery();
|
108 |
55646
|
antonis.le
|
|
109 |
45524
|
tsampikos.
|
while(rs_date.next()){
|
110 |
|
|
start.setTime(sdf.parse(rs_date.getString(1)));
|
111 |
|
|
}
|
112 |
|
|
rs_date.close();
|
113 |
|
|
conn.close();
|
114 |
55646
|
antonis.le
|
|
115 |
45524
|
tsampikos.
|
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
|
116 |
|
|
|
117 |
|
|
String period="&period=day&date="+sdf.format(date);
|
118 |
55646
|
antonis.le
|
log.info("Downloading logs for " + sdf.format(date));
|
119 |
45524
|
tsampikos.
|
|
120 |
|
|
|
121 |
55646
|
antonis.le
|
FileSystem fs = FileSystem.get(new Configuration());
|
122 |
|
|
FSDataOutputStream fin = fs.create(new Path(logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json"), true);
|
123 |
|
|
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=5" + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
|
124 |
|
|
String content = "";
|
125 |
|
|
|
126 |
|
|
int i=0;
|
127 |
|
|
|
128 |
|
|
while(!content.equals("[]\n")) {
|
129 |
|
|
String apiUrl = baseApiUrl;
|
130 |
|
|
|
131 |
|
|
if (i > 0)
|
132 |
|
|
apiUrl += "&filter_offset=" + (i*1000);
|
133 |
|
|
|
134 |
|
|
content = getJson(apiUrl, piwikUsername, piwikPassword);
|
135 |
|
|
|
136 |
|
|
fin.write(content.getBytes());
|
137 |
|
|
|
138 |
45950
|
tsampikos.
|
i++;
|
139 |
45524
|
tsampikos.
|
}
|
140 |
55646
|
antonis.le
|
fin.close();
|
141 |
|
|
//
|
142 |
|
|
//
|
143 |
|
|
//
|
144 |
|
|
//
|
145 |
|
|
//
|
146 |
|
|
//
|
147 |
|
|
// String apiUrl=getPiwikLogUrl()+APImethod+"&idSite=5"+period+format+"&expanded=5&filter_limit=1000&token_auth="+tokenAuth;
|
148 |
|
|
// String content = getJson(apiUrl,piwikUsername,piwikPassword);
|
149 |
|
|
//
|
150 |
|
|
// //for (int i=1;i<10;i++){
|
151 |
|
|
// int i = 1;
|
152 |
|
|
// while(true) {
|
153 |
|
|
// String apiUrlnew=apiUrl+"&filter_offset="+i*1000;
|
154 |
|
|
// String contentNew = getJson(apiUrlnew,piwikUsername,piwikUsername);
|
155 |
|
|
// content += contentNew;
|
156 |
|
|
// i++;
|
157 |
|
|
// if(contentNew.equals("[]\n")){
|
158 |
|
|
// break;
|
159 |
|
|
// }
|
160 |
|
|
// }
|
161 |
|
|
// flushString(content, logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json");
|
162 |
45524
|
tsampikos.
|
}
|
163 |
|
|
} catch (Exception e) {
|
164 |
55646
|
antonis.le
|
log.error("Failed to get portal logs", e);
|
165 |
45524
|
tsampikos.
|
throw new Exception("Failed to get portal logs: " + e.toString(), e);
|
166 |
|
|
}
|
167 |
|
|
}
|
168 |
|
|
|
169 |
45950
|
tsampikos.
|
private void GetRepositoriesLogs() throws Exception{
|
170 |
45524
|
tsampikos.
|
|
171 |
|
|
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
172 |
|
|
Calendar start = Calendar.getInstance();
|
173 |
45950
|
tsampikos.
|
start.setTime(startDate);
|
174 |
45524
|
tsampikos.
|
Calendar end = Calendar.getInstance();
|
175 |
|
|
end.add(Calendar.DAY_OF_MONTH, -1);
|
176 |
|
|
//end.setTime(getFinalDate());
|
177 |
|
|
|
178 |
55646
|
antonis.le
|
Class.forName("org.postgresql.Driver");
|
179 |
|
|
Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
|
180 |
|
|
Statement statement = conn.createStatement();
|
181 |
|
|
ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from shadow.datasource where piwik_id is not null and piwik_id!='5' order by piwik_id;");
|
182 |
|
|
while(rs.next()){
|
183 |
|
|
int siteId = rs.getInt(1);
|
184 |
|
|
PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source=?;");
|
185 |
|
|
|
186 |
|
|
start.setTime(startDate);
|
187 |
|
|
|
188 |
|
|
log.info("downloading logs for site with piwik_id: " + siteId);
|
189 |
|
|
|
190 |
|
|
st.setInt(1, siteId);
|
191 |
|
|
ResultSet rs_date = st.executeQuery();
|
192 |
|
|
|
193 |
|
|
while(rs_date.next()){
|
194 |
|
|
//log.info("source: " + siteId + " - date: " + rs_date.getString(1));
|
195 |
|
|
if(rs_date.getString(1) == null || rs_date.getString(1).equals("null") || rs_date.getString(1).equals("")) {
|
196 |
|
|
// start = Calendar.getInstance();
|
197 |
|
|
// start.add(Calendar.MONTH, -1);
|
198 |
|
|
// DO NOTHING USE this.startDate!!!
|
199 |
45524
|
tsampikos.
|
}
|
200 |
55646
|
antonis.le
|
else {
|
201 |
|
|
start.setTime(sdf.parse(rs_date.getString(1)));
|
202 |
|
|
}
|
203 |
|
|
}
|
204 |
|
|
rs_date.close();
|
205 |
45524
|
tsampikos.
|
|
206 |
55646
|
antonis.le
|
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
|
207 |
45524
|
tsampikos.
|
|
208 |
55646
|
antonis.le
|
log.info("Downloading logs for " + sdf.format(date));
|
209 |
45524
|
tsampikos.
|
|
210 |
55646
|
antonis.le
|
String period="&period=day&date="+sdf.format(date);
|
211 |
|
|
FileSystem fs = FileSystem.get(new Configuration());
|
212 |
|
|
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
|
213 |
|
|
String content = "";
|
214 |
45524
|
tsampikos.
|
|
215 |
55646
|
antonis.le
|
int i=0;
|
216 |
45524
|
tsampikos.
|
|
217 |
55646
|
antonis.le
|
while(!content.equals("[]\n")) {
|
218 |
56964
|
antonis.le
|
FSDataOutputStream fin = fs.create(new Path(logsPath + "repolog/" + siteId + "_Piwiklog"+sdf.format((date)) + "_" + i + ".json"), true);
|
219 |
55646
|
antonis.le
|
String apiUrl = baseApiUrl;
|
220 |
|
|
|
221 |
|
|
if (i > 0)
|
222 |
|
|
apiUrl += "&filter_offset=" + (i*1000);
|
223 |
|
|
|
224 |
|
|
content = getJson(apiUrl, piwikUsername, piwikPassword);
|
225 |
|
|
|
226 |
|
|
fin.write(content.getBytes());
|
227 |
56964
|
antonis.le
|
fin.close();
|
228 |
55646
|
antonis.le
|
|
229 |
|
|
i++;
|
230 |
45524
|
tsampikos.
|
}
|
231 |
|
|
}
|
232 |
|
|
}
|
233 |
55646
|
antonis.le
|
rs.close();
|
234 |
|
|
conn.close();
|
235 |
45524
|
tsampikos.
|
}
|
236 |
|
|
|
237 |
55646
|
antonis.le
|
// private void flushString(String data, String destination) throws Exception {
|
238 |
|
|
// FSDataOutputStream fin;
|
239 |
|
|
// try {
|
240 |
|
|
// FileSystem fs = FileSystem.get(new Configuration());
|
241 |
|
|
// fin = fs.create(new Path(destination), true);
|
242 |
|
|
// fin.write(data.getBytes());
|
243 |
|
|
// fin.close();
|
244 |
|
|
// } catch (Exception e) {
|
245 |
|
|
// log.error("Failed to write exported data to a file : ", e);
|
246 |
|
|
// throw new Exception("Failed to write exported data to a file : " + e.toString(), e);
|
247 |
|
|
// }
|
248 |
|
|
// }
|
249 |
45524
|
tsampikos.
|
}
|