Revision 58415
Added by Antonis Lempesis almost 4 years ago
PiwikDownloadLogs.java | ||
---|---|---|
1 |
package eu.dnetlib.usagestats.export; |
|
2 |
|
|
3 |
import org.apache.hadoop.conf.Configuration; |
|
4 |
import org.apache.hadoop.fs.FSDataOutputStream; |
|
5 |
import org.apache.hadoop.fs.Path; |
|
6 |
import org.apache.hadoop.fs.FileSystem; |
|
7 |
import org.apache.log4j.Logger; |
|
8 |
|
|
9 |
import java.io.*; |
|
10 |
import java.net.URL; |
|
11 |
import java.net.URLConnection; |
|
12 |
import java.sql.PreparedStatement; |
|
13 |
import java.sql.ResultSet; |
|
14 |
import java.sql.Statement; |
|
15 |
import java.text.SimpleDateFormat; |
|
16 |
import java.util.Date; |
|
17 |
import java.util.Calendar; |
|
18 |
|
|
19 |
public class PiwikDownloadLogs { |
|
20 |
|
|
21 |
private final String piwikUrl; |
|
22 |
private Date startDate; |
|
23 |
private final String tokenAuth; |
|
24 |
|
|
25 |
/* |
|
26 |
The Piwik's API method |
|
27 |
*/ |
|
28 |
private final String APImethod = "?module=API&method=Live.getLastVisitsDetails"; |
|
29 |
private final String format = "&format=json"; |
|
30 |
|
|
31 |
private final Logger log = Logger.getLogger(this.getClass()); |
|
32 |
|
|
33 |
public PiwikDownloadLogs(String piwikUrl, String tokenAuth) { |
|
34 |
this.piwikUrl = piwikUrl; |
|
35 |
this.tokenAuth = tokenAuth; |
|
36 |
|
|
37 |
} |
|
38 |
|
|
39 |
private String getPiwikLogUrl() { |
|
40 |
return "https://" + piwikUrl + "/"; |
|
41 |
} |
|
42 |
|
|
43 |
private String getJson(String url) throws Exception { |
|
44 |
try { |
|
45 |
URL website = new URL(url); |
|
46 |
URLConnection connection = website.openConnection(); |
|
47 |
|
|
48 |
//connection.setRequestProperty ("Authorization", "Basic "+encoded); |
|
49 |
StringBuilder response; |
|
50 |
try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) { |
|
51 |
response = new StringBuilder(); |
|
52 |
String inputLine; |
|
53 |
while ((inputLine = in.readLine()) != null) { |
|
54 |
response.append(inputLine); |
|
55 |
response.append("\n"); |
|
56 |
} |
|
57 |
} |
|
58 |
return response.toString(); |
|
59 |
} catch (Exception e) { |
|
60 |
log.error("Failed to get URL: " + e); |
|
61 |
throw new Exception("Failed to get URL: " + e.toString(), e); |
|
62 |
} |
|
63 |
} |
|
64 |
|
|
65 |
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception { |
|
66 |
|
|
67 |
Statement statement = ConnectDB.getConnection().createStatement(); |
|
68 |
ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from public.datasource order by piwik_id;"); |
|
69 |
PreparedStatement st = ConnectDB.getConnection().prepareStatement("SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;"); |
|
70 |
|
|
71 |
while (rs.next()) { |
|
72 |
int siteId = rs.getInt(1); |
|
73 |
st.setInt(1, siteId); |
|
74 |
ResultSet rs_date = st.executeQuery(); |
|
75 |
|
|
76 |
while (rs_date.next()) { |
|
77 |
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); |
|
78 |
Date dateMax = sdf.parse(rs_date.getString(1)); |
|
79 |
//Date dateMax = sdf.parse("2020-02-01"); |
|
80 |
Calendar start = Calendar.getInstance(); |
|
81 |
start.setTime(dateMax); |
|
82 |
Calendar end = Calendar.getInstance(); |
|
83 |
end.add(Calendar.DAY_OF_MONTH, -1); |
|
84 |
|
|
85 |
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) { |
|
86 |
log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date)); |
|
87 |
|
|
88 |
String period = "&period=day&date=" + sdf.format(date); |
|
89 |
String outFolder = ""; |
|
90 |
//portal siteId = 109; |
|
91 |
if (siteId == Integer.parseInt(portalMatomoID)) { |
|
92 |
outFolder = portalLogPath; |
|
93 |
} else { |
|
94 |
outFolder = repoLogsPath; |
|
95 |
} |
|
96 |
FileSystem fs = FileSystem.get(new Configuration()); |
|
97 |
FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true); |
|
98 |
|
|
99 |
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth; |
|
100 |
String content = ""; |
|
101 |
|
|
102 |
int i = 0; |
|
103 |
|
|
104 |
while (!content.equals("[]\n")) { |
|
105 |
String apiUrl = baseApiUrl; |
|
106 |
|
|
107 |
if (i > 0) { |
|
108 |
apiUrl += "&filter_offset=" + (i * 1000); |
|
109 |
} |
|
110 |
|
|
111 |
content = getJson(apiUrl); |
|
112 |
|
|
113 |
fin.write(content.getBytes()); |
|
114 |
|
|
115 |
i++; |
|
116 |
} |
|
117 |
fin.close(); |
|
118 |
} |
|
119 |
} |
|
120 |
} |
|
121 |
} |
|
122 |
} |
Also available in: Unified diff
a ton of fixes. Close to running smoothly in all cases