package eu.dnetlib.usagestats.export;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

public class LaReferenciaDownloadLogs {
|
23
|
|
24
|
private final String piwikUrl;
|
25
|
private Date startDate;
|
26
|
private final String tokenAuth;
|
27
|
|
28
|
/*
|
29
|
The Piwik's API method
|
30
|
*/
|
31
|
private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
|
32
|
private final String format = "&format=json";
|
33
|
private final String ApimethodGetAllSites = "?module=API&method=SitesManager.getSitesWithViewAccess";
|
34
|
|
35
|
private final Logger log = Logger.getLogger(this.getClass());
|
36
|
|
37
|
public LaReferenciaDownloadLogs(String piwikUrl, String tokenAuth) throws Exception {
|
38
|
this.piwikUrl = piwikUrl;
|
39
|
this.tokenAuth = tokenAuth;
|
40
|
this.createTables();
|
41
|
this.createTmpTables();
|
42
|
}
|
43
|
private void createTables() throws Exception {
|
44
|
try {
|
45
|
Statement stmt = ConnectDB.getConnection().createStatement();
|
46
|
String sqlCreateTableLareferenciaLog = "CREATE TABLE IF NOT EXISTS lareferencialog(matomoid INTEGER, source TEXT, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
|
47
|
String sqlcreateRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
|
48
|
+ " ON INSERT TO lareferencialog "
|
49
|
+ " WHERE (EXISTS ( SELECT lareferencialog.matomoid, lareferencialog.source, lareferencialog.id_visit,"
|
50
|
+ "lareferencialog.action, lareferencialog.\"timestamp\", lareferencialog.entity_id "
|
51
|
+ "FROM lareferencialog "
|
52
|
+ "WHERE lareferencialog.matomoid=new.matomoid AND lareferencialog.source = new.source AND lareferencialog.id_visit = new.id_visit AND lareferencialog.action = new.action AND lareferencialog.entity_id = new.entity_id AND lareferencialog.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
|
53
|
String sqlCreateRuleIndexLaReferenciaLog = "create index if not exists lareferencialog_rule on lareferencialog(matomoid, source, id_visit, action, entity_id, \"timestamp\");";
|
54
|
stmt.executeUpdate(sqlCreateTableLareferenciaLog);
|
55
|
stmt.executeUpdate(sqlcreateRuleLaReferenciaLog);
|
56
|
stmt.executeUpdate(sqlCreateRuleIndexLaReferenciaLog);
|
57
|
|
58
|
stmt.close();
|
59
|
ConnectDB.getConnection().close();
|
60
|
log.info("Lareferencia Tables Created");
|
61
|
|
62
|
} catch (Exception e) {
|
63
|
log.error("Failed to create tables: " + e);
|
64
|
throw new Exception("Failed to create tables: " + e.toString(), e);
|
65
|
//System.exit(0);
|
66
|
}
|
67
|
}
|
68
|
|
69
|
private void createTmpTables() throws Exception {
|
70
|
|
71
|
try {
|
72
|
Statement stmt = ConnectDB.getConnection().createStatement();
|
73
|
String sqlCreateTmpTableLaReferenciaLog = "CREATE TABLE IF NOT EXISTS lareferencialogtmp(matomoid INTEGER, source TEXT, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
|
74
|
String sqlcreateTmpRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
|
75
|
+ " ON INSERT TO lareferencialogtmp "
|
76
|
+ " WHERE (EXISTS ( SELECT lareferencialogtmp.matomoid, lareferencialogtmp.source, lareferencialogtmp.id_visit,"
|
77
|
+ "lareferencialogtmp.action, lareferencialogtmp.\"timestamp\", lareferencialogtmp.entity_id "
|
78
|
+ "FROM lareferencialogtmp "
|
79
|
+ "WHERE lareferencialogtmp.matomoid=new.matomoid AND lareferencialogtmp.source = new.source AND lareferencialogtmp.id_visit = new.id_visit AND lareferencialogtmp.action = new.action AND lareferencialogtmp.entity_id = new.entity_id AND lareferencialogtmp.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
|
80
|
stmt.executeUpdate(sqlCreateTmpTableLaReferenciaLog);
|
81
|
stmt.executeUpdate(sqlcreateTmpRuleLaReferenciaLog);
|
82
|
|
83
|
stmt.close();
|
84
|
log.info("Lareferencia Tmp Tables Created");
|
85
|
|
86
|
} catch (Exception e) {
|
87
|
log.error("Failed to create tmptables: " + e);
|
88
|
throw new Exception("Failed to create tmp tables: " + e.toString(), e);
|
89
|
//System.exit(0);
|
90
|
}
|
91
|
}
|
92
|
private String getPiwikLogUrl() {
|
93
|
return piwikUrl + "/";
|
94
|
}
|
95
|
|
96
|
private String getJson(String url) throws Exception {
|
97
|
try {
|
98
|
URL website = new URL(url);
|
99
|
URLConnection connection = website.openConnection();
|
100
|
|
101
|
StringBuilder response;
|
102
|
try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
|
103
|
response = new StringBuilder();
|
104
|
String inputLine;
|
105
|
while ((inputLine = in.readLine()) != null) {
|
106
|
response.append(inputLine);
|
107
|
response.append("\n");
|
108
|
}
|
109
|
}
|
110
|
return response.toString();
|
111
|
} catch (Exception e) {
|
112
|
log.error("Failed to get URL: " + e);
|
113
|
throw new Exception("Failed to get URL: " + e.toString(), e);
|
114
|
}
|
115
|
}
|
116
|
|
117
|
public void GetLaReferenciaRepos(String repoLogsPath) throws Exception {
|
118
|
|
119
|
String baseApiUrl = getPiwikLogUrl() + ApimethodGetAllSites + format + "&token_auth=" + this.tokenAuth;
|
120
|
String content = "";
|
121
|
|
122
|
content = getJson(baseApiUrl);
|
123
|
JSONParser parser = new JSONParser();
|
124
|
JSONArray jsonArray = (JSONArray) parser.parse(content);
|
125
|
for (Object aJsonArray : jsonArray) {
|
126
|
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
|
127
|
int idSite = Integer.parseInt(jsonObjectRow.get("idsite").toString());
|
128
|
this.GetLaReFerenciaLogs(repoLogsPath, idSite);
|
129
|
}
|
130
|
}
|
131
|
|
132
|
public void GetLaReFerenciaLogs(String repoLogsPath,
|
133
|
int laReferencialMatomoID) throws Exception {
|
134
|
|
135
|
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
136
|
|
137
|
Calendar start = Calendar.getInstance();
|
138
|
start.set(Calendar.YEAR, 2020);
|
139
|
start.set(Calendar.MONTH, Calendar.JANUARY);
|
140
|
start.set(Calendar.DAY_OF_MONTH, 1);
|
141
|
|
142
|
Calendar end = Calendar.getInstance();
|
143
|
end.add(Calendar.DAY_OF_MONTH, -1);
|
144
|
|
145
|
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
146
|
PreparedStatement st = ConnectDB.getConnection().prepareStatement("SELECT max(timestamp) FROM lareferencialog WHERE matomoid=? HAVING max(timestamp) is not null;");
|
147
|
st.setInt(1, laReferencialMatomoID);
|
148
|
|
149
|
ResultSet rs_date = st.executeQuery();
|
150
|
while (rs_date.next()) {
|
151
|
if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
|
152
|
start.setTime(sdf.parse(rs_date.getString(1)));
|
153
|
}
|
154
|
}
|
155
|
rs_date.close();
|
156
|
|
157
|
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
|
158
|
log.info("Downloading logs for LaReferencia repoid " + laReferencialMatomoID + " and for " + sdf.format(date));
|
159
|
|
160
|
String period = "&period=day&date=" + sdf.format(date);
|
161
|
String outFolder = "";
|
162
|
outFolder = repoLogsPath;
|
163
|
|
164
|
FileSystem fs = FileSystem.get(new Configuration());
|
165
|
|
166
|
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + laReferencialMatomoID + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
|
167
|
String content = "";
|
168
|
|
169
|
int i = 0;
|
170
|
|
171
|
while (!content.equals("[]\n")) {
|
172
|
|
173
|
FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + laReferencialMatomoID + "_LaRefPiwiklog" + sdf.format((date)) + "_" + i + ".json"), true);
|
174
|
String apiUrl = baseApiUrl;
|
175
|
|
176
|
if (i > 0) {
|
177
|
apiUrl += "&filter_offset=" + (i * 1000);
|
178
|
}
|
179
|
|
180
|
content = getJson(apiUrl);
|
181
|
|
182
|
fin.write(content.getBytes());
|
183
|
|
184
|
i++;
|
185
|
fin.close();
|
186
|
}
|
187
|
//fin.close();
|
188
|
//out.close();
|
189
|
|
190
|
}
|
191
|
|
192
|
// }
|
193
|
}
|
194
|
}
|