Revision 58057
Added by Dimitris Pierrakos about 4 years ago
**modules/dnet-openaire-usage-stats-export/branches/usage-stats-export-v3/src/main/java/eu/dnetlib/usagestats/export/PiwikDownloadLogs.java**

```java
package eu.dnetlib.usagestats.export;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Calendar;

public class PiwikDownloadLogs {

    private final String piwikUsername;
    private final String piwikPassword;
    private final String httpProtocol;
    private final String piwikUrl;
    private final Date startDate;
    private final String tokenAuth;
    private final String logsPath;

    private final String dbUrl;
    private final String dbUserName;
    private final String dbPassword;

    /* The Piwik API method used to fetch raw visit logs. */
    private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
    private final String format = "&format=json";

    private final Logger log = Logger.getLogger(this.getClass());

    public PiwikDownloadLogs(String username, String password, String tokenAuth, String httpProtocol,
                             String piwikUrl, String sDate, String logsPath,
                             String dbUrl, String dbUsername, String dbPassword) throws Exception {
        this.piwikUsername = username;
        this.piwikPassword = password;
        this.httpProtocol = httpProtocol;
        this.piwikUrl = piwikUrl;

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        this.startDate = sdf.parse(sDate);

        this.tokenAuth = tokenAuth;
        this.logsPath = logsPath;
        this.dbUrl = dbUrl;
        this.dbUserName = dbUsername;
        this.dbPassword = dbPassword;
    }

    private String getPiwikLogUrl() {
        return httpProtocol + "://" + piwikUrl + "/";
    }

    private String getJson(String url, String username, String password) throws Exception {
        try {
            URL website = new URL(url);
            URLConnection connection = website.openConnection();

            StringBuilder response;
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                response = new StringBuilder();
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    response.append(inputLine);
                    response.append("\n");
                }
            }
            return response.toString();
        } catch (Exception e) {
            log.error("Failed to get URL: " + e);
            throw new Exception("Failed to get URL: " + e.toString(), e);
        }
    }

    public void getPiwikLogs() throws Exception {
        getPortalLogs();
        getRepositoriesLogs();
    }

    private void getPortalLogs() throws Exception {

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        Calendar start = Calendar.getInstance();
        start.setTime(startDate);
        Calendar end = Calendar.getInstance();
        end.add(Calendar.DAY_OF_MONTH, -1);

        try {
            log.info("downloading logs for site with piwik_id: 5");
            Class.forName("org.postgresql.Driver");
            Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
            PreparedStatement st = conn.prepareStatement(
                    "SELECT max(timestamp) FROM public.piwiklog WHERE source='5' HAVING max(timestamp) IS NOT NULL;");
            ResultSet rs_date = st.executeQuery();

            // Resume from the latest timestamp already stored for the portal site.
            while (rs_date.next()) {
                start.setTime(sdf.parse(rs_date.getString(1)));
            }
            rs_date.close();
            conn.close();

            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {

                String period = "&period=day&date=" + sdf.format(date);
                log.info("Downloading logs for " + sdf.format(date));

                FileSystem fs = FileSystem.get(new Configuration());
                FSDataOutputStream fin = fs.create(new Path(logsPath + "portallog/" + "5_Piwiklog" + sdf.format(date) + ".json"), true);
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=5" + period + format
                        + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
                String content = "";

                int i = 0;

                // Page through the API, 1000 visits at a time, until it returns an empty array.
                while (!content.equals("[]\n")) {
                    String apiUrl = baseApiUrl;

                    if (i > 0)
                        apiUrl += "&filter_offset=" + (i * 1000);

                    content = getJson(apiUrl, piwikUsername, piwikPassword);

                    fin.write(content.getBytes());

                    i++;
                }
                fin.close();
            }
        } catch (Exception e) {
            log.error("Failed to get portal logs", e);
            throw new Exception("Failed to get portal logs: " + e.toString(), e);
        }
    }

    private void getRepositoriesLogs() throws Exception {

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        Calendar start = Calendar.getInstance();
        start.setTime(startDate);
        Calendar end = Calendar.getInstance();
        end.add(Calendar.DAY_OF_MONTH, -1);

        Class.forName("org.postgresql.Driver");
        Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
        Statement statement = conn.createStatement();
        ResultSet rs = statement.executeQuery(
                "SELECT DISTINCT piwik_id FROM shadow.datasource WHERE piwik_id IS NOT NULL AND piwik_id!='5' ORDER BY piwik_id;");
        while (rs.next()) {
            int siteId = rs.getInt(1);
            PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source=?;");

            start.setTime(startDate);

            log.info("downloading logs for site with piwik_id: " + siteId);

            st.setInt(1, siteId);
            ResultSet rs_date = st.executeQuery();

            while (rs_date.next()) {
                if (rs_date.getString(1) == null || rs_date.getString(1).equals("null") || rs_date.getString(1).equals("")) {
                    // No logs stored for this site yet: keep this.startDate as the starting point.
                } else {
                    start.setTime(sdf.parse(rs_date.getString(1)));
                }
            }
            rs_date.close();

            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {

                log.info("Downloading logs for " + sdf.format(date));

                String period = "&period=day&date=" + sdf.format(date);
                FileSystem fs = FileSystem.get(new Configuration());
                FSDataOutputStream fin = fs.create(new Path(logsPath + "repolog/" + siteId + "_Piwiklog" + sdf.format(date) + ".json"), true);
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format
                        + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
                String content = "";

                int i = 0;

                while (!content.equals("[]\n")) {
                    String apiUrl = baseApiUrl;

                    if (i > 0)
                        apiUrl += "&filter_offset=" + (i * 1000);

                    content = getJson(apiUrl, piwikUsername, piwikPassword);

                    fin.write(content.getBytes());

                    i++;
                }
                fin.close();
            }
        }
        rs.close();
        conn.close();
    }
}
```
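
For orientation, a minimal driver for this class might look as follows; every literal below is a placeholder, not configuration taken from this revision:

```java
package eu.dnetlib.usagestats.export;

// Hypothetical driver for PiwikDownloadLogs; all argument values are placeholders.
public class DownloadLogsDriver {
    public static void main(String[] args) throws Exception {
        PiwikDownloadLogs piwd = new PiwikDownloadLogs(
                "apiUser", "apiPass",               // Piwik account credentials
                "token123",                         // token_auth for the Reporting API
                "https",                            // httpProtocol
                "analytics.example.org",            // host of the Piwik instance
                "2016-01-01",                       // earliest date to fetch (yyyy-MM-dd)
                "/user/usagestats/logs/",           // HDFS base path; portallog/ and repolog/ go under it
                "jdbc:postgresql://stats.example.org:5432/stats", "dbUser", "dbPass");
        piwd.getPiwikLogs();                        // fetch portal (site 5) and repository logs day by day
    }
}
```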
**modules/dnet-openaire-usage-stats-export/branches/usage-stats-export-v3/src/main/java/eu/dnetlib/usagestats/export/IrusStats.java**

```java
package eu.dnetlib.usagestats.export;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

import org.apache.log4j.Logger;

/**
 * Created by tsampikos on 28/3/2017.
 */
public class IrusStats {
    private final String dbUrl;
    private final String dbSchema;
    private final String dbUserName;
    private final String dbPassword;

    private Connection conn = null;
    private Statement stmt = null;

    private final Logger log = Logger.getLogger(this.getClass());

    public IrusStats(String dbUrl, String dbUsername, String dbPassword) throws Exception {
        this.dbUrl = dbUrl;
        this.dbSchema = "shadow";
        this.dbUserName = dbUsername;
        this.dbPassword = dbPassword;

        connectDB();
        createTables();
    }

    private void connectDB() throws Exception {
        try {
            Class.forName("org.postgresql.Driver");
            conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);

            stmt = conn.createStatement();
            String sqlSetSearchPath = "SET search_path TO " + dbSchema + ";";
            stmt.executeUpdate(sqlSetSearchPath);

            log.info("Opened database successfully");
        } catch (Exception e) {
            log.error("Connect to db failed: " + e);
            throw new Exception("Failed to connect to db: " + e.toString(), e);
        }
    }

    private void createTables() throws Exception {
        try {
            stmt = conn.createStatement();
            String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
            stmt.executeUpdate(sqlCreateTableSushiLog);

            stmt.executeUpdate("CREATE TABLE IF NOT EXISTS public.sushilog AS TABLE sushilog;");

            String sqlCopyPublicSushiLog = "INSERT INTO sushilog SELECT * FROM public.sushilog;";
            stmt.executeUpdate(sqlCopyPublicSushiLog);

            // Silently skip rows that would violate the primary key instead of failing the batch.
            String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
                    + " ON INSERT TO sushilog "
                    + " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository,"
                    + "sushilog.rid, sushilog.date "
                    + "FROM sushilog "
                    + "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
            stmt.executeUpdate(sqlcreateRuleSushiLog);

            stmt.close();
            conn.close();
            log.info("Sushi Tables Created");
        } catch (Exception e) {
            log.error("Failed to create tables: " + e);
            throw new Exception("Failed to create tables: " + e.toString(), e);
        }
    }

    public void irusStats() throws Exception {
        if (conn.isClosed())
            connectDB();

        stmt = conn.createStatement();
        conn.setAutoCommit(false);

        String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.rid=ro.orid AND metric_type='ft_total' AND s.source='IRUS-UK';";
        stmt.executeUpdate(sql);

        stmt.close();
        conn.commit();
        conn.close();
    }

    public void processIrusRRReport() throws Exception {
        // "yyyy-MM", not "YYYY-MM": uppercase Y is the week-based year and breaks around New Year.
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");
        String reportUrl = "https://irus.jisc.ac.uk" + "/api/sushilite/v1_7/GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate=" + simpleDateFormat.format(new Date()) + "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";

        log.info("Getting Irus report: " + reportUrl);

        String text = getJson(reportUrl, "", "");

        log.info("Report: " + text);

        JSONParser parser = new JSONParser();
        JSONObject jsonObject = (JSONObject) parser.parse(text);
        jsonObject = (JSONObject) jsonObject.get("ReportResponse");
        jsonObject = (JSONObject) jsonObject.get("Report");
        jsonObject = (JSONObject) jsonObject.get("Report");
        jsonObject = (JSONObject) jsonObject.get("Customer");
        JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
        int i = 0;
        for (Object aJsonArray : jsonArray) {
            JSONObject jsonObjectRow = (JSONObject) aJsonArray;
            JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
            for (Object identifier : itemIdentifier) {
                JSONObject opendoar = (JSONObject) identifier;
                if (opendoar.get("Type").toString().equals("OpenDOAR")) {
                    log.info(i + ": " + opendoar.get("Value").toString());
                    i++;
                    processIrusIRReport(opendoar.get("Value").toString());
                    break;
                }
            }
        }
    }

    private void processIrusIRReport(String opendoar) throws Exception {
        if (conn.isClosed())
            connectDB();

        conn.setAutoCommit(false);

        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");

        Calendar start = Calendar.getInstance();
        start.set(Calendar.YEAR, 2016);
        start.set(Calendar.MONTH, Calendar.JANUARY);

        Calendar end = Calendar.getInstance();
        end.add(Calendar.DAY_OF_MONTH, -1);

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        PreparedStatement st = conn.prepareStatement("SELECT max(date) FROM public.sushilog WHERE repository=?;");
        st.setString(1, "opendoar____::" + opendoar);
        ResultSet rs_date = st.executeQuery();
        // Resume from the last month already harvested for this repository.
        while (rs_date.next()) {
            if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
                start.setTime(sdf.parse(rs_date.getString(1)));
            }
        }
        rs_date.close();

        PreparedStatement preparedStatement = conn.prepareStatement("INSERT INTO sushilog (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
        int batch_size = 0;

        while (start.before(end)) {
            String reportUrl = "https://irus.jisc.ac.uk/api/sushilite/v1_7/GetReport/?Report=IR1&Release=4&RequestorID=OpenAIRE&BeginDate=" + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime()) + "&RepositoryIdentifier=opendoar%3A" + opendoar + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
            start.add(Calendar.MONTH, 1);

            String text = getJson(reportUrl, "", "");
            if (text == null) {
                continue;
            }

            JSONParser parser = new JSONParser();
            JSONObject jsonObject = (JSONObject) parser.parse(text);
            jsonObject = (JSONObject) jsonObject.get("ReportResponse");
            jsonObject = (JSONObject) jsonObject.get("Report");
            jsonObject = (JSONObject) jsonObject.get("Report");
            jsonObject = (JSONObject) jsonObject.get("Customer");
            JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
            if (jsonArray == null) {
                continue;
            }
            String oai = "";
            for (Object aJsonArray : jsonArray) {
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
                JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
                for (Object identifier : itemIdentifier) {
                    JSONObject oaiPmh = (JSONObject) identifier;
                    if (oaiPmh.get("Type").toString().equals("OAI")) {
                        oai = oaiPmh.get("Value").toString();
                        break;
                    }
                }

                JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
                String period;
                String type;
                String count;
                for (Object perf : itemPerformance) {
                    JSONObject performance = (JSONObject) perf;
                    JSONObject periodObj = (JSONObject) performance.get("Period");
                    period = periodObj.get("Begin").toString();
                    JSONObject instanceObj = (JSONObject) performance.get("Instance");
                    type = instanceObj.get("MetricType").toString();
                    count = instanceObj.get("Count").toString();

                    preparedStatement.setString(1, "IRUS-UK");
                    preparedStatement.setString(2, "opendoar____::" + opendoar);
                    preparedStatement.setString(3, oai);
                    preparedStatement.setString(4, period);
                    preparedStatement.setString(5, type);
                    preparedStatement.setInt(6, Integer.parseInt(count));
                    preparedStatement.addBatch();
                    batch_size++;
                    if (batch_size == 10000) {
                        preparedStatement.executeBatch();
                        conn.commit();
                        batch_size = 0;
                    }
                }
            }
        }

        preparedStatement.executeBatch();
        conn.commit();
        conn.close();
    }

    private String getJson(String url, String username, String password) throws Exception {
        try {
            URL website = new URL(url);
            URLConnection connection = website.openConnection();
            StringBuilder response;
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                response = new StringBuilder();
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    response.append(inputLine);
                    response.append("\n");
                }
            }
            return response.toString();
        } catch (Exception e) {
            log.error("Failed to get URL", e);
            return null;
        }
    }
}
```
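
The month-granular report URLs make the date pattern matter. The pattern must be `"yyyy-MM"`: in `SimpleDateFormat`, uppercase `Y` denotes the week-based year, which disagrees with the calendar year around New Year, so a `"YYYY-MM"` pattern can request a wrong month. A small self-contained demonstration (the first output depends on the locale's week rules):

```java
import java.text.SimpleDateFormat;
import java.util.Calendar;

public class WeekYearDemo {
    public static void main(String[] args) {
        Calendar cal = Calendar.getInstance();
        cal.set(2015, Calendar.DECEMBER, 31); // a date whose week may already belong to 2016

        // Week-based year: in many locales this prints "2016-12" -- a month key that never existed.
        System.out.println(new SimpleDateFormat("YYYY-MM").format(cal.getTime()));
        // Calendar year: prints "2015-12", which is what BeginDate/EndDate need.
        System.out.println(new SimpleDateFormat("yyyy-MM").format(cal.getTime()));
    }
}
```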
**modules/dnet-openaire-usage-stats-export/branches/usage-stats-export-v3/src/main/java/eu/dnetlib/usagestats/export/SarcStats.java**

```java
package eu.dnetlib.usagestats.export;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

import org.apache.log4j.Logger;

/**
 * Created by tsampikos on 27/6/2017.
 */
public class SarcStats {

    private final String dbUrl;
    private final String dbSchema;
    private final String dbUserName;
    private final String dbPassword;

    private Connection conn = null;
    private Statement stmt = null;

    private final Logger log = Logger.getLogger(this.getClass());

    public SarcStats(String dbUrl, String dbUsername, String dbPassword) throws Exception {
        this.dbUrl = dbUrl;
        this.dbSchema = "shadow";
        this.dbUserName = dbUsername;
        this.dbPassword = dbPassword;

        connectDB();
        createTables();
    }

    private void connectDB() throws Exception {
        try {
            Class.forName("org.postgresql.Driver");
            conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);

            stmt = conn.createStatement();
            String sqlSetSearchPath = "SET search_path TO " + dbSchema + ";";
            stmt.executeUpdate(sqlSetSearchPath);
        } catch (Exception e) {
            throw new Exception("Failed to connect to db: " + e.toString(), e);
        }
    }

    private void createTables() throws Exception {
        try {
            stmt = conn.createStatement();
            String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
            stmt.executeUpdate(sqlCreateTableSushiLog);

            // Silently skip rows that would violate the primary key instead of failing the batch.
            String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
                    + " ON INSERT TO sushilog "
                    + " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository,"
                    + "sushilog.rid, sushilog.date "
                    + "FROM sushilog "
                    + "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
            stmt.executeUpdate(sqlcreateRuleSushiLog);

            stmt.close();
            conn.close();
            log.info("Sushi Tables Created");
        } catch (Exception e) {
            log.error("Failed to create tables: " + e);
            throw new Exception("Failed to create tables: " + e.toString(), e);
        }
    }

    public void processSarc() throws Exception {
        processARReport("https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", "1646-107X");
        processARReport("http://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", "0873-819X");
        processARReport("https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", "1646-2335");
        processARReport("http://revistas.rcaap.pt/cct/sushiLite/v1_7/", "2182-3030");
        processARReport("http://actapediatrica.spp.pt/sushiLite/v1_7/", "0873-9781");
        //processARReport("http://revistas.rcaap.pt/boletimspm/sushiLite/v1_7/");
        processARReport("https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", "0873-6529");
        processARReport("http://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", "0430-5027");
        processARReport("http://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", "2182-8474");
        processARReport("https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", "0871-6099");
        processARReport("https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", "0871-9187");
        processARReport("https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", "1646-091X");
        processARReport("https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", "2183-5799");
        processARReport("http://revistas.rcaap.pt/ecr/sushiLite/v1_7/", "1647-2098");
        processARReport("https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", "0872-0754");
        processARReport("http://revistas.rcaap.pt/cea/sushiLite/v1_7/", "1645-3794");
        processARReport("http://revistas.rcaap.pt/proelium/sushiLite/v1_7/", "1645-8826");
        processARReport("http://revistas.rcaap.pt/millenium/sushiLite/v1_7/", "0873-3015");

        //processARReport("http://www.bad.pt/publicacoes/index.php/cadernos/sushiLite/v1_7/");
    }

    public void sarcStats() throws Exception {
        if (conn.isClosed())
            connectDB();

        stmt = conn.createStatement();
        conn.setAutoCommit(false);

        String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilog s, datasource_oids d, datasource_results dr, result_pids ro WHERE d.orid LIKE '%' || s.repository || '%' AND dr.id=d.id AND dr.result=ro.id AND s.rid=ro.pid AND ro.type='doi' AND metric_type='ft_total' AND s.source='SARC-OJS';";
        stmt.executeUpdate(sql);

        stmt.close();
        conn.commit();
        conn.close();
    }

    public void processARReport(String url, String issn) throws Exception {
        log.info("Processing SARC! issn: " + issn + " with url: " + url);
        if (conn.isClosed())
            connectDB();

        conn.setAutoCommit(false);

        // "yyyy-MM", not "YYYY-MM": uppercase Y is the week-based year and breaks around New Year.
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");

        Calendar start = Calendar.getInstance();
        start.set(Calendar.YEAR, 2016);
        start.set(Calendar.MONTH, Calendar.JANUARY);

        Calendar end = Calendar.getInstance();
        end.add(Calendar.DAY_OF_MONTH, -1);

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        PreparedStatement st = conn.prepareStatement("SELECT max(date) FROM public.sushilog WHERE repository=?;");
        st.setString(1, issn);
        ResultSet rs_date = st.executeQuery();
        // Resume from the last month already harvested for this journal.
        while (rs_date.next()) {
            if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
                start.setTime(sdf.parse(rs_date.getString(1)));
            }
        }
        rs_date.close();

        PreparedStatement preparedStatement = conn.prepareStatement("INSERT INTO sushilog (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
        int batch_size = 0;

        while (start.before(end)) {
            String reportUrl = url + "GetReport/?Report=AR1&Format=json&BeginDate=" + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime());
            start.add(Calendar.MONTH, 1);

            String text = getJson(reportUrl, "", "");
            if (text == null) {
                continue;
            }

            JSONParser parser = new JSONParser();
            JSONObject jsonObject = (JSONObject) parser.parse(text);
            jsonObject = (JSONObject) jsonObject.get("sc:ReportResponse");
            jsonObject = (JSONObject) jsonObject.get("sc:Report");
            if (jsonObject == null) {
                continue;
            }
            jsonObject = (JSONObject) jsonObject.get("c:Report");
            jsonObject = (JSONObject) jsonObject.get("c:Customer");
            Object obj = jsonObject.get("c:ReportItems");
            // A single report item is serialized as an object, several as an array: normalize.
            JSONArray jsonArray = new JSONArray();
            if (obj instanceof JSONObject) {
                jsonArray.add(obj);
            } else {
                jsonArray = (JSONArray) obj;
            }
            if (jsonArray == null) {
                continue;
            }

            String rid = "";
            for (Object aJsonArray : jsonArray) {
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
                JSONArray itemIdentifier = new JSONArray();
                obj = jsonObjectRow.get("c:ItemIdentifier");
                if (obj instanceof JSONObject) {
                    itemIdentifier.add(obj);
                } else {
                    itemIdentifier = (JSONArray) obj;
                }
                for (Object identifier : itemIdentifier) {
                    JSONObject doi = (JSONObject) identifier;
                    if (doi.get("c:Type").toString().equals("DOI")) {
                        rid = doi.get("c:Value").toString();
                        break;
                    }
                }
                if (rid.isEmpty()) {
                    continue;
                }

                JSONObject performance = (JSONObject) jsonObjectRow.get("c:ItemPerformance");
                JSONObject periodObj = (JSONObject) performance.get("c:Period");
                String period = periodObj.get("c:Begin").toString();
                JSONObject instanceObj = (JSONObject) performance.get("c:Instance");
                String type = instanceObj.get("c:MetricType").toString();
                String count = instanceObj.get("c:Count").toString();

                preparedStatement.setString(1, "SARC-OJS");
                preparedStatement.setString(2, issn);
                preparedStatement.setString(3, rid);
                preparedStatement.setString(4, period);
                preparedStatement.setString(5, type);
                preparedStatement.setInt(6, Integer.parseInt(count));
                preparedStatement.addBatch();
                batch_size++;
                if (batch_size == 10000) {
                    preparedStatement.executeBatch();
                    conn.commit();
                    batch_size = 0;
                }
            }
        }

        preparedStatement.executeBatch();
        conn.commit();
        conn.close();
    }

    private String getJson(String url, String username, String password) throws Exception {
        try {
            URL website = new URL(url);
            URLConnection connection = website.openConnection();
            StringBuilder response;
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                response = new StringBuilder();
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    response.append(inputLine);
                    response.append("\n");
                }
            }
            return response.toString();
        } catch (Exception e) {
            log.error("Failed to get URL: " + e);
            return null;
        }
    }
}
```
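
One detail worth calling out in processARReport() above: the SUSHI-Lite endpoints serialize a single child element as a JSON object but several children as an array, so the parser wraps lone objects before iterating. The same normalization as a standalone sketch (toArray is a hypothetical helper, not part of this revision):

```java
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;

final class JsonShims {
    // Normalize a node that may arrive as a single JSONObject or as a JSONArray of them.
    static JSONArray toArray(Object node) {
        JSONArray arr = new JSONArray();
        if (node instanceof JSONObject) {
            arr.add(node);                 // single child: wrap it in a one-element list
        } else if (node instanceof JSONArray) {
            return (JSONArray) node;       // already a list: use it as-is
        }
        return arr;                        // empty list when node is null or of an unexpected type
    }
}
```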
modules/dnet-openaire-usage-stats-export/branches/usage-stats-export-v3/src/main/java/eu/dnetlib/usagestats/export/PiwikStatsDB.java | ||
---|---|---|
1 |
package eu.dnetlib.usagestats.export; |
|
2 |
|
|
3 |
import java.io.*; |
|
4 |
import java.net.URLDecoder; |
|
5 |
import java.sql.Connection; |
|
6 |
import java.sql.DriverManager; |
|
7 |
import java.sql.PreparedStatement; |
|
8 |
import java.sql.Statement; |
|
9 |
import java.sql.Timestamp; |
|
10 |
import java.text.SimpleDateFormat; |
|
11 |
import java.util.*; |
|
12 |
|
|
13 |
import org.apache.hadoop.conf.Configuration; |
|
14 |
import org.apache.hadoop.fs.LocatedFileStatus; |
|
15 |
import org.apache.hadoop.fs.Path; |
|
16 |
import org.apache.hadoop.fs.FileSystem; |
|
17 |
import org.apache.hadoop.fs.RemoteIterator; |
|
18 |
import org.apache.log4j.Logger; |
|
19 |
import org.json.simple.JSONArray; |
|
20 |
import org.json.simple.JSONObject; |
|
21 |
import org.json.simple.parser.JSONParser; |
|
22 |
|
|
23 |
public class PiwikStatsDB { |
|
24 |
private final String dbUrl; |
|
25 |
private final String dbSchema; |
|
26 |
private final String dbUserName; |
|
27 |
private final String dbPassword; |
|
28 |
private final String logPath; |
|
29 |
|
|
30 |
private Connection conn = null; |
|
31 |
private Statement stmt = null; |
|
32 |
|
|
33 |
private final Logger log = Logger.getLogger(this.getClass()); |
|
34 |
|
|
35 |
public PiwikStatsDB(String dbUrl, String dbUserName, String dbPassword, String logPath) throws Exception { |
|
36 |
this.dbUrl = dbUrl; |
|
37 |
this.dbSchema = "shadow"; |
|
38 |
this.dbUserName = dbUserName; |
|
39 |
this.dbPassword = dbPassword; |
|
40 |
this.logPath = logPath; |
|
41 |
|
|
42 |
connectDB(); |
|
43 |
createTables(); |
|
44 |
} |
|
45 |
|
|
46 |
private void connectDB() throws Exception { |
|
47 |
try { |
|
48 |
Class.forName("org.postgresql.Driver"); |
|
49 |
conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword); |
|
50 |
|
|
51 |
stmt = conn.createStatement(); |
|
52 |
String sqlSetSearchPath = "SET search_path TO " + dbSchema + ";"; |
|
53 |
stmt.executeUpdate(sqlSetSearchPath); |
|
54 |
|
|
55 |
//log.info("Opened database successfully"); |
|
56 |
} catch (Exception e) { |
|
57 |
log.error("Connect to db failed: " + e); |
|
58 |
throw new Exception("Failed to connect to db: " + e.toString(), e); |
|
59 |
} |
|
60 |
} |
|
61 |
|
|
62 |
private void createTables() throws Exception { |
|
63 |
try { |
|
64 |
stmt = conn.createStatement(); |
|
65 |
String sqlCreateTablePiwikLog = "CREATE TABLE IF NOT EXISTS piwiklog(source INTEGER, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));"; |
|
66 |
String sqlcreateRulePiwikLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " + |
|
67 |
" ON INSERT TO piwiklog " + |
|
68 |
" WHERE (EXISTS ( SELECT piwiklog.source, piwiklog.id_visit," + |
|
69 |
"piwiklog.action, piwiklog.\"timestamp\", piwiklog.entity_id " + |
|
70 |
"FROM piwiklog " + |
|
71 |
"WHERE piwiklog.source = new.source AND piwiklog.id_visit = new.id_visit AND piwiklog.action = new.action AND piwiklog.entity_id = new.entity_id AND piwiklog.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;"; |
|
72 |
stmt.executeUpdate(sqlCreateTablePiwikLog); |
|
73 |
stmt.executeUpdate(sqlcreateRulePiwikLog); |
|
74 |
|
|
75 |
String sqlCopyPublicPiwiklog="insert into piwiklog select * from public.piwiklog;"; |
|
76 |
stmt.executeUpdate(sqlCopyPublicPiwiklog); |
|
77 |
|
|
78 |
String sqlCreateTablePortalLog = "CREATE TABLE IF NOT EXISTS process_portal_log(source INTEGER, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, timestamp));"; |
|
79 |
String sqlcreateRulePortalLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " + |
|
80 |
" ON INSERT TO process_portal_log " + |
|
81 |
" WHERE (EXISTS ( SELECT process_portal_log.source, process_portal_log.id_visit," + |
|
82 |
"process_portal_log.\"timestamp\" " + |
|
83 |
"FROM process_portal_log " + |
|
84 |
"WHERE process_portal_log.source = new.source AND process_portal_log.id_visit = new.id_visit AND process_portal_log.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;"; |
|
85 |
stmt.executeUpdate(sqlCreateTablePortalLog); |
|
86 |
stmt.executeUpdate(sqlcreateRulePortalLog); |
|
87 |
|
|
88 |
stmt.close(); |
|
89 |
conn.close(); |
|
90 |
log.info("Usage Tables Created"); |
|
91 |
|
|
92 |
} catch (Exception e) { |
|
93 |
log.error("Failed to create tables: " + e); |
|
94 |
throw new Exception("Failed to create tables: " + e.toString(), e); |
|
95 |
//System.exit(0); |
|
96 |
} |
|
97 |
} |
|
98 |
|
|
99 |
protected void processLogs() throws Exception { |
|
100 |
try { |
|
101 |
processRepositoryLog(); |
|
102 |
log.info("repository process done"); |
|
103 |
removeDoubleClicks(); |
|
104 |
log.info("removing double clicks done"); |
|
105 |
cleanOAI(); |
|
106 |
log.info("cleaning oai done"); |
|
107 |
|
|
108 |
processPortalLog(); |
|
109 |
log.info("portal process done"); |
|
110 |
portalStats(); |
|
111 |
log.info("portal stats done"); |
|
112 |
} catch (Exception e) { |
|
113 |
log.error("Failed to process logs: " + e); |
|
114 |
throw new Exception("Failed to process logs: " + e.toString(), e); |
|
115 |
} |
|
116 |
} |
|
117 |
|
|
118 |
protected void usageStats() throws Exception { |
|
119 |
try { |
|
120 |
//resultStats(); |
|
121 |
//dataSourceStats(); |
|
122 |
//organizationsStats(); |
|
123 |
//projectsStats(); |
|
124 |
//repositoryViewsStats(); |
|
125 |
//repositoryDownloadsStats(); |
|
126 |
viewsStats(); |
|
127 |
downloadsStats(); |
|
128 |
log.info("stat tables and views done"); |
|
129 |
} catch (Exception e) { |
|
130 |
log.error("Failed to create usage stats: " + e); |
|
131 |
throw new Exception("Failed to create usage stats: " + e.toString(), e); |
|
132 |
} |
|
133 |
} |
|
134 |
|
|
135 |
// Import repository Logs to DB |
|
136 |
private void processRepositoryLog() throws Exception { |
|
137 |
if (conn.isClosed()) |
|
138 |
connectDB(); |
|
139 |
|
|
140 |
stmt = conn.createStatement(); |
|
141 |
conn.setAutoCommit(false); |
|
142 |
|
|
143 |
ArrayList<String> jsonFiles = listHdfsDir(logPath + "repolog"); |
|
144 |
|
|
145 |
PreparedStatement prepStatem = conn.prepareStatement("INSERT INTO piwiklog (source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?)"); |
|
146 |
int batch_size = 0; |
|
147 |
for (String jsonFile : jsonFiles) { |
|
148 |
JSONParser parser = new JSONParser(); |
|
149 |
JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile)); |
|
150 |
|
|
151 |
for (Object aJsonArray : jsonArray) { |
|
152 |
JSONObject jsonObjectRow = (JSONObject) aJsonArray; |
|
153 |
int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString()); |
|
154 |
String idVisit = jsonObjectRow.get("idVisit").toString(); |
|
155 |
String country = jsonObjectRow.get("country").toString(); |
|
156 |
String referrerName = jsonObjectRow.get("referrerName").toString(); |
|
157 |
String agent = jsonObjectRow.get("browser").toString(); |
|
158 |
|
|
159 |
String sourceItemType = "repItem"; |
|
160 |
|
|
161 |
JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails")); |
|
162 |
for (Object actionDetail : actionDetails) { |
|
163 |
JSONObject actionDetailsObj = (JSONObject) actionDetail; |
|
164 |
|
|
165 |
if (actionDetailsObj.get("customVariables") != null) { |
|
166 |
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
|
167 |
simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); |
|
168 |
Timestamp timestamp = new Timestamp(Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000); |
|
169 |
String url = actionDetailsObj.get("url").toString(); |
|
170 |
String oaipmh = ((JSONObject) ((JSONObject) actionDetailsObj.get("customVariables")).get("1")).get("customVariablePageValue1").toString(); |
|
171 |
String action = actionDetailsObj.get("type").toString(); |
|
172 |
|
|
173 |
prepStatem.setInt(1, idSite); |
|
174 |
prepStatem.setString(2, idVisit); |
|
175 |
prepStatem.setString(3, country); |
|
176 |
prepStatem.setString(4, action); |
|
177 |
prepStatem.setString(5, url); |
|
178 |
prepStatem.setString(6, oaipmh); |
|
179 |
prepStatem.setString(7, sourceItemType); |
|
180 |
prepStatem.setString(8, simpleDateFormat.format(timestamp)); |
|
181 |
prepStatem.setString(9, referrerName); |
|
182 |
prepStatem.setString(10, agent); |
|
183 |
prepStatem.addBatch(); |
|
184 |
batch_size++; |
|
185 |
if (batch_size == 10000) { |
|
186 |
prepStatem.executeBatch(); |
|
187 |
conn.commit(); |
|
188 |
batch_size = 0; |
|
189 |
} |
|
190 |
} |
|
191 |
} |
|
192 |
} |
|
193 |
} |
|
194 |
prepStatem.executeBatch(); |
|
195 |
conn.commit(); |
|
196 |
stmt.close(); |
|
197 |
conn.close(); |
|
198 |
} |
|
199 |
|
|
200 |
private void removeDoubleClicks() throws Exception { |
|
201 |
if (conn.isClosed()) |
|
202 |
connectDB(); |
|
203 |
|
|
204 |
stmt = conn.createStatement(); |
|
205 |
conn.setAutoCommit(false); |
|
206 |
|
|
207 |
//clean download double clicks |
|
208 |
String sql = "DELETE FROM piwiklog p WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp FROM piwiklog p1, piwiklog p2 WHERE p1.source!='5' AND p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='download' AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp AND extract(EPOCH FROM p2.timestamp::timestamp-p1.timestamp::timestamp)<30 AND p.source=p1.source AND p.id_visit=p1.id_visit AND p.action=p1.action AND p.entity_id=p1.entity_id AND p.timestamp=p1.timestamp);"; |
|
209 |
stmt.executeUpdate(sql); |
|
210 |
stmt.close(); |
|
211 |
conn.commit(); |
|
212 |
|
|
213 |
stmt = conn.createStatement(); |
|
214 |
|
|
215 |
//clean view double clicks |
|
216 |
sql = "DELETE FROM piwiklog p WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp from piwiklog p1, piwiklog p2 WHERE p1.source!='5' AND p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='action' AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp AND extract(EPOCH FROM p2.timestamp::timestamp-p1.timestamp::timestamp)<10 AND p.source=p1.source AND p.id_visit=p1.id_visit AND p.action=p1.action AND p.entity_id=p1.entity_id AND p.timestamp=p1.timestamp);"; |
|
217 |
stmt.executeUpdate(sql); |
|
218 |
stmt.close(); |
|
219 |
conn.commit(); |
|
220 |
conn.close(); |
|
221 |
} |
|
222 |
|
|
223 |
private void viewsStats() throws Exception { |
|
224 |
if (conn.isClosed()) |
|
225 |
connectDB(); |
|
226 |
|
|
227 |
stmt = conn.createStatement(); |
|
228 |
conn.setAutoCommit(false); |
|
229 |
|
|
230 |
//String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as views, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;"; |
|
231 |
String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;"; |
|
232 |
stmt.executeUpdate(sql); |
|
233 |
|
|
234 |
// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire INTO views_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; |
|
235 |
sql = "CREATE TABLE IF NOT EXISTS views_stats AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire FROM result_views_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; |
|
236 |
stmt.executeUpdate(sql); |
|
237 |
|
|
238 |
// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count INTO pageviews_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; |
|
239 |
sql = "CREATE TABLE IF NOT EXISTS pageviews_stats AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count FROM result_views_monthly p, datasource d, result_oids ro where p.source='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; |
|
240 |
stmt.executeUpdate(sql); |
|
241 |
|
|
242 |
sql = "DROP VIEW IF EXISTS result_views_monthly;"; |
|
243 |
stmt.executeUpdate(sql); |
|
244 |
|
|
245 |
stmt.close(); |
|
246 |
conn.commit(); |
|
247 |
conn.close(); |
|
248 |
} |
|
249 |
|
|
250 |
private void downloadsStats() throws Exception { |
|
251 |
if (conn.isClosed()) |
|
252 |
connectDB(); |
|
253 |
|
|
254 |
stmt = conn.createStatement(); |
|
255 |
conn.setAutoCommit(false); |
|
256 |
|
|
257 |
//String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;"; |
|
258 |
String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;"; |
|
259 |
stmt.executeUpdate(sql); |
|
260 |
|
|
261 |
//sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; |
|
262 |
// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; |
|
263 |
sql = "CREATE TABLE IF NOT EXISTS downloads_stats AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; |
|
264 |
stmt.executeUpdate(sql); |
|
265 |
|
|
266 |
sql = "DROP VIEW IF EXISTS result_downloads_monthly;"; |
|
267 |
stmt.executeUpdate(sql); |
|
268 |
|
|
269 |
stmt.close(); |
|
270 |
conn.commit(); |
|
271 |
conn.close(); |
|
272 |
    }

    public void finalizeStats() throws Exception {
        if (conn.isClosed())
            connectDB();

        stmt = conn.createStatement();
        conn.setAutoCommit(false);

        Calendar startCalendar = Calendar.getInstance();
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
        Calendar endCalendar = Calendar.getInstance();
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);

        // One 'YYYY/MM' row per month, from 2016/01 up to the current month.
        // String sql = "SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date INTO full_dates FROM generate_series(0, " + diffMonth + ", 1) AS offs;";
        String sql = "CREATE TABLE IF NOT EXISTS full_dates AS SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date FROM generate_series(0, " + diffMonth + ", 1) AS offs;";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX full_dates_full_date ON full_dates USING btree(full_date);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX views_stats_source ON views_stats USING btree(source);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX views_stats_repository_id ON views_stats USING btree(repository_id);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX views_stats_result_id ON views_stats USING btree(result_id);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX views_stats_date ON views_stats USING btree(date);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX pageviews_stats_source ON pageviews_stats USING btree(source);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX pageviews_stats_repository_id ON pageviews_stats USING btree(repository_id);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX pageviews_stats_result_id ON pageviews_stats USING btree(result_id);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX pageviews_stats_date ON pageviews_stats USING btree(date);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX downloads_stats_source ON downloads_stats USING btree(source);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX downloads_stats_repository_id ON downloads_stats USING btree(repository_id);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX downloads_stats_result_id ON downloads_stats USING btree(result_id);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX downloads_stats_date ON downloads_stats USING btree(date);";
        stmt.executeUpdate(sql);

        // Merge downloads and views into usage_stats; the FULL OUTER JOIN keeps
        // rows present on only one side, with the missing count coalesced to 0.
        // sql = "SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views INTO usage_stats FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
        sql = "CREATE TABLE IF NOT EXISTS usage_stats AS SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX usage_stats_source ON usage_stats USING btree(source);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX usage_stats_repository_id ON usage_stats USING btree(repository_id);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX usage_stats_result_id ON usage_stats USING btree(result_id);";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX usage_stats_date ON usage_stats USING btree(date);";
        stmt.executeUpdate(sql);

        stmt.close();
        conn.commit();
        conn.close();
    }
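Note: the start-date / diffMonth computation above reappears verbatim in repositoryViewsStats() and repositoryDownloadsStats() below. A minimal sketch of how it could be factored out, relying only on the Calendar and SimpleDateFormat imports the class already has; getMonthsSince is a hypothetical helper name, not part of this revision:

    // Hypothetical helper (not in this revision): number of whole months
    // between the first of startDate's month and the first of the current month.
    private static int getMonthsSince(String startDate) throws java.text.ParseException {
        Calendar start = Calendar.getInstance();
        start.setTime(new SimpleDateFormat("yyyy-MM-dd").parse(startDate));
        Calendar end = Calendar.getInstance();
        int diffYear = end.get(Calendar.YEAR) - start.get(Calendar.YEAR);
        // e.g. 2016-01 to 2020-03 gives 4*12 + (2 - 0) = 50
        return diffYear * 12 + end.get(Calendar.MONTH) - start.get(Calendar.MONTH);
    }

Each of the three methods could then size its generate_series(0, diffMonth, 1) range with getMonthsSince("2016-01-01").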
|
    //views stats
    // private void viewsStatsOLD() throws Exception {
    //     if (conn.isClosed())
    //         connectDB();
    //
    //     stmt = conn.createStatement();
    //     conn.setAutoCommit(false);
    //
    //     Calendar startCalendar = Calendar.getInstance();
    //     startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
    //     Calendar endCalendar = Calendar.getInstance();
    //     int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
    //     int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
    //
    //     String sql = "CREATE OR REPLACE VIEW result_views_monthly as select entity_id AS id, COUNT(entity_id) as views, extract('year' from timestamp::date) ||'-'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "SELECT d.id, d.new_date AS month, CASE when rdm.views IS NULL THEN 0 ELSE rdm.views END, d.source INTO result_views_sushi FROM (SELECT distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY-MM-DD') AS new_date, rdsm.source FROM generate_series(0, " + diffMonth + ", 1) AS offs, result_views_monthly rdsm) d LEFT JOIN (SELECT id, month, views, source FROM result_views_monthly) rdm ON d.new_date=rdm.month AND d.id=rdm.id AND d.source=rdm.source ORDER BY d.source, d.id, d.new_date;";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_views_sushi_id ON result_views_sushi USING btree (id);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_views_sushi_month ON result_views_sushi USING btree (month);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_views_sushi_source ON result_views_sushi USING btree (source);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "SELECT roid.id, extract('year' from month::date) ||'/'|| LPAD(CAST(extract('month' from month::date) AS VARCHAR), 2, '0') as month, max(views) as views, source INTO result_views FROM result_views_sushi rvs, result_oids roid WHERE rvs.id=roid.orid GROUP BY source, roid.id, month ORDER BY source, roid.id, month;";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_views_id ON result_views USING btree (id);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_views_month ON result_views USING btree (month);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_views_source ON result_views USING btree (source);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "DROP VIEW IF EXISTS result_views_monthly;";
    //     stmt.executeUpdate(sql);
    //
    //     stmt.close();
    //     conn.commit();
    //     conn.close();
    // }

    //downloads stats
    // private void downloadsStatsOLD() throws Exception {
    //     if (conn.isClosed())
    //         connectDB();
    //
    //     stmt = conn.createStatement();
    //     conn.setAutoCommit(false);
    //
    //     Calendar startCalendar = Calendar.getInstance();
    //     startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
    //     Calendar endCalendar = Calendar.getInstance();
    //     int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
    //     int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
    //
    //     String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, extract('year' from timestamp::date) ||'-'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "SELECT d.id, d.new_date AS month, CASE when rdm.downloads IS NULL THEN 0 ELSE rdm.downloads END, d.source INTO result_downloads_sushi FROM (SELECT distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY-MM-DD') AS new_date, rdsm.source FROM generate_series(0, " + diffMonth + ", 1) AS offs, result_downloads_monthly rdsm) d LEFT JOIN (SELECT id, month, downloads, source FROM result_downloads_monthly) rdm ON d.new_date=rdm.month AND d.id=rdm.id AND d.source=rdm.source ORDER BY d.source, d.id, d.new_date;";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_downloads_sushi_id ON result_downloads_sushi USING btree (id);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_downloads_sushi_month ON result_downloads_sushi USING btree (month);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_downloads_sushi_source ON result_downloads_sushi USING btree (source);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "SELECT roid.id, extract('year' from month::date) ||'/'|| LPAD(CAST(extract('month' from month::date) AS VARCHAR), 2, '0') as month, downloads, source INTO result_downloads FROM result_downloads_sushi rvs, result_oids roid WHERE rvs.id=roid.orid ORDER BY source, roid.id, month;";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_downloads_id ON result_downloads USING btree (id);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_downloads_month ON result_downloads USING btree (month);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "CREATE INDEX result_downloads_source ON result_downloads USING btree (source);";
    //     stmt.executeUpdate(sql);
    //
    //     sql = "DROP VIEW IF EXISTS result_downloads_monthly;";
    //     stmt.executeUpdate(sql);
    //
    //     stmt.close();
    //     conn.commit();
    //     conn.close();
    // }
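The recurring change in this revision, visible in the commented-out statements kept above, is the move from PostgreSQL's `SELECT ... INTO <table>` to `CREATE TABLE IF NOT EXISTS <table> AS SELECT ...`. Both forms materialize a query result into a new table, but the old form fails when the table already exists, while the new one becomes a no-op, which makes the workflow re-runnable. A minimal side-by-side sketch with a made-up table name:

    // Old form: aborts with "relation already exists" on a second run.
    String oldSql = "SELECT id, count(*) AS views INTO monthly_views FROM piwiklog GROUP BY id;";

    // New form: creation is skipped when monthly_views already exists
    // (note: in that case the existing contents are kept as-is, not refreshed).
    String newSql = "CREATE TABLE IF NOT EXISTS monthly_views AS SELECT id, count(*) AS views FROM piwiklog GROUP BY id;";

One caveat: the CREATE INDEX statements that follow each table are not guarded the same way, so a re-run that skips table creation would still fail on the duplicate index names.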
|
    //Create repository Views statistics
    private void repositoryViewsStats() throws Exception {
        if (conn.isClosed())
            connectDB();

        stmt = conn.createStatement();
        conn.setAutoCommit(false);

        // String sql = "SELECT entity_id AS id, COUNT(entity_id) AS number_of_views, timestamp::date AS date, source INTO repo_view_stats FROM piwiklog WHERE source!='5' AND action='action' AND source_item_type='repItem' GROUP BY entity_id, date, source ORDER BY entity_id, date ASC, COUNT(entity_id) DESC;";
        String sql = "CREATE TABLE IF NOT EXISTS repo_view_stats AS SELECT entity_id AS id, COUNT(entity_id) AS number_of_views, timestamp::date AS date, source FROM piwiklog WHERE source!='5' AND action='action' AND source_item_type='repItem' GROUP BY entity_id, date, source ORDER BY entity_id, date ASC, COUNT(entity_id) DESC;";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_view_stats_id ON repo_view_stats USING btree (id)";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_view_stats_date ON repo_view_stats USING btree(date)";
        stmt.executeUpdate(sql);

        // sql = "SELECT roid.id, sum(number_of_views), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month, source INTO repo_view_stats_monthly_clean FROM repo_view_stats rvs, result_oids roid where rvs.id=roid.orid group by roid.id, month, source;";
        sql = "CREATE TABLE IF NOT EXISTS repo_view_stats_monthly_clean AS SELECT roid.id, sum(number_of_views), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month, source FROM repo_view_stats rvs, result_oids roid where rvs.id=roid.orid group by roid.id, month, source;";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_view_stats_monthly_clean_id ON repo_view_stats_monthly_clean USING btree (id)";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_view_stats_monthly_clean_month ON repo_view_stats_monthly_clean USING btree(month)";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_view_stats_monthly_clean_source ON repo_view_stats_monthly_clean USING btree(source)";
        stmt.executeUpdate(sql);

        Calendar startCalendar = Calendar.getInstance();
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
        Calendar endCalendar = Calendar.getInstance();
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);

        //sql="CREATE OR REPLACE view repo_view_stats_monthly AS select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_view_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_view_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
        // sql = "select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source INTO repo_view_stats_monthly from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_view_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_view_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
        sql = "CREATE TABLE IF NOT EXISTS repo_view_stats_monthly AS select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_view_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_view_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_view_stats_monthly_id ON repo_view_stats_monthly USING btree (id)";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_view_stats_monthly_month ON repo_view_stats_monthly USING btree(month)";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_view_stats_monthly_source ON repo_view_stats_monthly USING btree(source)";
        stmt.executeUpdate(sql);

        sql = "CREATE OR REPLACE view repo_view_stats_monthly_sushi AS SELECT id, sum(number_of_views), extract('year' from date) ||'-'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM repo_view_stats group by id, month, source;";
        stmt.executeUpdate(sql);

        stmt.close();
        conn.commit();
        conn.close();
    }
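For reference, the 'YYYY/MM' month key that repo_view_stats_monthly_clean builds with extract(...) and LPAD(...) corresponds to the following Java-side formatting (a sketch for illustration, not code from this class):

    Calendar c = Calendar.getInstance();
    c.set(2016, Calendar.FEBRUARY, 10);
    // Calendar.MONTH is zero-based, hence the +1; this yields "2016/02",
    // matching extract('year' ...) ||'/'|| LPAD(CAST(extract('month' ...) AS VARCHAR), 2, '0').
    String monthKey = String.format("%d/%02d", c.get(Calendar.YEAR), c.get(Calendar.MONTH) + 1);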
|
    //Create repository downloads statistics
    private void repositoryDownloadsStats() throws Exception {
        if (conn.isClosed())
            connectDB();

        stmt = conn.createStatement();
        conn.setAutoCommit(false);

        // String sql = "SELECT entity_id AS id, COUNT(entity_id) AS number_of_downloads, timestamp::date AS date, source INTO repo_download_stats FROM piwiklog WHERE source!='5' AND action='download' AND source_item_type='repItem' GROUP BY entity_id, date, source ORDER BY entity_id, date ASC, COUNT(entity_id) DESC;";
        String sql = "CREATE TABLE IF NOT EXISTS repo_download_stats AS SELECT entity_id AS id, COUNT(entity_id) AS number_of_downloads, timestamp::date AS date, source FROM piwiklog WHERE source!='5' AND action='download' AND source_item_type='repItem' GROUP BY entity_id, date, source ORDER BY entity_id, date ASC, COUNT(entity_id) DESC;";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_download_stats_id ON repo_download_stats USING btree (id)";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_download_stats_date ON repo_download_stats USING btree(date)";
        stmt.executeUpdate(sql);

        // sql = "SELECT roid.id, sum(number_of_downloads), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month, source INTO repo_download_stats_monthly_clean FROM repo_download_stats rvs, result_oids roid WHERE rvs.id=roid.orid GROUP BY roid.id, month, source;";
        sql = "CREATE TABLE IF NOT EXISTS repo_download_stats_monthly_clean AS SELECT roid.id, sum(number_of_downloads), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month, source FROM repo_download_stats rvs, result_oids roid WHERE rvs.id=roid.orid GROUP BY roid.id, month, source;";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_download_stats_monthly_clean_id ON repo_download_stats_monthly_clean USING btree (id)";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_download_stats_monthly_clean_month ON repo_download_stats_monthly_clean USING btree(month)";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_download_stats_monthly_clean_source ON repo_download_stats_monthly_clean USING btree(source)";
        stmt.executeUpdate(sql);

        Calendar startCalendar = Calendar.getInstance();
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
        Calendar endCalendar = Calendar.getInstance();
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);

        //sql="CREATE OR REPLACE view repo_download_stats_monthly AS select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_download_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_download_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
        // sql = "select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source INTO repo_download_stats_monthly from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_download_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_download_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
        sql = "CREATE TABLE IF NOT EXISTS repo_download_stats_monthly AS select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_download_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_download_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_download_stats_monthly_id ON repo_download_stats_monthly USING btree (id)";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_download_stats_monthly_month ON repo_download_stats_monthly USING btree(month)";
        stmt.executeUpdate(sql);

        sql = "CREATE INDEX repo_download_stats_monthly_source ON repo_download_stats_monthly USING btree(source)";
        stmt.executeUpdate(sql);

        sql = "CREATE OR REPLACE view repo_download_stats_monthly_sushi AS SELECT id, sum(number_of_downloads), extract('year' from date) ||'-'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM repo_download_stats group by id, month, source;";
        stmt.executeUpdate(sql);

        stmt.close();
        conn.commit();
        conn.close();
    }
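repositoryDownloadsStats() mirrors repositoryViewsStats() line for line, with action='download' and repo_download_* names substituted for action='action' and repo_view_*. A hedged sketch of a shared builder the two could delegate to; buildRepoStats and its parameters are hypothetical, and only the first statement is spelled out:

    // Hypothetical consolidation of the two methods above (not in this revision).
    // 'action', 'metric' and 'prefix' come from a fixed internal vocabulary,
    // so building the SQL by concatenation carries no injection risk here.
    private void buildRepoStats(String action, String metric, String prefix) throws Exception {
        String sql = "CREATE TABLE IF NOT EXISTS " + prefix + "_stats AS "
                + "SELECT entity_id AS id, COUNT(entity_id) AS number_of_" + metric + ", "
                + "timestamp::date AS date, source FROM piwiklog "
                + "WHERE source!='5' AND action='" + action + "' AND source_item_type='repItem' "
                + "GROUP BY entity_id, date, source;";
        stmt.executeUpdate(sql);
        // ... the *_monthly_clean, *_monthly and *_monthly_sushi steps would
        // follow with the same prefix/metric substitution.
    }

    // buildRepoStats("action", "views", "repo_view");
    // buildRepoStats("download", "downloads", "repo_download");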
|
    // Import OPENAIRE Logs to DB
    private void processPortalLog() throws Exception {

        if (conn.isClosed())
            connectDB();

        stmt = conn.createStatement();
        conn.setAutoCommit(false);

        ArrayList<String> jsonFiles = listHdfsDir(logPath + "portallog");
        PreparedStatement prepStatem = conn.prepareStatement("INSERT INTO process_portal_log (source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?)");
        int batch_size = 0;
        JSONParser parser = new JSONParser();
        for (String jsonFile : jsonFiles) {
            JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile));

            for (Object aJsonArray : jsonArray) {
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
                int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString());
                String idVisit = jsonObjectRow.get("idVisit").toString();
                String country = jsonObjectRow.get("country").toString();
                String referrerName = jsonObjectRow.get("referrerName").toString();
                String agent = jsonObjectRow.get("browser").toString();

                JSONArray actionDetails = (JSONArray) jsonObjectRow.get("actionDetails");
                for (Object actionDetail : actionDetails) {
                    JSONObject actionDetailsObj = (JSONObject) actionDetail;

                    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                    simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
                    // Piwik reports seconds since the epoch; Timestamp expects milliseconds.
                    Timestamp timestamp = new Timestamp(Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000);

                    String action = actionDetailsObj.get("type").toString();
                    String url = actionDetailsObj.get("url").toString();

                    String entityID = processPortalURL(url);
                    String sourceItemType = "";

                    if (entityID.indexOf("|") > 0) {
                        sourceItemType = entityID.substring(0, entityID.indexOf("|"));
                        entityID = entityID.substring(entityID.indexOf("|") + 1);
                    }

                    prepStatem.setInt(1, idSite);
                    prepStatem.setString(2, idVisit);
                    prepStatem.setString(3, country);
                    prepStatem.setString(4, action);
                    prepStatem.setString(5, url);
                    prepStatem.setString(6, entityID);
                    prepStatem.setString(7, sourceItemType);
                    prepStatem.setString(8, simpleDateFormat.format(timestamp));
                    prepStatem.setString(9, referrerName);
                    prepStatem.setString(10, agent);

                    prepStatem.addBatch();
                    batch_size++;
                    if (batch_size == 10000) {
                        prepStatem.executeBatch();
                        conn.commit();
                        batch_size = 0;
                    }
                }
            }
        }
        // Flush the final, partially filled batch.
        prepStatem.executeBatch();
        conn.commit();

        prepStatem.close();
        stmt.close();
        conn.close();
    }
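processPortalLog() relies on processPortalURL() (presumably defined elsewhere in this class) returning either a bare identifier or a source item type and an entity id joined with '|'; the substring logic above only splits when the separator is present. An illustration with a made-up return value:

    String entityID = "oaItem|od______1234::abc";   // hypothetical processPortalURL() result
    String sourceItemType = "";
    if (entityID.indexOf("|") > 0) {
        sourceItemType = entityID.substring(0, entityID.indexOf("|"));   // "oaItem"
        entityID = entityID.substring(entityID.indexOf("|") + 1);        // "od______1234::abc"
    }

Rows are accumulated with addBatch() and flushed with executeBatch() plus a commit every 10,000 inserts, which bounds the memory held by the JDBC batch while keeping database round trips low.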
Usage Statistics Export New Workflow