Project

General

Profile

« Previous | Next » 

Revision 57234

Added by Dimitris Pierrakos over 4 years ago

Usage Stats Export v2

View differences:

modules/dnet-openaire-usage-stats-export/branches/usage-stats-export-v2/src/main/java/eu/dnetlib/usagestats/export/PiwikDownloadLogs.java
1
package eu.dnetlib.usagestats.export;
2

  
3
import org.apache.hadoop.conf.Configuration;
4
import org.apache.hadoop.fs.FSDataOutputStream;
5
import org.apache.hadoop.fs.Path;
6
import org.apache.hadoop.fs.FileSystem;
7
import org.apache.log4j.Logger;
8

  
9
import java.io.*;
10
import java.net.URL;
11
import java.net.URLConnection;
12
import java.sql.Connection;
13
import java.sql.DriverManager;
14
import java.sql.PreparedStatement;
15
import java.sql.ResultSet;
16
import java.sql.Statement;
17
import java.text.SimpleDateFormat;
18
import java.util.Date;
19
import java.util.Calendar;
20

  
21
public class PiwikDownloadLogs {
22

  
23
    private final String piwikUsername;
24
    private final String piwikPassword;
25
    private final String httpProtocol;
26
    private final String piwikUrl;
27
    private final Date startDate;
28
    private final String tokenAuth;
29
    private final String logsPath;
30

  
31
    private final String dbUrl;
32
    private final String dbUserName;
33
    private final String dbPassword;
34

  
35
    /*
36
       The Piwik's API method 
37
    */
38
    private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
39
    private final String format = "&format=json";
40

  
41
    private final Logger log = Logger.getLogger(this.getClass());
42

  
43

  
44
    public PiwikDownloadLogs(String username, String password, String tokenAuth, String httpProtocol, String piwikURl, String sDate, String logsPath, String dbUrl, String dbUsername, String dbPassword) throws Exception{
45
        this.piwikUsername = username;
46
        this.piwikPassword = password;
47
        this.httpProtocol = httpProtocol;
48
        this.piwikUrl = piwikURl;
49

  
50
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
51
        this.startDate = sdf.parse(sDate);
52

  
53
        this.tokenAuth = tokenAuth;
54
        this.logsPath = logsPath;
55
        this.dbUrl = dbUrl;
56
        this.dbUserName = dbUsername;
57
        this.dbPassword = dbPassword;
58
    }
59

  
60
    private String getPiwikLogUrl(){
61
        return httpProtocol + "://" + piwikUrl + "/";
62
    }
63

  
64
    private String getJson(String url,String username, String password) throws Exception {
65
        //String cred=username+":"+password;
66
        //String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
67
        try {
68
            URL website = new URL(url);
69
            URLConnection connection = website.openConnection();
70

  
71
            //connection.setRequestProperty ("Authorization", "Basic "+encoded);
72
            StringBuilder response;
73
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
74
                response = new StringBuilder();
75
                String inputLine;
76
                while ((inputLine = in.readLine()) != null) {
77
                    response.append(inputLine);
78
                    response.append("\n");
79
                }
80
            }
81
            return response.toString();
82
        }catch (Exception e){
83
            log.error("Failed to get URL: " + e);
84
            throw new Exception("Failed to get URL: " + e.toString(), e);
85
        }
86
    }
87

  
88
    public void getPiwikLogs() throws Exception{
89
        GetPortalLogs();
90
        GetRepositoriesLogs();
91
    }
92

  
93
    private void GetPortalLogs() throws Exception{
94

  
95
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
96
        Calendar start = Calendar.getInstance();
97
        start.setTime(startDate);
98
        Calendar end = Calendar.getInstance();
99
        end.add(Calendar.DAY_OF_MONTH, -1);
100
        //end.setTime(getFinalDate());
101

  
102
        try{
103
            log.info("downloading logs for site with piwik_id: 5");
104
            Class.forName("org.postgresql.Driver");
105
            Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
106
            PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source='5' HAVING max(timestamp) is not null;");
107
            ResultSet rs_date = st.executeQuery();
108

  
109
            while(rs_date.next()){
110
                start.setTime(sdf.parse(rs_date.getString(1)));
111
            }
112
            rs_date.close();
113
            conn.close();
114

  
115
            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
116

  
117
                String period="&period=day&date="+sdf.format(date);
118
                log.info("Downloading logs for " + sdf.format(date));
119

  
120

  
121
                FileSystem fs = FileSystem.get(new Configuration());
122
                FSDataOutputStream fin = fs.create(new Path(logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json"), true);
123
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=5" + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
124
                String content = "";
125

  
126
                int i=0;
127

  
128
                while(!content.equals("[]\n")) {
129
                    String apiUrl = baseApiUrl;
130

  
131
                    if (i > 0)
132
                        apiUrl += "&filter_offset=" + (i*1000);
133

  
134
                    content = getJson(apiUrl, piwikUsername, piwikPassword);
135

  
136
                    fin.write(content.getBytes());
137

  
138
                    i++;
139
                }
140
                fin.close();
141
//
142
//
143
//
144
//
145
//
146
//
147
//                String apiUrl=getPiwikLogUrl()+APImethod+"&idSite=5"+period+format+"&expanded=5&filter_limit=1000&token_auth="+tokenAuth;
148
//                String content = getJson(apiUrl,piwikUsername,piwikPassword);
149
//
150
//                //for (int i=1;i<10;i++){
151
//                int i = 1;
152
//                while(true) {
153
//                    String apiUrlnew=apiUrl+"&filter_offset="+i*1000;
154
//                    String contentNew = getJson(apiUrlnew,piwikUsername,piwikUsername);
155
//                    content += contentNew;
156
//                    i++;
157
//                    if(contentNew.equals("[]\n")){
158
//                        break;
159
//                    }
160
//                }
161
//                flushString(content, logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json");
162
            }
163
        } catch (Exception e) {
164
            log.error("Failed to get portal logs", e);
165
            throw new Exception("Failed to get portal logs: " + e.toString(), e);
166
        }
167
    }
168

  
169
    private void GetRepositoriesLogs() throws Exception{
170

  
171
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
172
        Calendar start = Calendar.getInstance();
173
        start.setTime(startDate);
174
        Calendar end = Calendar.getInstance();
175
        end.add(Calendar.DAY_OF_MONTH, -1);
176
        //end.setTime(getFinalDate());
177

  
178
        Class.forName("org.postgresql.Driver");
179
        Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
180
        Statement statement = conn.createStatement();
181
        ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from shadow.datasource where piwik_id is not null and piwik_id!='5' order by piwik_id;");
182
        while(rs.next()){
183
            int siteId = rs.getInt(1);
184
            PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source=?;");
185

  
186
            start.setTime(startDate);
187

  
188
            log.info("downloading logs for site with piwik_id: " + siteId);
189

  
190
            st.setInt(1, siteId);
191
            ResultSet rs_date = st.executeQuery();
192

  
193
            while(rs_date.next()){
194
                //log.info("source: " + siteId + " - date: " + rs_date.getString(1));
195
                if(rs_date.getString(1) == null || rs_date.getString(1).equals("null") || rs_date.getString(1).equals("")) {
196
//                        start = Calendar.getInstance();
197
//                        start.add(Calendar.MONTH, -1);
198
                    // DO NOTHING USE this.startDate!!!
199
                }
200
                else {
201
                    start.setTime(sdf.parse(rs_date.getString(1)));
202
                }
203
            }
204
            rs_date.close();
205

  
206
            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
207

  
208
                log.info("Downloading logs for " + sdf.format(date));
209

  
210
                String period="&period=day&date="+sdf.format(date);
211
                FileSystem fs = FileSystem.get(new Configuration());
212
                FSDataOutputStream fin = fs.create(new Path(logsPath + "repolog/" + siteId + "_Piwiklog"+sdf.format((date))+".json"), true);
213
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
214
                String content = "";
215

  
216
                int i=0;
217

  
218
                while(!content.equals("[]\n")) {
219
                    String apiUrl = baseApiUrl;
220

  
221
                    if (i > 0)
222
                        apiUrl += "&filter_offset=" + (i*1000);
223

  
224
                    content = getJson(apiUrl, piwikUsername, piwikPassword);
225

  
226
                    fin.write(content.getBytes());
227

  
228
                    i++;
229
                }
230
                fin.close();
231

  
232
            }
233
        }
234
        rs.close();
235
        conn.close();
236
    }
237

  
238
//    private void flushString(String data, String destination) throws Exception {
239
//        FSDataOutputStream fin;
240
//        try {
241
//            FileSystem fs = FileSystem.get(new Configuration());
242
//            fin = fs.create(new Path(destination), true);
243
//            fin.write(data.getBytes());
244
//            fin.close();
245
//        } catch (Exception e) {
246
//            log.error("Failed  to write exported data to a file : ", e);
247
//            throw new Exception("Failed  to write exported data to a file : " + e.toString(), e);
248
//        }
249
//    }
250
}
modules/dnet-openaire-usage-stats-export/branches/usage-stats-export-v2/src/main/java/eu/dnetlib/usagestats/export/IrusStats.java
1
package eu.dnetlib.usagestats.export;
2

  
3
import java.io.*;
4
//import java.io.BufferedReader;
5
//import java.io.InputStreamReader;
6
import java.net.URL;
7
import java.net.URLConnection;
8
import java.sql.ResultSet;
9
import java.text.SimpleDateFormat;
10
import java.util.Date;
11
import java.util.Calendar;
12

  
13
import java.sql.Connection;
14
import java.sql.DriverManager;
15
import java.sql.PreparedStatement;
16
import java.sql.Statement;
17

  
18
import org.json.simple.JSONArray;
19
import org.json.simple.JSONObject;
20
import org.json.simple.parser.JSONParser;
21

  
22
import org.apache.log4j.Logger;
23

  
24
/**
25
 * Created by tsampikos on 28/3/2017.
26
 */
27
public class IrusStats {
28
     private final String dbUrl;
29
    private final String dbSchema;
30
    private final String dbUserName;
31
    private final String dbPassword;
32

  
33
    private Connection conn = null;
34
    private Statement stmt = null;
35

  
36
    private final Logger log = Logger.getLogger(this.getClass());
37

  
38
    public IrusStats(String dbUrl, String dbUsername, String dbPassword) throws Exception {
39
        this.dbUrl = dbUrl;
40
        this.dbSchema = "shadow";
41
        this.dbUserName = dbUsername;
42
        this.dbPassword = dbPassword;
43

  
44
        connectDB();
45
        createTables();
46
    }
47

  
48
    private void connectDB() throws Exception {
49
        try {
50
            Class.forName("org.postgresql.Driver");
51
            conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
52

  
53
            stmt = conn.createStatement();
54
            String sqlSetSearchPath = "SET search_path TO " + dbSchema + ";";
55
            stmt.executeUpdate(sqlSetSearchPath);
56

  
57
            log.info("Opened database successfully");
58

  
59
        } catch (Exception e) {
60
            log.error("Connect to db failed: " + e);
61
            throw new Exception("Failed to connect to db: " + e.toString(), e);
62
        }
63
    }
64

  
65
    private void createTables() throws Exception {
66
        try {
67

  
68
            stmt = conn.createStatement();
69
            String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
70
            stmt.executeUpdate(sqlCreateTableSushiLog);
71

  
72
            stmt.executeUpdate("CREATE TABLE IF NOT EXISTS public.sushilog AS TABLE sushilog;");
73

  
74
            String sqlCopyPublicSushiLog="INSERT INTO sushilog SELECT * FROM public.sushilog;";
75
            stmt.executeUpdate(sqlCopyPublicSushiLog);
76

  
77
            String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " +
78
                " ON INSERT TO sushilog " +
79
                " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository," +
80
                "sushilog.rid, sushilog.date " +
81
                "FROM sushilog " +
82
                "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
83
            stmt.executeUpdate(sqlcreateRuleSushiLog);
84

  
85
            stmt.close();
86
            conn.close();
87
            log.info("Sushi Tables Created");
88
        } catch (Exception e) {
89
            log.error("Failed to create tables: " + e);
90
            throw new Exception("Failed to create tables: " + e.toString(), e);
91
        }
92
    }
93

  
94
    public void irusStats() throws Exception {
95
        if (conn.isClosed())
96
            connectDB();
97

  
98
        stmt = conn.createStatement();
99
        conn.setAutoCommit(false);
100

  
101
        //String sql = "INSERT INTO sushi_result_downloads SELECT s.source, d.id AS repository, ro.id, s.date, s.count FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total'";
102
        //String sql = "SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count INTO downloads_stats FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total'";
103
        //String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total';";
104
        String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.rid=ro.orid AND metric_type='ft_total' AND s.source='IRUS-UK';";
105
        stmt.executeUpdate(sql);
106

  
107

  
108
        stmt.close();
109
        conn.commit();
110
        conn.close();
111
    }
112

  
113
    public void processIrusRRReport() throws Exception {
114
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
115
        String reportUrl = "https://irus.jisc.ac.uk" + "/api/sushilite/v1_7/GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate=" + simpleDateFormat.format(new Date()) + "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
116

  
117
        log.info("Getting Irus report: " + reportUrl);
118

  
119
        String text = getJson(reportUrl, "", "");
120

  
121
        log.info("Report: " + text);
122

  
123
        JSONParser parser = new JSONParser();
124
        JSONObject jsonObject = (JSONObject) parser.parse(text);
125
        jsonObject = (JSONObject) jsonObject.get("ReportResponse");
126
        jsonObject = (JSONObject) jsonObject.get("Report");
127
        jsonObject = (JSONObject) jsonObject.get("Report");
128
        jsonObject = (JSONObject) jsonObject.get("Customer");
129
        JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
130
        int i = 0;
131
        for (Object aJsonArray : jsonArray) {
132
            JSONObject jsonObjectRow = (JSONObject) aJsonArray;
133
            JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
134
            for (Object identifier : itemIdentifier) {
135
                JSONObject opendoar = (JSONObject) identifier;
136
                if(opendoar.get("Type").toString().equals("OpenDOAR")){
137
                    //System.out.println(i + ": " + opendoar.get("Value").toString());
138
                    log.info(i + ": " + opendoar.get("Value").toString());
139
                    i++;
140
                    processIrusIRReport(opendoar.get("Value").toString());
141
                    break;
142
                }
143
            }
144
            //break;
145
        }
146
    }
147

  
148
    private void processIrusIRReport(String opendoar) throws Exception {
149
        if (conn.isClosed())
150
            connectDB();
151

  
152
        conn.setAutoCommit(false);
153

  
154
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
155

  
156
        Calendar start = Calendar.getInstance();
157
        start.set(Calendar.YEAR, 2016);
158
        start.set(Calendar.MONTH, Calendar.JANUARY);
159
        //start.setTime(simpleDateFormat.parse("2016-01"));
160

  
161
        Calendar end = Calendar.getInstance();
162
        end.add(Calendar.DAY_OF_MONTH, -1);
163

  
164
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
165
        PreparedStatement st = conn.prepareStatement("SELECT max(date) FROM public.sushilog WHERE repository=?;");
166
        st.setString(1, "opendoar____::" + opendoar);
167
        ResultSet rs_date = st.executeQuery();
168
        while(rs_date.next()){
169
            if(rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
170
                start.setTime(sdf.parse(rs_date.getString(1)));
171
            }
172
        }
173
        rs_date.close();
174

  
175
        PreparedStatement preparedStatement = conn.prepareStatement("INSERT INTO sushilog (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
176
        int batch_size = 0;
177

  
178
        while(start.before(end)){
179
            //log.info("date: " + simpleDateFormat.format(start.getTime()));
180
            String reportUrl = "https://irus.jisc.ac.uk/api/sushilite/v1_7/GetReport/?Report=IR1&Release=4&RequestorID=OpenAIRE&BeginDate=" + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime()) + "&RepositoryIdentifier=opendoar%3A" + opendoar + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
181
            start.add(Calendar.MONTH, 1);
182

  
183
            String text = getJson(reportUrl, "", "");
184
            if(text == null){
185
                continue;
186
            }
187

  
188
            JSONParser parser = new JSONParser();
189
            JSONObject jsonObject = (JSONObject) parser.parse(text);
190
            jsonObject = (JSONObject) jsonObject.get("ReportResponse");
191
            jsonObject = (JSONObject) jsonObject.get("Report");
192
            jsonObject = (JSONObject) jsonObject.get("Report");
193
            jsonObject = (JSONObject) jsonObject.get("Customer");
194
            JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
195
            if(jsonArray == null){
196
                continue;
197
            }
198
            String oai = "";
199
            for (Object aJsonArray : jsonArray) {
200
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
201
                JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
202
                for (Object identifier : itemIdentifier) {
203
                    JSONObject oaiPmh = (JSONObject) identifier;
204
                    if(oaiPmh.get("Type").toString().equals("OAI")){
205
                        oai = oaiPmh .get("Value").toString();
206
                        //System.out.println("OAI: " + oai);
207
                        break;
208
                    }
209
                }
210

  
211
                JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
212
                String period;
213
                String type;
214
                String count;
215
                for (Object perf : itemPerformance) {
216
                    JSONObject performance = (JSONObject) perf;
217
                    JSONObject periodObj = (JSONObject) performance.get("Period");
218
                    period = periodObj.get("Begin").toString();
219
                    JSONObject instanceObj = (JSONObject) performance.get("Instance");
220
                    type = instanceObj.get("MetricType").toString();
221
                    count = instanceObj.get("Count").toString();
222
                    //System.out.println(oai + " : " + period + " : " + count);
223

  
224
                    preparedStatement.setString(1, "IRUS-UK");
225
                    preparedStatement.setString(2, "opendoar____::" + opendoar);
226
                    preparedStatement.setString(3, oai);
227
                    preparedStatement.setString(4, period);
228
                    preparedStatement.setString(5, type);
229
                    preparedStatement.setInt(6, Integer.parseInt(count));
230
                    preparedStatement.addBatch();
231
                    batch_size++;
232
                    if(batch_size == 10000){
233
                        preparedStatement.executeBatch();
234
                        conn.commit();
235
                        batch_size = 0;
236
                    }
237
                }
238
                //break;
239
            }
240
            //break;
241
        }
242

  
243
        preparedStatement.executeBatch();
244
        conn.commit();
245
        conn.close();
246
    }
247

  
248
    private String getJson(String url,String username, String password) throws Exception {
249
        //String cred=username+":"+password;
250
        //String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
251
        try {
252
            URL website = new URL(url);
253
            URLConnection connection = website.openConnection();
254
            //connection.setRequestProperty ("Authorization", "Basic "+encoded);
255
            StringBuilder response;
256
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
257
                response = new StringBuilder();
258
                String inputLine;
259
                while ((inputLine = in.readLine()) != null) {
260
                    response.append(inputLine);
261
                    response.append("\n");
262
                }
263
            }
264
            return response.toString();
265
        }catch (Exception e){
266
            log.error("Failed to get URL", e);
267
            return null;
268
        }
269
    }
270

  
271
    /*
272
    private void flushString(String data, String destination) throws Exception {
273
        FSDataOutputStream fin;
274
        try {
275
            FileSystem fs = FileSystem.get(new Configuration());
276
            fin = fs.create(new Path(destination), true);
277
            fin.write(data.getBytes());
278
            fin.close();
279
        } catch (Exception e) {
280
            log.error("Failed  to write exported data to a file : ", e);
281
            throw new Exception("Failed  to write exported data to a file : " + e.toString(), e);
282
        }
283
    }
284
    */
285
}
modules/dnet-openaire-usage-stats-export/branches/usage-stats-export-v2/src/main/java/eu/dnetlib/usagestats/export/SarcStats.java
1
package eu.dnetlib.usagestats.export;
2

  
3
import java.io.*;
4
//import java.io.BufferedReader;
5
//import java.io.InputStreamReader;
6
import java.net.URL;
7
import java.net.URLConnection;
8
import java.sql.ResultSet;
9
import java.text.SimpleDateFormat;
10
import java.util.Date;
11
import java.util.Calendar;
12

  
13
import java.sql.Connection;
14
import java.sql.DriverManager;
15
import java.sql.PreparedStatement;
16
import java.sql.Statement;
17

  
18
import org.json.simple.JSONArray;
19
import org.json.simple.JSONObject;
20
import org.json.simple.parser.JSONParser;
21

  
22
import org.apache.log4j.Logger;
23
/**
24
 * Created by tsampikos on 27/6/2017.
25
 */
26

  
27
public class SarcStats {
28

  
29
    private final String dbUrl;
30
    private final String dbSchema;
31
    private final String dbUserName;
32
    private final String dbPassword;
33

  
34
    private Connection conn = null;
35
    private Statement stmt = null;
36

  
37
    private final Logger log = Logger.getLogger(this.getClass());
38

  
39
    public SarcStats(String dbUrl, String dbUsername, String dbPassword) throws Exception {
40
        this.dbUrl = dbUrl;
41
        this.dbSchema = "shadow";
42
        this.dbUserName = dbUsername;
43
        this.dbPassword = dbPassword;
44

  
45
        connectDB();
46
        createTables();
47
    }
48

  
49
    private void connectDB() throws Exception {
50
        try {
51
            Class.forName("org.postgresql.Driver");
52
            conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
53

  
54
            stmt = conn.createStatement();
55
            String sqlSetSearchPath = "SET search_path TO " + dbSchema + ";";
56
            stmt.executeUpdate(sqlSetSearchPath);
57

  
58
            //log.info("Opened database successfully")
59

  
60
        } catch (Exception e) {
61
            //log.error("Connect to db failed: " + e);
62
            throw new Exception("Failed to connect to db: " + e.toString(), e);
63
        }
64
    }
65

  
66
    private void createTables() throws Exception {
67
        try {
68

  
69
            stmt = conn.createStatement();
70
            String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
71
            stmt.executeUpdate(sqlCreateTableSushiLog);
72

  
73
            //String sqlCopyPublicSushiLog="INSERT INTO sushilog SELECT * FROM public.sushilog;";
74
            //stmt.executeUpdate(sqlCopyPublicSushiLog);
75

  
76
            String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " +
77
                    " ON INSERT TO sushilog " +
78
                    " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository," +
79
                    "sushilog.rid, sushilog.date " +
80
                    "FROM sushilog " +
81
                    "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
82
            stmt.executeUpdate(sqlcreateRuleSushiLog);
83

  
84
            stmt.close();
85
            conn.close();
86
            log.info("Sushi Tables Created");
87
        } catch (Exception e) {
88
            log.error("Failed to create tables: " + e);
89
            throw new Exception("Failed to create tables: " + e.toString(), e);
90
        }
91
    }
92

  
93

  
94
    public void processSarc() throws Exception {
95
        processARReport("https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", "1646-107X");
96
        processARReport("http://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", "0873-819X");
97
        processARReport("https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", "1646-2335");
98
        processARReport("http://revistas.rcaap.pt/cct/sushiLite/v1_7/", "2182-3030");
99
        processARReport("http://actapediatrica.spp.pt/sushiLite/v1_7/", "0873-9781");
100
        //processARReport("http://revistas.rcaap.pt/boletimspm/sushiLite/v1_7/");
101
        processARReport("https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", "0873-6529");
102
        processARReport("http://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", "0430-5027");
103
        processARReport("http://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", "2182-8474");
104
        processARReport("https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", "0871-6099");
105
        processARReport("https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", "0871-9187");
106
        processARReport("https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", "1646-091X");
107
        processARReport("https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", "2183-5799");
108
        processARReport("http://revistas.rcaap.pt/ecr/sushiLite/v1_7/", "1647-2098");
109
        processARReport("https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", "0872-0754");
110
        processARReport("http://revistas.rcaap.pt/cea/sushiLite/v1_7/", "1645-3794");
111
        processARReport("http://revistas.rcaap.pt/proelium/sushiLite/v1_7/", "1645-8826");
112
        processARReport("http://revistas.rcaap.pt/millenium/sushiLite/v1_7/", "0873-3015");
113

  
114
        //processARReport("http://www.bad.pt/publicacoes/index.php/cadernos/sushiLite/v1_7/");
115
    }
116

  
117
    public void sarcStats() throws Exception {
118
        if (conn.isClosed())
119
            connectDB();
120

  
121
        stmt = conn.createStatement();
122
        conn.setAutoCommit(false);
123

  
124
        //String sql = "SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' INTO downloads_stats FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.rid=ro.orid AND metric_type='ft_total'";
125
        String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilog s, datasource_oids d, datasource_results dr, result_pids ro WHERE d.orid LIKE '%' || s.repository || '%' AND dr.id=d.id AND dr.result=ro.id AND s.rid=ro.pid AND ro.type='doi' AND metric_type='ft_total' AND s.source='SARC-OJS';";
126
        stmt.executeUpdate(sql);
127

  
128

  
129
        stmt.close();
130
        conn.commit();
131
        conn.close();
132
    }
133

  
134
    public void processARReport(String url, String issn) throws Exception {
135
        log.info("Processing SARC! issn: " + issn + " with url: " + url);
136
        if (conn.isClosed())
137
            connectDB();
138

  
139
        conn.setAutoCommit(false);
140

  
141
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
142

  
143
        Calendar start = Calendar.getInstance();
144
        start.set(Calendar.YEAR, 2016);
145
        start.set(Calendar.MONTH, Calendar.JANUARY);
146
        //start.setTime(simpleDateFormat.parse("2016-01"));
147

  
148
        Calendar end = Calendar.getInstance();
149
        end.add(Calendar.DAY_OF_MONTH, -1);
150

  
151
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
152
        PreparedStatement st = conn.prepareStatement("SELECT max(date) FROM public.sushilog WHERE repository=?;");
153
        st.setString(1, issn);
154
        ResultSet rs_date = st.executeQuery();
155
        while(rs_date.next()){
156
            if(rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
157
                start.setTime(sdf.parse(rs_date.getString(1)));
158
            }
159
        }
160
        rs_date.close();
161

  
162
        PreparedStatement preparedStatement = conn.prepareStatement("INSERT INTO sushilog (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
163
        int batch_size = 0;
164

  
165
        while(start.before(end)){
166
            //String reportUrl = "http://irus.mimas.ac.uk/api/sushilite/v1_7/GetReport/?Report=IR1&Release=4&RequestorID=OpenAIRE&BeginDate=" + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime()) + "&RepositoryIdentifier=opendoar%3A" + opendoar + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
167
            String reportUrl = url + "GetReport/?Report=AR1&Format=json&BeginDate=" + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime());
168
            //System.out.println(reportUrl);
169
            start.add(Calendar.MONTH, 1);
170

  
171
            String text = getJson(reportUrl, "", "");
172
            if(text == null){
173
                continue;
174
            }
175

  
176
            /*
177
            PrintWriter wr = new PrintWriter(new FileWriter("logs/" + simpleDateFormat.format(start.getTime()) + ".json"));
178
            wr.print(text);
179
            wr.close();
180
            */
181

  
182
            JSONParser parser = new JSONParser();
183
            JSONObject jsonObject = (JSONObject) parser.parse(text);
184
            jsonObject = (JSONObject) jsonObject.get("sc:ReportResponse");
185
            jsonObject = (JSONObject) jsonObject.get("sc:Report");
186
            if(jsonObject == null){
187
                continue;
188
            }
189
            jsonObject = (JSONObject) jsonObject.get("c:Report");
190
            jsonObject = (JSONObject) jsonObject.get("c:Customer");
191
            Object obj = jsonObject.get("c:ReportItems");
192
            JSONArray jsonArray = new JSONArray();
193
            if(obj instanceof JSONObject){
194
                jsonArray.add(obj);
195
            }
196
            else{
197
                jsonArray = (JSONArray) obj;
198
                //jsonArray = (JSONArray) jsonObject.get("c:ReportItems");
199
            }
200
            if(jsonArray == null){
201
                continue;
202
            }
203

  
204
            String rid = "";
205
            for (Object aJsonArray : jsonArray) {
206
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
207
                JSONArray itemIdentifier = new JSONArray();
208
                obj = jsonObjectRow.get("c:ItemIdentifier");
209
                if(obj instanceof JSONObject){
210
                    itemIdentifier.add(obj);
211
                }
212
                else{
213
                    //JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("c:ItemIdentifier");
214
                    itemIdentifier = (JSONArray) obj;
215
                }
216
                for (Object identifier : itemIdentifier) {
217
                    JSONObject doi = (JSONObject) identifier;
218
                    if(doi.get("c:Type").toString().equals("DOI")){
219
                        rid = doi.get("c:Value").toString();
220
                        //System.out.println("DOI: " + rid);
221
                        break;
222
                    }
223
                }
224
                if(rid.isEmpty()){
225
                    continue;
226
                }
227

  
228
                JSONObject itemPerformance = (JSONObject) jsonObjectRow.get("c:ItemPerformance");
229
                //for (Object perf : itemPerformance) {
230
                JSONObject performance = (JSONObject) itemPerformance;
231
                JSONObject periodObj = (JSONObject) performance.get("c:Period");
232
                String period = periodObj.get("c:Begin").toString();
233
                JSONObject instanceObj = (JSONObject) performance.get("c:Instance");
234
                String type = instanceObj.get("c:MetricType").toString();
235
                String count = instanceObj.get("c:Count").toString();
236
                //System.out.println(rid + " : " + period + " : " + count);
237

  
238
                preparedStatement.setString(1, "SARC-OJS");
239
                preparedStatement.setString(2, issn);
240
                //preparedStatement.setString(2, url);
241
                preparedStatement.setString(3, rid);
242
                preparedStatement.setString(4, period);
243
                preparedStatement.setString(5, type);
244
                preparedStatement.setInt(6, Integer.parseInt(count));
245
                preparedStatement.addBatch();
246
                batch_size++;
247
                if(batch_size == 10000){
248
                    preparedStatement.executeBatch();
249
                    conn.commit();
250
                    batch_size = 0;
251
                }
252
                //}
253

  
254
                //break;
255
            }
256
            //break;
257
        }
258

  
259
        preparedStatement.executeBatch();
260
        conn.commit();
261
        conn.close();
262
    }
263

  
264
    private String getJson(String url,String username, String password) throws Exception {
265
        //String cred=username+":"+password;
266
        //String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
267
        try {
268
            URL website = new URL(url);
269
            URLConnection connection = website.openConnection();
270
            //connection.setRequestProperty ("Authorization", "Basic "+encoded);
271
            StringBuilder response;
272
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
273
                response = new StringBuilder();
274
                String inputLine;
275
                while ((inputLine = in.readLine()) != null) {
276
                    response.append(inputLine);
277
                    response.append("\n");
278
                }
279
            }
280
            return response.toString();
281
        }catch (Exception e){
282
            log.error("Failed to get URL: " + e);
283
            //System.out.println("Failed to get URL: " + e);
284
            return null;
285
            //throw new Exception("Failed to get URL: " + e.toString(), e);
286
        }
287
    }
288

  
289
    /*
290
    private void flushString(String data, String destination) throws Exception {
291
        FSDataOutputStream fin;
292
        try {
293
            FileSystem fs = FileSystem.get(new Configuration());
294
            fin = fs.create(new Path(destination), true);
295
            fin.write(data.getBytes());
296
            fin.close();
297
        } catch (Exception e) {
298
            log.error("Failed  to write exported data to a file : ", e);
299
            throw new Exception("Failed  to write exported data to a file : " + e.toString(), e);
300
        }
301
    }
302
    */
303
}
modules/dnet-openaire-usage-stats-export/branches/usage-stats-export-v2/src/main/java/eu/dnetlib/usagestats/export/PiwikStatsDB.java
1
package eu.dnetlib.usagestats.export;
2

  
3
import java.io.*;
4
import java.net.URLDecoder;
5
import java.sql.Connection;
6
import java.sql.DriverManager;
7
import java.sql.PreparedStatement;
8
import java.sql.Statement;
9
import java.sql.Timestamp;
10
import java.text.SimpleDateFormat;
11
import java.util.*;
12

  
13
import org.apache.hadoop.conf.Configuration;
14
import org.apache.hadoop.fs.LocatedFileStatus;
15
import org.apache.hadoop.fs.Path;
16
import org.apache.hadoop.fs.FileSystem;
17
import org.apache.hadoop.fs.RemoteIterator;
18
import org.apache.log4j.Logger;
19
import org.json.simple.JSONArray;
20
import org.json.simple.JSONObject;
21
import org.json.simple.parser.JSONParser;
22

  
23
public class PiwikStatsDB {
24
    private final String dbUrl;
25
    private final String dbSchema;
26
    private final String dbUserName;
27
    private final String dbPassword;
28
    private final String logPath;
29

  
30
    private Connection conn = null;
31
    private Statement stmt = null;
32

  
33
    private final Logger log = Logger.getLogger(this.getClass());
34

  
35
    public PiwikStatsDB(String dbUrl, String dbUserName, String dbPassword, String logPath) throws Exception {
36
        this.dbUrl = dbUrl;
37
        this.dbSchema = "shadow";
38
        this.dbUserName = dbUserName;
39
        this.dbPassword = dbPassword;
40
        this.logPath = logPath;
41

  
42
        connectDB();
43
        createTables();
44
    }
45

  
46
    private void connectDB() throws Exception {
47
        try {
48
            Class.forName("org.postgresql.Driver");
49
            conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
50

  
51
            stmt = conn.createStatement();
52
            String sqlSetSearchPath = "SET search_path TO " + dbSchema + ";";
53
            stmt.executeUpdate(sqlSetSearchPath);
54

  
55
            //log.info("Opened database successfully");
56
        } catch (Exception e) {
57
            log.error("Connect to db failed: " + e);
58
            throw new Exception("Failed to connect to db: " + e.toString(), e);
59
        }
60
    }
61

  
62
    private void createTables() throws Exception {
63
        try {
64
            stmt = conn.createStatement();
65
            String sqlCreateTablePiwikLog = "CREATE TABLE IF NOT EXISTS piwiklog(source INTEGER, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
66
            String sqlcreateRulePiwikLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " +
67
                    " ON INSERT TO piwiklog " +
68
                    " WHERE (EXISTS ( SELECT piwiklog.source, piwiklog.id_visit," +
69
                    "piwiklog.action, piwiklog.\"timestamp\", piwiklog.entity_id " +
70
                    "FROM piwiklog " +
71
                    "WHERE piwiklog.source = new.source AND piwiklog.id_visit = new.id_visit AND piwiklog.action = new.action AND piwiklog.entity_id = new.entity_id AND piwiklog.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
72
            stmt.executeUpdate(sqlCreateTablePiwikLog);
73
            stmt.executeUpdate(sqlcreateRulePiwikLog);
74

  
75
            String sqlCopyPublicPiwiklog="insert into piwiklog select * from public.piwiklog;";
76
            stmt.executeUpdate(sqlCopyPublicPiwiklog);
77

  
78
            String sqlCreateTablePortalLog = "CREATE TABLE IF NOT EXISTS process_portal_log(source INTEGER, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, timestamp));";
79
            String sqlcreateRulePortalLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " +
80
                    " ON INSERT TO process_portal_log " +
81
                    " WHERE (EXISTS ( SELECT process_portal_log.source, process_portal_log.id_visit," +
82
                    "process_portal_log.\"timestamp\" " +
83
                    "FROM process_portal_log " +
84
                    "WHERE process_portal_log.source = new.source AND process_portal_log.id_visit = new.id_visit AND process_portal_log.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
85
            stmt.executeUpdate(sqlCreateTablePortalLog);
86
            stmt.executeUpdate(sqlcreateRulePortalLog);
87

  
88
            stmt.close();
89
            conn.close();
90
            log.info("Usage Tables Created");
91

  
92
        } catch (Exception e) {
93
            log.error("Failed to create tables: " + e);
94
            throw new Exception("Failed to create tables: " + e.toString(), e);
95
            //System.exit(0);
96
        }
97
    }
98

  
99
    protected void processLogs() throws Exception {
100
        try {
101
            processRepositoryLog();
102
            log.info("repository process done");
103
            removeDoubleClicks();
104
            log.info("removing double clicks done");
105
            cleanOAI();
106
            log.info("cleaning oai done");
107

  
108
            processPortalLog();
109
            log.info("portal process done");
110
            portalStats();
111
            log.info("portal stats done");
112
        } catch (Exception e) {
113
            log.error("Failed to process logs: " + e);
114
            throw new Exception("Failed to process logs: " + e.toString(), e);
115
        }
116
    }
117

  
118
    protected void usageStats() throws Exception {
119
        try {
120
            //resultStats();
121
            //dataSourceStats();
122
            //organizationsStats();
123
            //projectsStats();
124
            //repositoryViewsStats();
125
            //repositoryDownloadsStats();
126
            viewsStats();
127
            downloadsStats();
128
            log.info("stat tables and views done");
129
        } catch (Exception e) {
130
            log.error("Failed to create usage stats: " + e);
131
            throw new Exception("Failed to create usage stats: " + e.toString(), e);
132
        }
133
    }
134

  
135
    // Import repository Logs to DB
136
    private void processRepositoryLog() throws Exception {
137
        if (conn.isClosed())
138
            connectDB();
139

  
140
        stmt = conn.createStatement();
141
        conn.setAutoCommit(false);
142

  
143
        ArrayList<String> jsonFiles = listHdfsDir(logPath + "repolog");
144

  
145
        PreparedStatement prepStatem = conn.prepareStatement("INSERT INTO piwiklog (source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?)");
146
        int batch_size = 0;
147
        for (String jsonFile : jsonFiles) {
148
            JSONParser parser = new JSONParser();
149
            JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile));
150

  
151
            for (Object aJsonArray : jsonArray) {
152
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
153
                int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString());
154
                String idVisit = jsonObjectRow.get("idVisit").toString();
155
                String country = jsonObjectRow.get("country").toString();
156
                String referrerName = jsonObjectRow.get("referrerName").toString();
157
                String agent = jsonObjectRow.get("browser").toString();
158

  
159
                String sourceItemType = "repItem";
160

  
161
                JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails"));
162
                for (Object actionDetail : actionDetails) {
163
                    JSONObject actionDetailsObj = (JSONObject) actionDetail;
164

  
165
                    if (actionDetailsObj.get("customVariables") != null) {
166
                        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
167
                        simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
168
                        Timestamp timestamp = new Timestamp(Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000);
169
                        String url = actionDetailsObj.get("url").toString();
170
                        String oaipmh = ((JSONObject) ((JSONObject) actionDetailsObj.get("customVariables")).get("1")).get("customVariablePageValue1").toString();
171
                        String action = actionDetailsObj.get("type").toString();
172

  
173
                        prepStatem.setInt(1, idSite);
174
                        prepStatem.setString(2, idVisit);
175
                        prepStatem.setString(3, country);
176
                        prepStatem.setString(4, action);
177
                        prepStatem.setString(5, url);
178
                        prepStatem.setString(6, oaipmh);
179
                        prepStatem.setString(7, sourceItemType);
180
                        prepStatem.setString(8, simpleDateFormat.format(timestamp));
181
                        prepStatem.setString(9, referrerName);
182
                        prepStatem.setString(10, agent);
183
                        prepStatem.addBatch();
184
                        batch_size++;
185
                        if (batch_size == 10000) {
186
                            prepStatem.executeBatch();
187
                            conn.commit();
188
                            batch_size = 0;
189
                        }
190
                    }
191
                }
192
            }
193
        }
194
        prepStatem.executeBatch();
195
        conn.commit();
196
        stmt.close();
197
        conn.close();
198
    }
199

  
200
    private void removeDoubleClicks() throws Exception {
201
        if (conn.isClosed())
202
            connectDB();
203

  
204
        stmt = conn.createStatement();
205
        conn.setAutoCommit(false);
206

  
207
        //clean download double clicks
208
        String sql = "DELETE FROM piwiklog p WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp FROM piwiklog p1, piwiklog p2 WHERE p1.source!='5' AND p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='download' AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp AND extract(EPOCH FROM p2.timestamp::timestamp-p1.timestamp::timestamp)<30 AND p.source=p1.source AND p.id_visit=p1.id_visit AND p.action=p1.action AND p.entity_id=p1.entity_id AND p.timestamp=p1.timestamp);";
209
        stmt.executeUpdate(sql);
210
        stmt.close();
211
        conn.commit();
212

  
213
        stmt = conn.createStatement();
214

  
215
        //clean view double clicks
216
        sql = "DELETE FROM piwiklog p WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp from piwiklog p1, piwiklog p2 WHERE p1.source!='5' AND p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='action' AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp AND extract(EPOCH FROM p2.timestamp::timestamp-p1.timestamp::timestamp)<10 AND p.source=p1.source AND p.id_visit=p1.id_visit AND p.action=p1.action AND p.entity_id=p1.entity_id AND p.timestamp=p1.timestamp);";
217
        stmt.executeUpdate(sql);
218
        stmt.close();
219
        conn.commit();
220
        conn.close();
221
    }
222

  
223
    private void viewsStats() throws Exception {
224
        if (conn.isClosed())
225
            connectDB();
226

  
227
        stmt = conn.createStatement();
228
        conn.setAutoCommit(false);
229

  
230
        //String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as views, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
231
        String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
232
        stmt.executeUpdate(sql);
233

  
234
        // sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire INTO views_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
235
        sql = "CREATE TABLE IF NOT EXISTS views_stats AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire FROM result_views_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
236
        stmt.executeUpdate(sql);
237

  
238
//        sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count INTO pageviews_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
239
        sql = "CREATE TABLE IF NOT EXISTS pageviews_stats AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count FROM result_views_monthly p, datasource d, result_oids ro where p.source='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
240
        stmt.executeUpdate(sql);
241

  
242
        sql = "DROP VIEW IF EXISTS result_views_monthly;";
243
        stmt.executeUpdate(sql);
244

  
245
        stmt.close();
246
        conn.commit();
247
        conn.close();
248
    }
249

  
250
    private void downloadsStats() throws Exception {
251
        if (conn.isClosed())
252
            connectDB();
253

  
254
        stmt = conn.createStatement();
255
        conn.setAutoCommit(false);
256

  
257
        //String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
258
        String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
259
        stmt.executeUpdate(sql);
260

  
261
        //sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
262
//        sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
263
        sql = "CREATE TABLE IF NOT EXISTS downloads_stats AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
264
        stmt.executeUpdate(sql);
265

  
266
        sql = "DROP VIEW IF EXISTS result_downloads_monthly;";
267
        stmt.executeUpdate(sql);
268

  
269
        stmt.close();
270
        conn.commit();
271
        conn.close();
272
    }
273

  
274
    public void finalizeStats() throws Exception {
275
        if (conn.isClosed())
276
            connectDB();
277

  
278
        stmt = conn.createStatement();
279
        conn.setAutoCommit(false);
280

  
281
        Calendar startCalendar = Calendar.getInstance();
282
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
283
        Calendar endCalendar = Calendar.getInstance();
284
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
285
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
286

  
287
//        String sql = "SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date INTO full_dates FROM generate_series(0, " + diffMonth + ", 1) AS offs;";
288
        String sql = "CREATE TABLE IF NOT EXISTS full_dates AS SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date FROM generate_series(0, " + diffMonth + ", 1) AS offs;";
289
        stmt.executeUpdate(sql);
290

  
291
        sql = "CREATE INDEX full_dates_full_date ON full_dates USING btree(full_date);";
292
        stmt.executeUpdate(sql);
293

  
294

  
295
        sql = "CREATE INDEX views_stats_source ON views_stats USING btree(source);";
296
        stmt.executeUpdate(sql);
297

  
298
        sql = "CREATE INDEX views_stats_repository_id ON views_stats USING btree(repository_id);";
299
        stmt.executeUpdate(sql);
300

  
301
        sql = "CREATE INDEX views_stats_result_id ON views_stats USING btree(result_id);";
302
        stmt.executeUpdate(sql);
303

  
304
        sql = "CREATE INDEX views_stats_date ON views_stats USING btree(date);";
305
        stmt.executeUpdate(sql);
306

  
307

  
308
        sql = "CREATE INDEX pageviews_stats_source ON pageviews_stats USING btree(source);";
309
        stmt.executeUpdate(sql);
310

  
311
        sql = "CREATE INDEX pageviews_stats_repository_id ON pageviews_stats USING btree(repository_id);";
312
        stmt.executeUpdate(sql);
313

  
314
        sql = "CREATE INDEX pageviews_stats_result_id ON pageviews_stats USING btree(result_id);";
315
        stmt.executeUpdate(sql);
316

  
317
        sql = "CREATE INDEX pageviews_stats_date ON pageviews_stats USING btree(date);";
318
        stmt.executeUpdate(sql);
319

  
320

  
321
        sql = "CREATE INDEX downloads_stats_source ON downloads_stats USING btree(source);";
322
        stmt.executeUpdate(sql);
323

  
324
        sql = "CREATE INDEX downloads_stats_repository_id ON downloads_stats USING btree(repository_id);";
325
        stmt.executeUpdate(sql);
326

  
327
        sql = "CREATE INDEX downloads_stats_result_id ON downloads_stats USING btree(result_id);";
328
        stmt.executeUpdate(sql);
329

  
330
        sql = "CREATE INDEX downloads_stats_date ON downloads_stats USING btree(date);";
331
        stmt.executeUpdate(sql);
332

  
333
//        sql = "SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views INTO usage_stats FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
334
        sql = "CREATE TABLE IF NOT EXISTS usage_stats AS SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;";
335
        stmt.executeUpdate(sql);
336

  
337
        sql = "CREATE INDEX usage_stats_source ON usage_stats USING btree(source);";
338
        stmt.executeUpdate(sql);
339

  
340
        sql = "CREATE INDEX usage_stats_repository_id ON usage_stats USING btree(repository_id);";
341
        stmt.executeUpdate(sql);
342

  
343
        sql = "CREATE INDEX usage_stats_result_id ON usage_stats USING btree(result_id);";
344
        stmt.executeUpdate(sql);
345

  
346
        sql = "CREATE INDEX usage_stats_date ON usage_stats USING btree(date);";
347
        stmt.executeUpdate(sql);
348

  
349
        stmt.close();
350
        conn.commit();
351
        conn.close();
352
    }
353

  
354
    //views stats
355
//    private void viewsStatsOLD() throws Exception {
356
//        if (conn.isClosed())
357
//            connectDB();
358
//
359
//        stmt = conn.createStatement();
360
//        conn.setAutoCommit(false);
361
//
362
//        Calendar startCalendar = Calendar.getInstance();
363
//        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
364
//        Calendar endCalendar = Calendar.getInstance();
365
//        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
366
//        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
367
//
368
//        String sql = "CREATE OR REPLACE VIEW result_views_monthly as select entity_id AS id, COUNT(entity_id) as views, extract('year' from timestamp::date) ||'-'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
369
//        stmt.executeUpdate(sql);
370
//
371
//        sql = "SELECT d.id, d.new_date AS month, CASE when rdm.views IS NULL THEN 0 ELSE rdm.views END, d.source INTO result_views_sushi FROM (SELECT distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY-MM-DD') AS new_date, rdsm.source FROM generate_series(0, " + diffMonth + ", 1) AS offs, result_views_monthly rdsm) d LEFT JOIN (SELECT id, month, views, source FROM result_views_monthly) rdm ON d.new_date=rdm.month AND d.id=rdm.id AND d.source=rdm.source ORDER BY d.source, d.id, d.new_date;";
372
//        stmt.executeUpdate(sql);
373
//
374
//        sql = "CREATE INDEX result_views_sushi_id ON result_views_sushi USING btree (id);";
375
//        stmt.executeUpdate(sql);
376
//
377
//        sql = "CREATE INDEX result_views_sushi_month ON result_views_sushi USING btree (month);";
378
//        stmt.executeUpdate(sql);
379
//
380
//        sql = "CREATE INDEX result_views_sushi_source ON result_views_sushi USING btree (source);";
381
//        stmt.executeUpdate(sql);
382
//
383
//        sql = "SELECT roid.id, extract('year' from month::date) ||'/'|| LPAD(CAST(extract('month' from month::date) AS VARCHAR), 2, '0') as month, max(views) as views, source INTO result_views FROM result_views_sushi rvs, result_oids roid WHERE rvs.id=roid.orid GROUP BY source, roid.id, month ORDER BY source, roid.id, month;";
384
//        stmt.executeUpdate(sql);
385
//
386
//        sql = "CREATE INDEX result_views_id ON result_views USING btree (id);";
387
//        stmt.executeUpdate(sql);
388
//
389
//        sql = "CREATE INDEX result_views_month ON result_views USING btree (month);";
390
//        stmt.executeUpdate(sql);
391
//
392
//        sql = "CREATE INDEX result_views_source ON result_views USING btree (source);";
393
//        stmt.executeUpdate(sql);
394
//
395
//
396
//        sql = "DROP VIEW IF EXISTS result_views_monthly;";
397
//        stmt.executeUpdate(sql);
398
//
399
//        stmt.close();
400
//        conn.commit();
401
//        conn.close();
402
//    }
403

  
404
    //downloads stats
405
//    private void downloadsStatsOLD() throws Exception {
406
//        if (conn.isClosed())
407
//            connectDB();
408
//
409
//        stmt = conn.createStatement();
410
//        conn.setAutoCommit(false);
411
//
412
//        Calendar startCalendar = Calendar.getInstance();
413
//        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
414
//        Calendar endCalendar = Calendar.getInstance();
415
//        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
416
//        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
417
//
418
//        String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, extract('year' from timestamp::date) ||'-'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
419
//        stmt.executeUpdate(sql);
420
//
421
//        sql = "SELECT d.id, d.new_date AS month, CASE when rdm.downloads IS NULL THEN 0 ELSE rdm.downloads END, d.source INTO result_downloads_sushi FROM (SELECT distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY-MM-DD') AS new_date, rdsm.source FROM generate_series(0, " + diffMonth + ", 1) AS offs, result_downloads_monthly rdsm) d LEFT JOIN (SELECT id, month, downloads, source FROM result_downloads_monthly) rdm ON d.new_date=rdm.month AND d.id=rdm.id AND d.source=rdm.source ORDER BY d.source, d.id, d.new_date;";
422
//        stmt.executeUpdate(sql);
423
//
424
//        sql = "CREATE INDEX result_downloads_sushi_id ON result_downloads_sushi USING btree (id);";
425
//        stmt.executeUpdate(sql);
426
//
427
//        sql = "CREATE INDEX result_downloads_sushi_month ON result_downloads_sushi USING btree (month);";
428
//        stmt.executeUpdate(sql);
429
//
430
//        sql = "CREATE INDEX result_downloads_sushi_source ON result_downloads_sushi USING btree (source);";
431
//        stmt.executeUpdate(sql);
432
//
433
//        sql = "SELECT roid.id, extract('year' from month::date) ||'/'|| LPAD(CAST(extract('month' from month::date) AS VARCHAR), 2, '0') as month, downloads, source INTO result_downloads FROM result_downloads_sushi rvs, result_oids roid WHERE rvs.id=roid.orid ORDER BY source, roid.id, month;";
434
//        stmt.executeUpdate(sql);
435
//
436
//        sql = "CREATE INDEX result_downloads_id ON result_downloads USING btree (id);";
437
//        stmt.executeUpdate(sql);
438
//
439
//        sql = "CREATE INDEX result_downloads_month ON result_downloads USING btree (month);";
440
//        stmt.executeUpdate(sql);
441
//
442
//        sql = "CREATE INDEX result_downloads_source ON result_downloads USING btree (source);";
443
//        stmt.executeUpdate(sql);
444
//
445
//
446
//        sql = "DROP VIEW IF EXISTS result_downloads_monthly;";
447
//        stmt.executeUpdate(sql);
448
//
449
//        stmt.close();
450
//        conn.commit();
451
//        conn.close();
452
//    }
453

  
454
    //Create repository Views statistics
455
    private void repositoryViewsStats() throws Exception {
456
        if (conn.isClosed())
457
            connectDB();
458

  
459
        stmt = conn.createStatement();
460
        conn.setAutoCommit(false);
461

  
462
//        String sql = "SELECT entity_id AS id , COUNT(entity_id) AS number_of_views, timestamp::date AS date, source INTO repo_view_stats FROM piwiklog WHERE source!='5' AND action=\'action\' AND source_item_type=\'repItem\' GROUP BY entity_id, date, source ORDER BY entity_id, date ASC, COUNT(entity_id) DESC;";
463
        String sql = "CREATE TABLE IF NOT EXISTS repo_view_stats AS SELECT entity_id AS id , COUNT(entity_id) AS number_of_views, timestamp::date AS date, source FROM piwiklog WHERE source!='5' AND action=\'action\' AND source_item_type=\'repItem\' GROUP BY entity_id, date, source ORDER BY entity_id, date ASC, COUNT(entity_id) DESC;";
464
        stmt.executeUpdate(sql);
465

  
466
        sql = "CREATE INDEX repo_view_stats_id ON repo_view_stats USING btree (id)";
467
        stmt.executeUpdate(sql);
468

  
469
        sql = "CREATE INDEX repo_view_stats_date ON repo_view_stats USING btree(date)";
470
        stmt.executeUpdate(sql);
471

  
472
//        sql = "SELECT roid.id, sum(number_of_views), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month, source INTO repo_view_stats_monthly_clean FROM repo_view_stats rvs, result_oids roid where rvs.id=roid.orid group by roid.id, month, source;";
473
        sql = "CREATE TABLE IF NOT EXISTS repo_view_stats_monthly_clean AS SELECT roid.id, sum(number_of_views), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month, source FROM repo_view_stats rvs, result_oids roid where rvs.id=roid.orid group by roid.id, month, source;";
474
        stmt.executeUpdate(sql);
475

  
476
        sql = "CREATE INDEX repo_view_stats_monthly_clean_id ON repo_view_stats_monthly_clean USING btree (id)";
477
        stmt.executeUpdate(sql);
478

  
479
        sql = "CREATE INDEX repo_view_stats_monthly_clean_month ON repo_view_stats_monthly_clean USING btree(month)";
480
        stmt.executeUpdate(sql);
481

  
482
        sql = "CREATE INDEX repo_view_stats_monthly_clean_source ON repo_view_stats_monthly_clean USING btree(source)";
483
        stmt.executeUpdate(sql);
484

  
485
        Calendar startCalendar = Calendar.getInstance();
486
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
487
        Calendar endCalendar = Calendar.getInstance();
488
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
489
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
490

  
491
        //sql="CREATE OR REPLACE view repo_view_stats_monthly AS select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth +", 1) AS offs, repo_view_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_view_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
492
//        sql = "select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source INTO repo_view_stats_monthly from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_view_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_view_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
493
        sql = "CREATE TABLE IF NOT EXISTS repo_view_stats_monthly AS select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_view_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_view_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
494
        stmt.executeUpdate(sql);
495

  
496
        sql = "CREATE INDEX repo_view_stats_monthly_id ON repo_view_stats_monthly USING btree (id)";
497
        stmt.executeUpdate(sql);
498

  
499
        sql = "CREATE INDEX repo_view_stats_monthly_month ON repo_view_stats_monthly USING btree(month)";
500
        stmt.executeUpdate(sql);
501

  
502
        sql = "CREATE INDEX repo_view_stats_monthly_source ON repo_view_stats_monthly USING btree(source)";
503
        stmt.executeUpdate(sql);
504

  
505
        sql = "CREATE OR REPLACE view repo_view_stats_monthly_sushi AS SELECT id, sum(number_of_views), extract('year' from date) ||'-'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM repo_view_stats group by id, month, source;";
506
        stmt.executeUpdate(sql);
507

  
508
        stmt.close();
509
        conn.commit();
510
        conn.close();
511
    }
512

  
513
    //Create repository downloads statistics
514
    private void repositoryDownloadsStats() throws Exception {
515
        if (conn.isClosed())
516
            connectDB();
517

  
518
        stmt = conn.createStatement();
519
        conn.setAutoCommit(false);
520

  
521
//        String sql = "SELECT entity_id AS id, COUNT(entity_id) AS number_of_downloads, timestamp::date AS date, source INTO repo_download_stats FROM piwiklog WHERE source!='5' AND action=\'download\' AND source_item_type=\'repItem\' GROUP BY entity_id, date, source ORDER BY entity_id, date ASC, COUNT(entity_id) DESC;";
522
        String sql = "CREATE TABLE IF NOT EXISTS repo_download_stats AS SELECT entity_id AS id, COUNT(entity_id) AS number_of_downloads, timestamp::date AS date, source FROM piwiklog WHERE source!='5' AND action=\'download\' AND source_item_type=\'repItem\' GROUP BY entity_id, date, source ORDER BY entity_id, date ASC, COUNT(entity_id) DESC;";
523
        stmt.executeUpdate(sql);
524

  
525
        sql = "CREATE INDEX repo_download_stats_id ON repo_download_stats USING btree (id)";
526
        stmt.executeUpdate(sql);
527

  
528
        sql = "CREATE INDEX repo_download_stats_date ON repo_download_stats USING btree(date)";
529
        stmt.executeUpdate(sql);
530

  
531
//        sql = "SELECT roid.id, sum(number_of_downloads), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month, source INTO repo_download_stats_monthly_clean FROM repo_download_stats rvs, result_oids roid WHERE rvs.id=roid.orid GROUP BY roid.id, month, source;";
532
        sql = "CREATE TABLE IF NOT EXISTS repo_download_stats_monthly_clean AS SELECT roid.id, sum(number_of_downloads), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month, source FROM repo_download_stats rvs, result_oids roid WHERE rvs.id=roid.orid GROUP BY roid.id, month, source;";
533
        stmt.executeUpdate(sql);
534

  
535
        sql = "CREATE INDEX repo_download_stats_monthly_clean_id ON repo_download_stats_monthly_clean USING btree (id)";
536
        stmt.executeUpdate(sql);
537

  
538
        sql = "CREATE INDEX repo_download_stats_monthly_clean_month ON repo_download_stats_monthly_clean USING btree(month)";
539
        stmt.executeUpdate(sql);
540

  
541
        sql = "CREATE INDEX repo_download_stats_monthly_clean_source ON repo_download_stats_monthly_clean USING btree(source)";
542
        stmt.executeUpdate(sql);
543

  
544
        Calendar startCalendar = Calendar.getInstance();
545
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
546
        Calendar endCalendar = Calendar.getInstance();
547
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
548
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
549

  
550
        //sql="CREATE OR REPLACE view repo_download_stats_monthly AS select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth +", 1) AS offs, repo_download_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_download_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
551
        // sql = "select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source INTO repo_download_stats_monthly from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_download_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_download_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
552
        sql = "CREATE TABLE IF NOT EXISTS repo_download_stats_monthly AS select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_download_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_download_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
553
        stmt.executeUpdate(sql);
554

  
555
        sql = "CREATE INDEX repo_download_stats_monthly_id ON repo_download_stats_monthly USING btree (id)";
556
        stmt.executeUpdate(sql);
557

  
558
        sql = "CREATE INDEX repo_download_stats_monthly_month ON repo_download_stats_monthly USING btree(month)";
559
        stmt.executeUpdate(sql);
560

  
561
        sql = "CREATE INDEX repo_download_stats_monthly_source ON repo_download_stats_monthly USING btree(source)";
562
        stmt.executeUpdate(sql);
563

  
564

  
565
        sql = "CREATE OR REPLACE view repo_download_stats_monthly_sushi AS SELECT id, sum(number_of_downloads), extract('year' from date) ||'-'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM repo_download_stats group by id, month, source;";
566
        stmt.executeUpdate(sql);
567

  
568
        stmt.close();
569
        conn.commit();
570
        conn.close();
571
    }
572

  
573
    // Import OPENAIRE Logs to DB
574
    private void processPortalLog() throws Exception {
575

  
576
        if (conn.isClosed())
577
            connectDB();
578

  
579
        stmt = conn.createStatement();
580
        conn.setAutoCommit(false);
581

  
582
        ArrayList<String> jsonFiles = listHdfsDir(logPath + "portallog");
583
        PreparedStatement prepStatem = conn.prepareStatement("INSERT INTO process_portal_log (source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?)");
584
        int batch_size = 0;
585
        JSONParser parser = new JSONParser();
586
        for (String jsonFile : jsonFiles) {
587
            JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile));
588

  
589
            for (Object aJsonArray : jsonArray) {
590
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
591
                int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString());
592
                String idVisit = jsonObjectRow.get("idVisit").toString();
593
                String country = jsonObjectRow.get("country").toString();
594
                String referrerName = jsonObjectRow.get("referrerName").toString();
595
                String agent = jsonObjectRow.get("browser").toString();
596

  
597
                JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails"));
598
                for (Object actionDetail : actionDetails) {
599
                    JSONObject actionDetailsObj = (JSONObject) actionDetail;
600

  
601
                    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
602
                    simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
603
                    Timestamp timestamp = new Timestamp(Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000);
604

  
605
                    String action = actionDetailsObj.get("type").toString();
606
                    String url = actionDetailsObj.get("url").toString();
607

  
608
                    String entityID = processPortalURL(url);
609
                    String sourceItemType = "";
610

  
611
                    if (entityID.indexOf("|") > 0) {
612
                        sourceItemType = entityID.substring(0, entityID.indexOf("|"));
613
                        entityID = entityID.substring(entityID.indexOf("|") + 1);
614
                    }
615

  
616
                    prepStatem.setInt(1, idSite);
617
                    prepStatem.setString(2, idVisit);
618
                    prepStatem.setString(3, country);
619
                    prepStatem.setString(4, action);
620
                    prepStatem.setString(5, url);
621
                    prepStatem.setString(6, entityID);
622
                    prepStatem.setString(7, sourceItemType);
623
                    prepStatem.setString(8, simpleDateFormat.format(timestamp));
624
                    prepStatem.setString(9, referrerName);
625
                    prepStatem.setString(10, agent);
626

  
627
                    prepStatem.addBatch();
628
                    batch_size++;
629
                    if (batch_size == 10000) {
630
                        prepStatem.executeBatch();
631
                        conn.commit();
632
                        batch_size = 0;
633
                    }
634
                }
635
            }
636
        }
637
        prepStatem.executeBatch();
638
        conn.commit();
639

  
640
        stmt.close();
641
        conn.close();
642
    }
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff