Project

General

Profile

« Previous | Next » 

Revision 47499

Added by Tsampikos Livisianos over 5 years ago

dnet45

View differences:

modules/dnet-openaire-usage-stats-export/trunk/src/main/java/eu/dnetlib/usagestats/export/PiwikDownloadLogs.java
1
package eu.dnetlib.usagestats.export;
2

  
3
import org.apache.hadoop.conf.Configuration;
4
import org.apache.hadoop.fs.FSDataOutputStream;
5
import org.apache.hadoop.fs.Path;
6
import org.apache.hadoop.fs.FileSystem;
7
import org.apache.log4j.Logger;
8

  
9
import java.io.*;
10
import java.net.URL;
11
import java.net.URLConnection;
12
import java.sql.Connection;
13
import java.sql.DriverManager;
14
import java.sql.PreparedStatement;
15
import java.sql.ResultSet;
16
import java.sql.Statement;
17
import java.text.SimpleDateFormat;
18
import java.util.Date;
19
import java.util.Calendar;
20

  
21
public class PiwikDownloadLogs {
22

  
23
    private final String piwikUsername;
24
    private final String piwikPassword;
25
    private final String httpProtocol;
26
    private final String piwikUrl;
27
    private final Date startDate;
28
    private final String tokenAuth;
29
    private final String logsPath;
30

  
31
    private final String dbUrl;
32
    private final String dbUserName;
33
    private final String dbPassword;
34

  
35
    /*
36
       The Piwik's API method 
37
    */
38
    private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
39
    private final String format = "&format=json";
40

  
41
    private final Logger log = Logger.getLogger(this.getClass());
42

  
43

  
44
    public PiwikDownloadLogs(String username, String password, String tokenAuth, String httpProtocol, String piwikURl, String sDate, String logsPath, String dbUrl, String dbUsername, String dbPassword) throws Exception{
45
        this.piwikUsername = username;
46
        this.piwikPassword = password;
47
        this.httpProtocol = httpProtocol;
48
        this.piwikUrl = piwikURl;
49

  
50
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
51
        this.startDate = sdf.parse(sDate);
52

  
53
        this.tokenAuth = tokenAuth;
54
        this.logsPath = logsPath;
55
        this.dbUrl = dbUrl;
56
        this.dbUserName = dbUsername;
57
        this.dbPassword = dbPassword;
58
    }
59

  
60
    private String getPiwikLogUrl(){
61
        return httpProtocol + "://" + piwikUrl + "/";
62
    }
63

  
64
    private String getJson(String url,String username, String password) throws Exception {
65
        //String cred=username+":"+password;
66
        //String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
67
        try {
68
            URL website = new URL(url);
69
            URLConnection connection = website.openConnection();
70

  
71
            //connection.setRequestProperty ("Authorization", "Basic "+encoded);
72
            StringBuilder response;
73
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
74
                response = new StringBuilder();
75
                String inputLine;
76
                while ((inputLine = in.readLine()) != null) {
77
                    response.append(inputLine);
78
                    response.append("\n");
79
                }
80
            }
81
            return response.toString();
82
        }catch (Exception e){
83
            log.error("Failed to get URL: " + e);
84
            throw new Exception("Failed to get URL: " + e.toString(), e);
85
        }
86
    }
87

  
88
    public void getPiwikLogs() throws Exception{
89
        GetPortalLogs();
90
        GetRepositoriesLogs();
91
    }
92

  
93
    private void GetPortalLogs() throws Exception{
94

  
95
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
96
        Calendar start = Calendar.getInstance();
97
        start.setTime(startDate);
98
        Calendar end = Calendar.getInstance();
99
        end.add(Calendar.DAY_OF_MONTH, -1);
100
        //end.setTime(getFinalDate());
101

  
102
        try{
103
            log.info("downloading logs for site with piwik_id: 5");
104
            Class.forName("org.postgresql.Driver");
105
            Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
106
            PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source='5';");
107
            ResultSet rs_date = st.executeQuery();
108
            while(rs_date.next()){
109
                start.setTime(sdf.parse(rs_date.getString(1)));
110
            }
111
            rs_date.close();
112
            conn.close();
113
            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
114

  
115
                String period="&period=day&date="+sdf.format(date);
116

  
117
                String apiUrl=getPiwikLogUrl()+APImethod+"&idSite=5"+period+format+"&expanded=5&filter_limit=1000&token_auth="+tokenAuth;
118
                String content = getJson(apiUrl,piwikUsername,piwikPassword);
119

  
120
                //for (int i=1;i<10;i++){
121
                int i = 1;
122
                while(true) {
123
                    String apiUrlnew=apiUrl+"&filter_offset="+i*1000;
124
                    String contentNew = getJson(apiUrlnew,piwikUsername,piwikUsername);
125
                    content += contentNew;
126
                    i++;
127
                    if(contentNew.equals("[]\n")){
128
                        break;
129
                    }
130
                }
131
                flushString(content, logsPath + "portallog/" + "5_Piwiklog"+sdf.format((date))+".json");
132
            }
133
        } catch (Exception e) {
134
            log.error(e);
135
            throw new Exception("Failed to get portal logs: " + e.toString(), e);
136
        }
137
    }
138

  
139
    private void GetRepositoriesLogs() throws Exception{
140

  
141
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
142
        Calendar start = Calendar.getInstance();
143
        start.setTime(startDate);
144
        Calendar end = Calendar.getInstance();
145
        end.add(Calendar.DAY_OF_MONTH, -1);
146
        //end.setTime(getFinalDate());
147

  
148
        try{
149
            Class.forName("org.postgresql.Driver");
150
            Connection conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
151
            Statement statement = conn.createStatement();
152
            ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from shadow.datasource where piwik_id is not null and piwik_id!='5' order by piwik_id;");
153
            while(rs.next()){
154
                start.setTime(startDate);
155
                int siteId = rs.getInt(1);
156
                log.info("downloading logs for site with piwik_id: " + siteId);
157
                PreparedStatement st = conn.prepareStatement("SELECT max(timestamp) FROM public.piwiklog WHERE source=?;");
158
                st.setInt(1, siteId);
159
                ResultSet rs_date = st.executeQuery();
160
                while(rs_date.next()){
161
                    //log.info("source: " + siteId + " - date: " + rs_date.getString(1));
162
                    if(rs_date.getString(1) == null || rs_date.getString(1).equals("null") || rs_date.getString(1).equals("")) {
163
                        start = Calendar.getInstance();
164
                        start.add(Calendar.MONTH, -1);
165
                    }
166
                    else {
167
                        start.setTime(sdf.parse(rs_date.getString(1)));
168
                    }
169
                }
170
                rs_date.close();
171

  
172
                for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
173

  
174
                    String period="&period=day&date="+sdf.format(date);
175

  
176
                    String apiUrl=getPiwikLogUrl()+APImethod+"&idSite="+siteId+period+format+"&expanded=5&filter_limit=1000&token_auth="+tokenAuth;
177
                    String content = getJson(apiUrl,piwikUsername,piwikPassword);
178

  
179
                    //for (int i=1;i<10;i++){
180
                    int i=1;
181
                    while(true) {
182
                        String apiUrlnew=apiUrl+"&filter_offset="+i*1000;
183
                        String contentNew = getJson(apiUrlnew,piwikUsername,piwikPassword);
184
                        content += contentNew;
185
                        i++;
186
                        if(contentNew.equals("[]\n")){
187
                            break;
188
                        }
189
                    }
190
                    flushString(content, logsPath + "repolog/" + siteId + "_Piwiklog"+sdf.format((date))+".json");
191

  
192
                }
193
            }
194
            rs.close();
195
            conn.close();
196
        } catch (Exception e) {
197
            log.error(e);
198
            throw new Exception("Failed to get repository logs: " + e.toString(), e);
199
        }
200
    }
201

  
202
    private void flushString(String data, String destination) throws Exception {
203
        FSDataOutputStream fin;
204
        try {
205
            FileSystem fs = FileSystem.get(new Configuration());
206
            fin = fs.create(new Path(destination), true);
207
            fin.write(data.getBytes());
208
            fin.close();
209
        } catch (Exception e) {
210
            log.error("Failed  to write exported data to a file : ", e);
211
            throw new Exception("Failed  to write exported data to a file : " + e.toString(), e);
212
        }
213
    }
214
}
modules/dnet-openaire-usage-stats-export/trunk/src/main/java/eu/dnetlib/usagestats/export/PiwikStatsDB.java
1
package eu.dnetlib.usagestats.export;
2

  
3
import java.io.*;
4
import java.net.URLDecoder;
5
import java.sql.Connection;
6
import java.sql.DriverManager;
7
import java.sql.PreparedStatement;
8
import java.sql.Statement;
9
import java.sql.Timestamp;
10
import java.text.SimpleDateFormat;
11
import java.util.*;
12

  
13
import org.apache.hadoop.conf.Configuration;
14
import org.apache.hadoop.fs.LocatedFileStatus;
15
import org.apache.hadoop.fs.Path;
16
import org.apache.hadoop.fs.FileSystem;
17
import org.apache.hadoop.fs.RemoteIterator;
18
import org.apache.log4j.Logger;
19
import org.json.simple.JSONArray;
20
import org.json.simple.JSONObject;
21
import org.json.simple.parser.JSONParser;
22

  
23
public class PiwikStatsDB {
24
    private final String dbUrl;
25
    private final String dbSchema;
26
    private final String dbUserName;
27
    private final String dbPassword;
28
    private final String logPath;
29

  
30
    private Connection conn = null;
31
    private Statement stmt = null;
32

  
33
    private final Logger log = Logger.getLogger(this.getClass());
34

  
35
    public PiwikStatsDB(String dbUrl, String dbUserName, String dbPassword, String logPath) throws Exception {
36
        this.dbUrl = dbUrl;
37
        this.dbSchema = "shadow";
38
        this.dbUserName = dbUserName;
39
        this.dbPassword = dbPassword;
40
        this.logPath = logPath;
41

  
42
        connectDB();
43
        createTables();
44
    }
45

  
46
    private void connectDB() throws Exception {
47
        try {
48
            Class.forName("org.postgresql.Driver");
49
            conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
50

  
51
            stmt = conn.createStatement();
52
            String sqlSetSearchPath = "SET search_path TO " + dbSchema + ";";
53
            stmt.executeUpdate(sqlSetSearchPath);
54

  
55
            //log.info("Opened database successfully");
56
        } catch (Exception e) {
57
            log.error("Connect to db failed: " + e);
58
            throw new Exception("Failed to connect to db: " + e.toString(), e);
59
        }
60
    }
61

  
62
    private void createTables() throws Exception {
63
        try {
64
            stmt = conn.createStatement();
65
            String sqlCreateTablePiwikLog = "CREATE TABLE IF NOT EXISTS piwiklog(source INTEGER, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
66
            String sqlcreateRulePiwikLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " +
67
                    " ON INSERT TO piwiklog " +
68
                    " WHERE (EXISTS ( SELECT piwiklog.source, piwiklog.id_visit," +
69
                    "piwiklog.action, piwiklog.\"timestamp\", piwiklog.entity_id " +
70
                    "FROM piwiklog " +
71
                    "WHERE piwiklog.source = new.source AND piwiklog.id_visit = new.id_visit AND piwiklog.action = new.action AND piwiklog.entity_id = new.entity_id AND piwiklog.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
72
            stmt.executeUpdate(sqlCreateTablePiwikLog);
73
            stmt.executeUpdate(sqlcreateRulePiwikLog);
74

  
75
            String sqlCopyPublicPiwiklog="insert into piwiklog select * from public.piwiklog;";
76
            stmt.executeUpdate(sqlCopyPublicPiwiklog);
77

  
78
            String sqlCreateTablePortalLog = "CREATE TABLE IF NOT EXISTS process_portal_log(source INTEGER, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, timestamp));";
79
            String sqlcreateRulePortalLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " +
80
                    " ON INSERT TO process_portal_log " +
81
                    " WHERE (EXISTS ( SELECT process_portal_log.source, process_portal_log.id_visit," +
82
                    "process_portal_log.\"timestamp\" " +
83
                    "FROM process_portal_log " +
84
                    "WHERE process_portal_log.source = new.source AND process_portal_log.id_visit = new.id_visit AND process_portal_log.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
85
            stmt.executeUpdate(sqlCreateTablePortalLog);
86
            stmt.executeUpdate(sqlcreateRulePortalLog);
87

  
88
            stmt.close();
89
            conn.close();
90
            log.info("Usage Tables Created");
91

  
92
        } catch (Exception e) {
93
            log.error("Failed to create tables: " + e);
94
            throw new Exception("Failed to create tables: " + e.toString(), e);
95
            //System.exit(0);
96
        }
97
    }
98

  
99
    protected void processLogs() throws Exception {
100
        try {
101
            processRepositoryLog();
102
            log.info("repository process done");
103
            removeDoubleClicks();
104
            log.info("removing double clicks done");
105
            cleanOAI();
106
            log.info("cleaning oai done");
107

  
108
            processPortalLog();
109
            log.info("portal process done");
110
            portalStats();
111
            log.info("portal stats done");
112
        } catch (Exception e) {
113
            log.error("Failed to process logs: " + e);
114
            throw new Exception("Failed to process logs: " + e.toString(), e);
115
        }
116
    }
117

  
118
    protected void usageStats() throws Exception {
119
        try {
120
            //resultStats();
121
            //dataSourceStats();
122
            //organizationsStats();
123
            //projectsStats();
124
            //repositoryViewsStats();
125
            //repositoryDownloadsStats();
126
            viewsStats();
127
            downloadsStats();
128
            log.info("stat tables and views done");
129
        } catch (Exception e) {
130
            log.error("Failed to create usage stats: " + e);
131
            throw new Exception("Failed to create usage stats: " + e.toString(), e);
132
        }
133
    }
134

  
135
    // Import repository Logs to DB
136
    private void processRepositoryLog() throws Exception {
137
        if (conn.isClosed())
138
            connectDB();
139

  
140
        stmt = conn.createStatement();
141
        conn.setAutoCommit(false);
142

  
143
        ArrayList<String> jsonFiles = listHdfsDir(logPath + "repolog");
144

  
145
        PreparedStatement prepStatem = conn.prepareStatement("INSERT INTO piwiklog (source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?)");
146
        int batch_size = 0;
147
        for (String jsonFile : jsonFiles) {
148
            JSONParser parser = new JSONParser();
149
            JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile));
150

  
151
            for (Object aJsonArray : jsonArray) {
152
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
153
                int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString());
154
                String idVisit = jsonObjectRow.get("idVisit").toString();
155
                String country = jsonObjectRow.get("country").toString();
156
                String referrerName = jsonObjectRow.get("referrerName").toString();
157
                String agent = jsonObjectRow.get("browser").toString();
158

  
159
                String sourceItemType = "repItem";
160

  
161
                JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails"));
162
                for (Object actionDetail : actionDetails) {
163
                    JSONObject actionDetailsObj = (JSONObject) actionDetail;
164

  
165
                    if (actionDetailsObj.get("customVariables") != null) {
166
                        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
167
                        simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
168
                        Timestamp timestamp = new Timestamp(Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000);
169
                        String url = actionDetailsObj.get("url").toString();
170
                        String oaipmh = ((JSONObject) ((JSONObject) actionDetailsObj.get("customVariables")).get("1")).get("customVariablePageValue1").toString();
171
                        String action = actionDetailsObj.get("type").toString();
172

  
173
                        prepStatem.setInt(1, idSite);
174
                        prepStatem.setString(2, idVisit);
175
                        prepStatem.setString(3, country);
176
                        prepStatem.setString(4, action);
177
                        prepStatem.setString(5, url);
178
                        prepStatem.setString(6, oaipmh);
179
                        prepStatem.setString(7, sourceItemType);
180
                        prepStatem.setString(8, simpleDateFormat.format(timestamp));
181
                        prepStatem.setString(9, referrerName);
182
                        prepStatem.setString(10, agent);
183
                        prepStatem.addBatch();
184
                        batch_size++;
185
                        if (batch_size == 10000) {
186
                            prepStatem.executeBatch();
187
                            conn.commit();
188
                            batch_size = 0;
189
                        }
190
                    }
191
                }
192
            }
193
        }
194
        prepStatem.executeBatch();
195
        conn.commit();
196
        stmt.close();
197
        conn.close();
198
    }
199

  
200
    private void removeDoubleClicks() throws Exception {
201
        if (conn.isClosed())
202
            connectDB();
203

  
204
        stmt = conn.createStatement();
205
        conn.setAutoCommit(false);
206

  
207
        //clean download double clicks
208
        String sql = "DELETE FROM piwiklog p WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp FROM piwiklog p1, piwiklog p2 WHERE p1.source!='5' AND p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='download' AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp AND extract(EPOCH FROM p2.timestamp::timestamp-p1.timestamp::timestamp)<30 AND p.source=p1.source AND p.id_visit=p1.id_visit AND p.action=p1.action AND p.entity_id=p1.entity_id AND p.timestamp=p1.timestamp);";
209
        stmt.executeUpdate(sql);
210
        stmt.close();
211
        conn.commit();
212

  
213
        stmt = conn.createStatement();
214

  
215
        //clean view double clicks
216
        sql = "DELETE FROM piwiklog p WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp from piwiklog p1, piwiklog p2 WHERE p1.source!='5' AND p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='action' AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp AND extract(EPOCH FROM p2.timestamp::timestamp-p1.timestamp::timestamp)<10 AND p.source=p1.source AND p.id_visit=p1.id_visit AND p.action=p1.action AND p.entity_id=p1.entity_id AND p.timestamp=p1.timestamp);";
217
        stmt.executeUpdate(sql);
218
        stmt.close();
219
        conn.commit();
220
        conn.close();
221
    }
222

  
223
    private void viewsStats() throws Exception {
224
        if (conn.isClosed())
225
            connectDB();
226

  
227
        stmt = conn.createStatement();
228
        conn.setAutoCommit(false);
229

  
230
        //String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as views, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
231
        String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
232
        stmt.executeUpdate(sql);
233

  
234
        sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire INTO views_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
235
        stmt.executeUpdate(sql);
236

  
237
        sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count INTO pageviews_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
238
        stmt.executeUpdate(sql);
239

  
240
        sql = "DROP VIEW IF EXISTS result_views_monthly;";
241
        stmt.executeUpdate(sql);
242

  
243
        stmt.close();
244
        conn.commit();
245
        conn.close();
246
    }
247

  
248
    private void downloadsStats() throws Exception {
249
        if (conn.isClosed())
250
            connectDB();
251

  
252
        stmt = conn.createStatement();
253
        conn.setAutoCommit(false);
254

  
255
        //String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
256
        String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
257
        stmt.executeUpdate(sql);
258

  
259
        //sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
260
        sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
261
        stmt.executeUpdate(sql);
262

  
263
        sql = "DROP VIEW IF EXISTS result_downloads_monthly;";
264
        stmt.executeUpdate(sql);
265

  
266
        stmt.close();
267
        conn.commit();
268
        conn.close();
269
    }
270

  
271
    public void finalizeStats() throws Exception {
272
        if (conn.isClosed())
273
            connectDB();
274

  
275
        stmt = conn.createStatement();
276
        conn.setAutoCommit(false);
277

  
278
        Calendar startCalendar = Calendar.getInstance();
279
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
280
        Calendar endCalendar = Calendar.getInstance();
281
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
282
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
283

  
284
        String sql = "SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date INTO full_dates FROM generate_series(0, " + diffMonth + ", 1) AS offs;";
285
        stmt.executeUpdate(sql);
286

  
287
        sql = "CREATE INDEX full_dates_full_date ON full_dates USING btree(full_date);";
288
        stmt.executeUpdate(sql);
289

  
290

  
291
        sql = "CREATE INDEX views_stats_source ON views_stats USING btree(source);";
292
        stmt.executeUpdate(sql);
293

  
294
        sql = "CREATE INDEX views_stats_repository_id ON views_stats USING btree(repository_id);";
295
        stmt.executeUpdate(sql);
296

  
297
        sql = "CREATE INDEX views_stats_result_id ON views_stats USING btree(result_id);";
298
        stmt.executeUpdate(sql);
299

  
300
        sql = "CREATE INDEX views_stats_date ON views_stats USING btree(date);";
301
        stmt.executeUpdate(sql);
302

  
303

  
304
        sql = "CREATE INDEX pageviews_stats_source ON pageviews_stats USING btree(source);";
305
        stmt.executeUpdate(sql);
306

  
307
        sql = "CREATE INDEX pageviews_stats_repository_id ON pageviews_stats USING btree(repository_id);";
308
        stmt.executeUpdate(sql);
309

  
310
        sql = "CREATE INDEX pageviews_stats_result_id ON pageviews_stats USING btree(result_id);";
311
        stmt.executeUpdate(sql);
312

  
313
        sql = "CREATE INDEX pageviews_stats_date ON pageviews_stats USING btree(date);";
314
        stmt.executeUpdate(sql);
315

  
316

  
317
        sql = "CREATE INDEX downloads_stats_source ON downloads_stats USING btree(source);";
318
        stmt.executeUpdate(sql);
319

  
320
        sql = "CREATE INDEX downloads_stats_repository_id ON downloads_stats USING btree(repository_id);";
321
        stmt.executeUpdate(sql);
322

  
323
        sql = "CREATE INDEX downloads_stats_result_id ON downloads_stats USING btree(result_id);";
324
        stmt.executeUpdate(sql);
325

  
326
        sql = "CREATE INDEX downloads_stats_date ON downloads_stats USING btree(date);";
327
        stmt.executeUpdate(sql);
328

  
329

  
330
        stmt.close();
331
        conn.commit();
332
        conn.close();
333
    }
334

  
335
    //views stats
336
    private void viewsStatsOLD() throws Exception {
337
        if (conn.isClosed())
338
            connectDB();
339

  
340
        stmt = conn.createStatement();
341
        conn.setAutoCommit(false);
342

  
343
        Calendar startCalendar = Calendar.getInstance();
344
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
345
        Calendar endCalendar = Calendar.getInstance();
346
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
347
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
348

  
349
        String sql = "CREATE OR REPLACE VIEW result_views_monthly as select entity_id AS id, COUNT(entity_id) as views, extract('year' from timestamp::date) ||'-'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
350
        stmt.executeUpdate(sql);
351

  
352
        sql = "SELECT d.id, d.new_date AS month, CASE when rdm.views IS NULL THEN 0 ELSE rdm.views END, d.source INTO result_views_sushi FROM (SELECT distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY-MM-DD') AS new_date, rdsm.source FROM generate_series(0, " + diffMonth + ", 1) AS offs, result_views_monthly rdsm) d LEFT JOIN (SELECT id, month, views, source FROM result_views_monthly) rdm ON d.new_date=rdm.month AND d.id=rdm.id AND d.source=rdm.source ORDER BY d.source, d.id, d.new_date;";
353
        stmt.executeUpdate(sql);
354

  
355
        sql = "CREATE INDEX result_views_sushi_id ON result_views_sushi USING btree (id);";
356
        stmt.executeUpdate(sql);
357

  
358
        sql = "CREATE INDEX result_views_sushi_month ON result_views_sushi USING btree (month);";
359
        stmt.executeUpdate(sql);
360

  
361
        sql = "CREATE INDEX result_views_sushi_source ON result_views_sushi USING btree (source);";
362
        stmt.executeUpdate(sql);
363

  
364
        sql = "SELECT roid.id, extract('year' from month::date) ||'/'|| LPAD(CAST(extract('month' from month::date) AS VARCHAR), 2, '0') as month, max(views) as views, source INTO result_views FROM result_views_sushi rvs, result_oids roid WHERE rvs.id=roid.orid GROUP BY source, roid.id, month ORDER BY source, roid.id, month;";
365
        stmt.executeUpdate(sql);
366

  
367
        sql = "CREATE INDEX result_views_id ON result_views USING btree (id);";
368
        stmt.executeUpdate(sql);
369

  
370
        sql = "CREATE INDEX result_views_month ON result_views USING btree (month);";
371
        stmt.executeUpdate(sql);
372

  
373
        sql = "CREATE INDEX result_views_source ON result_views USING btree (source);";
374
        stmt.executeUpdate(sql);
375

  
376

  
377
        sql = "DROP VIEW IF EXISTS result_views_monthly;";
378
        stmt.executeUpdate(sql);
379

  
380
        stmt.close();
381
        conn.commit();
382
        conn.close();
383
    }
384

  
385
    //downloads stats
386
    private void downloadsStatsOLD() throws Exception {
387
        if (conn.isClosed())
388
            connectDB();
389

  
390
        stmt = conn.createStatement();
391
        conn.setAutoCommit(false);
392

  
393
        Calendar startCalendar = Calendar.getInstance();
394
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
395
        Calendar endCalendar = Calendar.getInstance();
396
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
397
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
398

  
399
        String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as downloads, extract('year' from timestamp::date) ||'-'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
400
        stmt.executeUpdate(sql);
401

  
402
        sql = "SELECT d.id, d.new_date AS month, CASE when rdm.downloads IS NULL THEN 0 ELSE rdm.downloads END, d.source INTO result_downloads_sushi FROM (SELECT distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY-MM-DD') AS new_date, rdsm.source FROM generate_series(0, " + diffMonth + ", 1) AS offs, result_downloads_monthly rdsm) d LEFT JOIN (SELECT id, month, downloads, source FROM result_downloads_monthly) rdm ON d.new_date=rdm.month AND d.id=rdm.id AND d.source=rdm.source ORDER BY d.source, d.id, d.new_date;";
403
        stmt.executeUpdate(sql);
404

  
405
        sql = "CREATE INDEX result_downloads_sushi_id ON result_downloads_sushi USING btree (id);";
406
        stmt.executeUpdate(sql);
407

  
408
        sql = "CREATE INDEX result_downloads_sushi_month ON result_downloads_sushi USING btree (month);";
409
        stmt.executeUpdate(sql);
410

  
411
        sql = "CREATE INDEX result_downloads_sushi_source ON result_downloads_sushi USING btree (source);";
412
        stmt.executeUpdate(sql);
413

  
414
        sql = "SELECT roid.id, extract('year' from month::date) ||'/'|| LPAD(CAST(extract('month' from month::date) AS VARCHAR), 2, '0') as month, downloads, source INTO result_downloads FROM result_downloads_sushi rvs, result_oids roid WHERE rvs.id=roid.orid ORDER BY source, roid.id, month;";
415
        stmt.executeUpdate(sql);
416

  
417
        sql = "CREATE INDEX result_downloads_id ON result_downloads USING btree (id);";
418
        stmt.executeUpdate(sql);
419

  
420
        sql = "CREATE INDEX result_downloads_month ON result_downloads USING btree (month);";
421
        stmt.executeUpdate(sql);
422

  
423
        sql = "CREATE INDEX result_downloads_source ON result_downloads USING btree (source);";
424
        stmt.executeUpdate(sql);
425

  
426

  
427
        sql = "DROP VIEW IF EXISTS result_downloads_monthly;";
428
        stmt.executeUpdate(sql);
429

  
430
        stmt.close();
431
        conn.commit();
432
        conn.close();
433
    }
434

  
435
    //Create repository Views statistics
436
    private void repositoryViewsStats() throws Exception {
437
        if (conn.isClosed())
438
            connectDB();
439

  
440
        stmt = conn.createStatement();
441
        conn.setAutoCommit(false);
442

  
443
        String sql = "SELECT entity_id AS id , COUNT(entity_id) AS number_of_views, timestamp::date AS date, source INTO repo_view_stats FROM piwiklog WHERE source!='5' AND action=\'action\' AND source_item_type=\'repItem\' GROUP BY entity_id, date, source ORDER BY entity_id, date ASC, COUNT(entity_id) DESC;";
444
        stmt.executeUpdate(sql);
445

  
446
        sql = "CREATE INDEX repo_view_stats_id ON repo_view_stats USING btree (id)";
447
        stmt.executeUpdate(sql);
448

  
449
        sql = "CREATE INDEX repo_view_stats_date ON repo_view_stats USING btree(date)";
450
        stmt.executeUpdate(sql);
451

  
452
        sql = "SELECT roid.id, sum(number_of_views), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month, source INTO repo_view_stats_monthly_clean FROM repo_view_stats rvs, result_oids roid where rvs.id=roid.orid group by roid.id, month, source;";
453
        stmt.executeUpdate(sql);
454

  
455
        sql = "CREATE INDEX repo_view_stats_monthly_clean_id ON repo_view_stats_monthly_clean USING btree (id)";
456
        stmt.executeUpdate(sql);
457

  
458
        sql = "CREATE INDEX repo_view_stats_monthly_clean_month ON repo_view_stats_monthly_clean USING btree(month)";
459
        stmt.executeUpdate(sql);
460

  
461
        sql = "CREATE INDEX repo_view_stats_monthly_clean_source ON repo_view_stats_monthly_clean USING btree(source)";
462
        stmt.executeUpdate(sql);
463

  
464
        Calendar startCalendar = Calendar.getInstance();
465
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
466
        Calendar endCalendar = Calendar.getInstance();
467
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
468
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
469

  
470
        //sql="CREATE OR REPLACE view repo_view_stats_monthly AS select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth +", 1) AS offs, repo_view_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_view_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
471
        sql = "select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source INTO repo_view_stats_monthly from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_view_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_view_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
472
        stmt.executeUpdate(sql);
473

  
474
        sql = "CREATE INDEX repo_view_stats_monthly_id ON repo_view_stats_monthly USING btree (id)";
475
        stmt.executeUpdate(sql);
476

  
477
        sql = "CREATE INDEX repo_view_stats_monthly_month ON repo_view_stats_monthly USING btree(month)";
478
        stmt.executeUpdate(sql);
479

  
480
        sql = "CREATE INDEX repo_view_stats_monthly_source ON repo_view_stats_monthly USING btree(source)";
481
        stmt.executeUpdate(sql);
482

  
483
        sql = "CREATE OR REPLACE view repo_view_stats_monthly_sushi AS SELECT id, sum(number_of_views), extract('year' from date) ||'-'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM repo_view_stats group by id, month, source;";
484
        stmt.executeUpdate(sql);
485

  
486
        stmt.close();
487
        conn.commit();
488
        conn.close();
489
    }
490

  
491
    //Create repository downloads statistics
492
    private void repositoryDownloadsStats() throws Exception {
493
        if (conn.isClosed())
494
            connectDB();
495

  
496
        stmt = conn.createStatement();
497
        conn.setAutoCommit(false);
498

  
499
        String sql = "SELECT entity_id AS id, COUNT(entity_id) AS number_of_downloads, timestamp::date AS date, source INTO repo_download_stats FROM piwiklog WHERE source!='5' AND action=\'download\' AND source_item_type=\'repItem\' GROUP BY entity_id, date, source ORDER BY entity_id, date ASC, COUNT(entity_id) DESC;";
500
        stmt.executeUpdate(sql);
501

  
502
        sql = "CREATE INDEX repo_download_stats_id ON repo_download_stats USING btree (id)";
503
        stmt.executeUpdate(sql);
504

  
505
        sql = "CREATE INDEX repo_download_stats_date ON repo_download_stats USING btree(date)";
506
        stmt.executeUpdate(sql);
507

  
508
        sql = "SELECT roid.id, sum(number_of_downloads), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month, source INTO repo_download_stats_monthly_clean FROM repo_download_stats rvs, result_oids roid WHERE rvs.id=roid.orid GROUP BY roid.id, month, source;";
509
        stmt.executeUpdate(sql);
510

  
511
        sql = "CREATE INDEX repo_download_stats_monthly_clean_id ON repo_download_stats_monthly_clean USING btree (id)";
512
        stmt.executeUpdate(sql);
513

  
514
        sql = "CREATE INDEX repo_download_stats_monthly_clean_month ON repo_download_stats_monthly_clean USING btree(month)";
515
        stmt.executeUpdate(sql);
516

  
517
        sql = "CREATE INDEX repo_download_stats_monthly_clean_source ON repo_download_stats_monthly_clean USING btree(source)";
518
        stmt.executeUpdate(sql);
519

  
520
        Calendar startCalendar = Calendar.getInstance();
521
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
522
        Calendar endCalendar = Calendar.getInstance();
523
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
524
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
525

  
526
        //sql="CREATE OR REPLACE view repo_download_stats_monthly AS select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth +", 1) AS offs, repo_download_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_download_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
527
        sql = "select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end, d.source INTO repo_download_stats_monthly from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date, rdsm.source from generate_series(0, " + diffMonth + ", 1) AS offs, repo_download_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum, source from repo_download_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id and d.source=rdm.source order by d.id, d.new_date";
528
        stmt.executeUpdate(sql);
529

  
530
        sql = "CREATE INDEX repo_download_stats_monthly_id ON repo_download_stats_monthly USING btree (id)";
531
        stmt.executeUpdate(sql);
532

  
533
        sql = "CREATE INDEX repo_download_stats_monthly_month ON repo_download_stats_monthly USING btree(month)";
534
        stmt.executeUpdate(sql);
535

  
536
        sql = "CREATE INDEX repo_download_stats_monthly_source ON repo_download_stats_monthly USING btree(source)";
537
        stmt.executeUpdate(sql);
538

  
539

  
540
        sql = "CREATE OR REPLACE view repo_download_stats_monthly_sushi AS SELECT id, sum(number_of_downloads), extract('year' from date) ||'-'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') ||'-01' AS month, source FROM repo_download_stats group by id, month, source;";
541
        stmt.executeUpdate(sql);
542

  
543
        stmt.close();
544
        conn.commit();
545
        conn.close();
546
    }
547

  
548
    // Import OPENAIRE Logs to DB
549
    private void processPortalLog() throws Exception {
550

  
551
        if (conn.isClosed())
552
            connectDB();
553

  
554
        stmt = conn.createStatement();
555
        conn.setAutoCommit(false);
556

  
557
        ArrayList<String> jsonFiles = listHdfsDir(logPath + "portallog");
558
        PreparedStatement prepStatem = conn.prepareStatement("INSERT INTO process_portal_log (source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?)");
559
        int batch_size = 0;
560
        JSONParser parser = new JSONParser();
561
        for (String jsonFile : jsonFiles) {
562
            JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile));
563

  
564
            for (Object aJsonArray : jsonArray) {
565
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
566
                int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString());
567
                String idVisit = jsonObjectRow.get("idVisit").toString();
568
                String country = jsonObjectRow.get("country").toString();
569
                String referrerName = jsonObjectRow.get("referrerName").toString();
570
                String agent = jsonObjectRow.get("browser").toString();
571

  
572
                JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails"));
573
                for (Object actionDetail : actionDetails) {
574
                    JSONObject actionDetailsObj = (JSONObject) actionDetail;
575

  
576
                    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
577
                    simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
578
                    Timestamp timestamp = new Timestamp(Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000);
579

  
580
                    String action = actionDetailsObj.get("type").toString();
581
                    String url = actionDetailsObj.get("url").toString();
582

  
583
                    String entityID = processPortalURL(url);
584
                    String sourceItemType = "";
585

  
586
                    if (entityID.indexOf("|") > 0) {
587
                        sourceItemType = entityID.substring(0, entityID.indexOf("|"));
588
                        entityID = entityID.substring(entityID.indexOf("|") + 1);
589
                    }
590

  
591
                    prepStatem.setInt(1, idSite);
592
                    prepStatem.setString(2, idVisit);
593
                    prepStatem.setString(3, country);
594
                    prepStatem.setString(4, action);
595
                    prepStatem.setString(5, url);
596
                    prepStatem.setString(6, entityID);
597
                    prepStatem.setString(7, sourceItemType);
598
                    prepStatem.setString(8, simpleDateFormat.format(timestamp));
599
                    prepStatem.setString(9, referrerName);
600
                    prepStatem.setString(10, agent);
601

  
602
                    prepStatem.addBatch();
603
                    batch_size++;
604
                    if (batch_size == 10000) {
605
                        prepStatem.executeBatch();
606
                        conn.commit();
607
                        batch_size = 0;
608
                    }
609
                }
610
            }
611
        }
612
        prepStatem.executeBatch();
613
        conn.commit();
614

  
615
        stmt.close();
616
        conn.close();
617
    }
618

  
619
    private void portalStats() throws Exception {
620
        if (conn.isClosed())
621
            connectDB();
622

  
623
        stmt = conn.createStatement();
624
        conn.setAutoCommit(false);
625

  
626
        String sql = "INSERT INTO piwiklog SELECT DISTINCT source, id_visit, country, action, url, roid.orid, \'oaItem\', timestamp, referrer_name, agent FROM process_portal_log, public.result_oids roid WHERE entity_id IS NOT null AND entity_id=roid.id AND roid.orid IS NOT null;";
627
        stmt.executeUpdate(sql);
628
        stmt.close();
629
        conn.commit();
630

  
631
        stmt = conn.createStatement();
632
        sql = "INSERT INTO piwiklog SELECT DISTINCT source, id_visit, country, action, url, roid.orid, \'datasource\', timestamp, referrer_name, agent FROM process_portal_log, public.datasource_oids roid WHERE entity_id IS NOT null AND entity_id=roid.id AND roid.orid IS NOT null;";
633
        stmt.executeUpdate(sql);
634
        stmt.close();
635
        conn.commit();
636

  
637
        stmt = conn.createStatement();
638
        sql = "INSERT INTO piwiklog SELECT DISTINCT source, id_visit, country, action, url, roid.orid, \'organization\', timestamp, referrer_name, agent FROM process_portal_log, public.organization_oids roid WHERE entity_id IS NOT null AND entity_id=roid.id AND roid.orid IS NOT null;";
639
        stmt.executeUpdate(sql);
640
        stmt.close();
641
        conn.commit();
642

  
643
        stmt = conn.createStatement();
644
        sql = "INSERT INTO piwiklog SELECT DISTINCT source, id_visit, country, action, url, roid.orid, \'project\', timestamp, referrer_name, agent FROM process_portal_log, public.project_oids roid WHERE entity_id IS NOT null AND entity_id=roid.id AND roid.orid IS NOT null;";
645
        stmt.executeUpdate(sql);
646
        stmt.close();
647
        conn.commit();
648

  
649
        stmt = conn.createStatement();
650
        sql = "DROP TABLE process_portal_log;";
651
        stmt.executeUpdate(sql);
652
        stmt.close();
653
        conn.commit();
654

  
655
        conn.close();
656
    }
657

  
658
    private void cleanOAI() throws Exception {
659
        if (conn.isClosed())
660
            connectDB();
661

  
662
        conn.setAutoCommit(false);
663

  
664
        stmt = conn.createStatement();
665
        String sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chlc.min-saude.pt/','oai:repositorio.chlc.min-saude.pt:') WHERE entity_id LIKE 'oai:repositorio.chlc.min-saude.pt/%';";
666
        stmt.executeUpdate(sql);
667
        stmt.close();
668
        conn.commit();
669

  
670
        stmt = conn.createStatement();
671
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hospitaldebraga.pt/','oai:repositorio.hospitaldebraga.pt:') WHERE entity_id LIKE 'oai:repositorio.hospitaldebraga.pt/%';";
672
        stmt.executeUpdate(sql);
673
        stmt.close();
674
        conn.commit();
675

  
676
        stmt = conn.createStatement();
677
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipl.pt/','oai:repositorio.ipl.pt:') WHERE entity_id LIKE 'oai:repositorio.ipl.pt/%';";
678
        stmt.executeUpdate(sql);
679
        stmt.close();
680
        conn.commit();
681

  
682
        stmt = conn.createStatement();
683
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:bibliotecadigital.ipb.pt/','oai:bibliotecadigital.ipb.pt:') WHERE entity_id LIKE 'oai:bibliotecadigital.ipb.pt/%';";
684
        stmt.executeUpdate(sql);
685
        stmt.close();
686
        conn.commit();
687

  
688
        stmt = conn.createStatement();
689
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ismai.pt/','oai:repositorio.ismai.pt:') WHERE entity_id LIKE 'oai:repositorio.ismai.pt/%';";
690
        stmt.executeUpdate(sql);
691
        stmt.close();
692
        conn.commit();
693

  
694
        stmt = conn.createStatement();
695
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorioaberto.uab.pt/','oai:repositorioaberto.uab.pt:') WHERE entity_id LIKE 'oai:repositorioaberto.uab.pt/%';";
696
        stmt.executeUpdate(sql);
697
        stmt.close();
698
        conn.commit();
699

  
700
        stmt = conn.createStatement();
701
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.uac.pt/','oai:repositorio.uac.pt:') WHERE entity_id LIKE 'oai:repositorio.uac.pt/%';";
702
        stmt.executeUpdate(sql);
703
        stmt.close();
704
        conn.commit();
705

  
706
        stmt = conn.createStatement();
707
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.insa.pt/','oai:repositorio.insa.pt:') WHERE entity_id LIKE 'oai:repositorio.insa.pt/%';";
708
        stmt.executeUpdate(sql);
709
        stmt.close();
710
        conn.commit();
711

  
712
        stmt = conn.createStatement();
713
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipcb.pt/','oai:repositorio.ipcb.pt:') WHERE entity_id LIKE 'oai:repositorio.ipcb.pt/%';";
714
        stmt.executeUpdate(sql);
715
        stmt.close();
716
        conn.commit();
717

  
718
        stmt = conn.createStatement();
719
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ispa.pt/','oai:repositorio.ispa.pt:') WHERE entity_id LIKE 'oai:repositorio.ispa.pt/%';";
720
        stmt.executeUpdate(sql);
721
        stmt.close();
722
        conn.commit();
723

  
724
        stmt = conn.createStatement();
725
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chporto.pt/','oai:repositorio.chporto.pt:') WHERE entity_id LIKE 'oai:repositorio.chporto.pt/%';";
726
        stmt.executeUpdate(sql);
727
        stmt.close();
728
        conn.commit();
729

  
730
        stmt = conn.createStatement();
731
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ucp.pt/','oai:repositorio.ucp.pt:') WHERE entity_id LIKE 'oai:repositorio.ucp.pt/%';";
732
        stmt.executeUpdate(sql);
733
        stmt.close();
734
        conn.commit();
735

  
736
        stmt = conn.createStatement();
737
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:rihuc.huc.min-saude.pt/','oai:rihuc.huc.min-saude.pt:') WHERE entity_id LIKE 'oai:rihuc.huc.min-saude.pt/%';";
738
        stmt.executeUpdate(sql);
739
        stmt.close();
740
        conn.commit();
741

  
742
        stmt = conn.createStatement();
743
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipv.pt/','oai:repositorio.ipv.pt:') WHERE entity_id LIKE 'oai:repositorio.ipv.pt/%';";
744
        stmt.executeUpdate(sql);
745
        stmt.close();
746
        conn.commit();
747

  
748
        stmt = conn.createStatement();
749
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:www.repository.utl.pt/','oai:www.repository.utl.pt:') WHERE entity_id LIKE 'oai:www.repository.utl.pt/%';";
750
        stmt.executeUpdate(sql);
751
        stmt.close();
752
        conn.commit();
753

  
754
        stmt = conn.createStatement();
755
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:run.unl.pt/','oai:run.unl.pt:') WHERE entity_id LIKE 'oai:run.unl.pt/%';";
756
        stmt.executeUpdate(sql);
757
        stmt.close();
758
        conn.commit();
759

  
760
        stmt = conn.createStatement();
761
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:sapientia.ualg.pt/','oai:sapientia.ualg.pt:') WHERE entity_id LIKE 'oai:sapientia.ualg.pt/%';";
762
        stmt.executeUpdate(sql);
763
        stmt.close();
764
        conn.commit();
765

  
766
        stmt = conn.createStatement();
767
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipsantarem.pt/','oai:repositorio.ipsantarem.pt:') WHERE entity_id LIKE 'oai:repositorio.ipsantarem.pt/%';";
768
        stmt.executeUpdate(sql);
769
        stmt.close();
770
        conn.commit();
771

  
772
        stmt = conn.createStatement();
773
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:arca.igc.gulbenkian.pt/','oai:arca.igc.gulbenkian.pt:') WHERE entity_id LIKE 'oai:arca.igc.gulbenkian.pt/%';";
774
        stmt.executeUpdate(sql);
775
        stmt.close();
776
        conn.commit();
777

  
778
        stmt = conn.createStatement();
779
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:ubibliorum.ubi.pt/','oai:ubibliorum.ubi.pt:') WHERE entity_id LIKE 'oai:ubibliorum.ubi.pt/%';";
780
        stmt.executeUpdate(sql);
781
        stmt.close();
782
        conn.commit();
783

  
784
        stmt = conn.createStatement();
785
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:digituma.uma.pt/','oai:digituma.uma.pt:') WHERE entity_id LIKE 'oai:digituma.uma.pt/%';";
786
        stmt.executeUpdate(sql);
787
        stmt.close();
788
        conn.commit();
789

  
790
        stmt = conn.createStatement();
791
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ul.pt/','oai:repositorio.ul.pt:') WHERE entity_id LIKE 'oai:repositorio.ul.pt/%';";
792
        stmt.executeUpdate(sql);
793
        stmt.close();
794
        conn.commit();
795

  
796
        stmt = conn.createStatement();
797
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hff.min-saude.pt/','oai:repositorio.hff.min-saude.pt:') WHERE entity_id LIKE 'oai:repositorio.hff.min-saude.pt/%';";
798
        stmt.executeUpdate(sql);
799
        stmt.close();
800
        conn.commit();
801

  
802
        stmt = conn.createStatement();
803
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorium.sdum.uminho.pt/','oai:repositorium.sdum.uminho.pt:') WHERE entity_id LIKE 'oai:repositorium.sdum.uminho.pt/%';";
804
        stmt.executeUpdate(sql);
805
        stmt.close();
806
        conn.commit();
807

  
808
        stmt = conn.createStatement();
809
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:recipp.ipp.pt/','oai:recipp.ipp.pt:') WHERE entity_id LIKE 'oai:recipp.ipp.pt/%';";
810
        stmt.executeUpdate(sql);
811
        stmt.close();
812
        conn.commit();
813

  
814
        stmt = conn.createStatement();
815
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:bdigital.ufp.pt/','oai:bdigital.ufp.pt:') WHERE entity_id LIKE 'oai:bdigital.ufp.pt/%';";
816
        stmt.executeUpdate(sql);
817
        stmt.close();
818
        conn.commit();
819

  
820
        stmt = conn.createStatement();
821
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.lneg.pt/','oai:repositorio.lneg.pt:') WHERE entity_id LIKE 'oai:repositorio.lneg.pt/%';";
822
        stmt.executeUpdate(sql);
823
        stmt.close();
824
        conn.commit();
825

  
826
        stmt = conn.createStatement();
827
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:iconline.ipleiria.pt/','oai:iconline.ipleiria.pt:') WHERE entity_id LIKE 'oai:iconline.ipleiria.pt/%';";
828
        stmt.executeUpdate(sql);
829
        stmt.close();
830
        conn.commit();
831

  
832
        stmt = conn.createStatement();
833
        sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:comum.rcaap.pt/','oai:comum.rcaap.pt:') WHERE entity_id LIKE 'oai:comum.rcaap.pt/%';";
834
        stmt.executeUpdate(sql);
835
        stmt.close();
836
        conn.commit();
837

  
838
        conn.close();
839
    }
840

  
841
    //Create OpenAIRE's portal datasource statistics
842
    private void dataSourceStats() throws Exception {
843
        if (conn.isClosed())
844
            connectDB();
845

  
846
        stmt = conn.createStatement();
847
        conn.setAutoCommit(false);
848

  
849
        String sql = "SELECT  orgid AS id, max(viewcount) AS number_of_views, date INTO datasource_stats FROM (select entity_id, ooid.id AS orgid, count(entity_id) viewcount, timestamp::date as date FROM piwiklog, datasource_oids ooid WHERE entity_id=ooid.orid AND source_item_type='datasource' GROUP BY entity_id, ooid.id, date) AS temp GROUP BY orgid, date ORDER BY orgid, date ASC, max(viewcount) DESC;";
850
        stmt.executeUpdate(sql);
851

  
852
        sql = "CREATE INDEX datasource_stats_id ON datasource_stats USING btree (id)";
853
        stmt.executeUpdate(sql);
854

  
855
        sql = "CREATE INDEX datasource_stats_date ON datasource_stats USING btree(date)";
856
        stmt.executeUpdate(sql);
857

  
858
        sql = "SELECT id, sum(number_of_views), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month INTO datasource_stats_monthly_clean FROM datasource_stats GROUP BY id, month;";
859
        stmt.executeUpdate(sql);
860

  
861
        sql = "CREATE INDEX datasource_stats_monthly_clean_id ON datasource_stats_monthly_clean USING btree (id)";
862
        stmt.executeUpdate(sql);
863

  
864
        sql = "CREATE INDEX datasource_stats_monthly_clean_month ON datasource_stats_monthly_clean USING btree(month)";
865
        stmt.executeUpdate(sql);
866

  
867
        Calendar startCalendar = Calendar.getInstance();
868
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
869
        Calendar endCalendar = Calendar.getInstance();
870
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
871
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
872

  
873
        //sql="CREATE OR REPLACE view datasource_stats_monthly AS select d.id, d.new_date as month, case when rdm.sum is null then 0 else rdm.sum end from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY-MM-DD') AS new_date from generate_series(0, " + diffMonth +", 1) AS offs, datasource_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum from datasource_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id order by d.id, d.new_date";
874
        sql = "select d.id, d.new_date AS month, case when rdm.sum is null then 0 else rdm.sum end INTO datasource_stats_monthly from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY-MM-DD') AS new_date from generate_series(0, " + diffMonth + ", 1) AS offs, datasource_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum from datasource_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id order by d.id, d.new_date";
875
        stmt.executeUpdate(sql);
876

  
877
        sql = "CREATE INDEX datasource_stats_monthly_id ON datasource_stats_monthly USING btree (id)";
878
        stmt.executeUpdate(sql);
879

  
880
        sql = "CREATE INDEX datasource_stats_monthly_month ON datasource_stats_monthly USING btree(month)";
881
        stmt.executeUpdate(sql);
882

  
883
        stmt.close();
884
        conn.commit();
885
        conn.close();
886
    }
887

  
888
    //Create OpenAIRE's portal results statistics
889
    private void resultStats() throws Exception {
890
        if (conn.isClosed())
891
            connectDB();
892

  
893
        stmt = conn.createStatement();
894
        conn.setAutoCommit(false);
895

  
896
        String sql = "SELECT orgid AS id, max(viewcount) AS number_of_views, date INTO result_stats FROM (SELECT entity_id, ooid.id AS orgid, count(entity_id) viewcount, timestamp::date as date FROM piwiklog, result_oids ooid WHERE entity_id=ooid.orid AND source_item_type='oaItem' GROUP BY entity_id, ooid.id, date) AS temp GROUP BY orgid, date ORDER BY orgid, date ASC, max(viewcount) DESC;";
897
        stmt.executeUpdate(sql);
898

  
899
        sql = "CREATE INDEX result_stats_id ON result_stats USING btree (id)";
900
        stmt.executeUpdate(sql);
901

  
902
        sql = "CREATE INDEX result_stats_date ON result_stats USING btree(date)";
903
        stmt.executeUpdate(sql);
904

  
905
        sql = "SELECT id, sum(number_of_views), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month INTO result_stats_monthly_clean FROM result_stats group by id, month;";
906
        stmt.executeUpdate(sql);
907

  
908
        sql = "CREATE INDEX result_stats_monthly_clean_id ON result_stats_monthly_clean USING btree (id)";
909
        stmt.executeUpdate(sql);
910

  
911
        sql = "CREATE INDEX result_stats_monthly_clean_month ON result_stats_monthly_clean USING btree(month)";
912
        stmt.executeUpdate(sql);
913

  
914
        Calendar startCalendar = Calendar.getInstance();
915
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
916
        Calendar endCalendar = Calendar.getInstance();
917
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
918
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
919

  
920
        //sql="CREATE OR REPLACE view result_stats_monthly AS select d.id, d.new_date as month, case when rdm.sum is null then 0 else rdm.sum end from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date from generate_series(0, " + diffMonth +", 1) AS offs, result_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum from result_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id order by d.id, d.new_date";
921
        sql = "select d.id, d.new_date as month, case when rdm.sum is null then 0 else rdm.sum end INTO result_stats_monthly from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date from generate_series(0, " + diffMonth + ", 1) AS offs, result_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum from result_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id order by d.id, d.new_date";
922
        stmt.executeUpdate(sql);
923

  
924
        sql = "CREATE INDEX result_stats_monthly_id ON result_stats_monthly USING btree (id)";
925
        stmt.executeUpdate(sql);
926

  
927
        sql = "CREATE INDEX result_stats_monthly_month ON result_stats_monthly USING btree(month)";
928
        stmt.executeUpdate(sql);
929

  
930
        stmt.close();
931
        conn.commit();
932
        conn.close();
933
    }
934

  
935
    //Create OpenAIRE's portal organization statistics
936
    private void organizationsStats() throws Exception {
937
        if (conn.isClosed())
938
            connectDB();
939

  
940
        stmt = conn.createStatement();
941
        conn.setAutoCommit(false);
942

  
943
        String sql = "SELECT orgid AS id, max(viewcount) AS number_of_views, date INTO organization_stats FROM (SELECT entity_id, ooid.id AS orgid, count(entity_id) viewcount, timestamp::date AS date FROM piwiklog, organization_oids ooid WHERE entity_id=ooid.orid AND source_item_type='organization' GROUP BY entity_id, ooid.id, date) AS temp GROUP BY orgid, date ORDER BY orgid, date ASC, max(viewcount) DESC;";
944
        stmt.executeUpdate(sql);
945

  
946
        sql = "CREATE INDEX organization_stats_id ON organization_stats USING btree (id)";
947
        stmt.executeUpdate(sql);
948

  
949
        sql = "CREATE INDEX organization_stats_date ON organization_stats USING btree(date)";
950
        stmt.executeUpdate(sql);
951

  
952
        sql = "SELECT id, sum(number_of_views), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month INTO organization_stats_monthly_clean FROM organization_stats group by id, month;";
953
        stmt.executeUpdate(sql);
954

  
955
        sql = "CREATE INDEX organization_stats_monthly_clean_id ON organization_stats_monthly_clean USING btree (id)";
956
        stmt.executeUpdate(sql);
957

  
958
        sql = "CREATE INDEX organization_stats_monthly_clean_month ON organization_stats_monthly_clean USING btree(month)";
959
        stmt.executeUpdate(sql);
960

  
961
        Calendar startCalendar = Calendar.getInstance();
962
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
963
        Calendar endCalendar = Calendar.getInstance();
964
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
965
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
966

  
967
        //sql="CREATE OR REPLACE view organization_stats_monthly AS select d.id, d.new_date as month, case when rdm.sum is null then 0 else rdm.sum end from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY-MM-DD') AS new_date from generate_series(0, " + diffMonth +", 1) AS offs, organization_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum from organization_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id order by d.id, d.new_date";
968
        sql = "select d.id, d.new_date as month, case when rdm.sum is null then 0 else rdm.sum end INTO organization_stats_monthly from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY-MM-DD') AS new_date from generate_series(0, " + diffMonth + ", 1) AS offs, organization_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum from organization_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id order by d.id, d.new_date";
969
        stmt.executeUpdate(sql);
970

  
971
        sql = "CREATE INDEX organization_stats_monthly_id ON organization_stats_monthly USING btree (id)";
972
        stmt.executeUpdate(sql);
973

  
974
        sql = "CREATE INDEX organization_stats_monthly_month ON organization_stats_monthly USING btree(month)";
975
        stmt.executeUpdate(sql);
976

  
977
        stmt.close();
978
        conn.commit();
979
        conn.close();
980
    }
981

  
982
    //Create OpenAIRE's portal projects statistics
983
    private void projectsStats() throws Exception {
984
        if (conn.isClosed())
985
            connectDB();
986

  
987
        stmt = conn.createStatement();
988
        conn.setAutoCommit(false);
989

  
990
        String sql = "SELECT orgid AS id, max(viewcount) AS number_of_views, date INTO project_stats FROM (SELECT entity_id, ooid.id AS orgid, count(entity_id) viewcount, timestamp::date AS date FROM piwiklog, project_oids ooid WHERE entity_id=ooid.orid AND source_item_type='project' GROUP BY entity_id, ooid.id, date) AS temp GROUP BY orgid, date ORDER BY orgid, date ASC, max(viewcount) DESC;";
991
        stmt.executeUpdate(sql);
992

  
993
        sql = "CREATE INDEX project_stats_id ON project_stats USING btree (id)";
994
        stmt.executeUpdate(sql);
995

  
996
        sql = "CREATE INDEX project_stats_date ON project_stats USING btree(date)";
997
        stmt.executeUpdate(sql);
998

  
999
        sql = "SELECT id, sum(number_of_views), extract('year' from date) ||'/'|| LPAD(CAST(extract('month' from date) AS VARCHAR), 2, '0') AS month INTO project_stats_monthly_clean FROM project_stats group by id, month;";
1000
        stmt.executeUpdate(sql);
1001

  
1002
        sql = "CREATE INDEX project_stats_monthly_clean_id ON project_stats_monthly_clean USING btree (id)";
1003
        stmt.executeUpdate(sql);
1004

  
1005
        sql = "CREATE INDEX project_stats_monthly_clean_month ON project_stats_monthly_clean USING btree(month)";
1006
        stmt.executeUpdate(sql);
1007

  
1008
        Calendar startCalendar = Calendar.getInstance();
1009
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
1010
        Calendar endCalendar = Calendar.getInstance();
1011
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
1012
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
1013

  
1014
        //sql="CREATE OR REPLACE view project_stats_monthly AS select d.id, d.new_date as month, case when rdm.sum is null then 0 else rdm.sum end from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date from generate_series(0, " + diffMonth +", 1) AS offs, project_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum from project_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id order by d.id, d.new_date";
1015
        sql = "select d.id, d.new_date as month, case when rdm.sum is null then 0 else rdm.sum end INTO project_stats_monthly from (select distinct rdsm.id, to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS new_date from generate_series(0, " + diffMonth + ", 1) AS offs, project_stats_monthly_clean rdsm) d LEFT JOIN (select id, month, sum from project_stats_monthly_clean) rdm ON d.new_date=rdm.month and d.id=rdm.id order by d.id, d.new_date";
1016
        stmt.executeUpdate(sql);
1017

  
1018
        sql = "CREATE INDEX project_stats_monthly_id ON project_stats_monthly USING btree (id)";
1019
        stmt.executeUpdate(sql);
1020

  
1021
        sql = "CREATE INDEX project_stats_monthly_month ON project_stats_monthly USING btree(month)";
1022
        stmt.executeUpdate(sql);
1023

  
1024
        stmt.close();
1025
        conn.commit();
1026
        conn.close();
1027
    }
1028

  
1029
    private String processPortalURL(String url) {
1030

  
1031
        if (url.indexOf("openaire.eu") > 0) {
1032
            try {
1033
                url = URLDecoder.decode(url, "UTF-8");
1034
            } catch (Exception e) {
1035
                log.info(url);
1036
            }
1037
            if (url.indexOf("datasourceId=") > 0 && url.substring(url.indexOf("datasourceId=") + 13).length() >= 46) {
1038
                url = "datasource|" + url.substring(url.indexOf("datasourceId=") + 13, url.indexOf("datasourceId=") + 59);
1039
            } else if (url.indexOf("datasource=") > 0 && url.substring(url.indexOf("datasource=") + 11).length() >= 46) {
1040
                url = "datasource|" + url.substring(url.indexOf("datasource=") + 11, url.indexOf("datasource=") + 57);
1041
            } else if (url.indexOf("datasourceFilter=") > 0 && url.substring(url.indexOf("datasourceFilter=") + 17).length() >= 46) {
1042
                url = "datasource|" + url.substring(url.indexOf("datasourceFilter=") + 17, url.indexOf("datasourceFilter=") + 63);
1043
            } else if (url.indexOf("articleId=") > 0 && url.substring(url.indexOf("articleId=") + 10).length() >= 46) {
1044
                url = "result|" + url.substring(url.indexOf("articleId=") + 10, url.indexOf("articleId=") + 56);
1045
            } else if (url.indexOf("datasetId=") > 0 && url.substring(url.indexOf("datasetId=") + 10).length() >= 46) {
1046
                url = "result|" + url.substring(url.indexOf("datasetId=") + 10, url.indexOf("datasetId=") + 56);
1047
            } else if (url.indexOf("projectId=") > 0 && url.substring(url.indexOf("projectId=") + 10).length() >= 46 && !url.contains("oai:dnet:corda")) {
1048
                url = "project|" + url.substring(url.indexOf("projectId=") + 10, url.indexOf("projectId=") + 56);
1049
            } else if (url.indexOf("organizationId=") > 0 && url.substring(url.indexOf("organizationId=") + 15).length() >= 46) {
1050
                url = "organization|" + url.substring(url.indexOf("organizationId=") + 15, url.indexOf("organizationId=") + 61);
1051
            } else {
1052
                url = "";
1053
            }
1054
        } else {
1055
            url = "";
1056
        }
1057

  
1058
        return url;
1059
    }
1060

  
1061
    private ArrayList<String> listHdfsDir(String dir) throws Exception {
1062

  
1063
        FileSystem hdfs = FileSystem.get(new Configuration());
1064
        RemoteIterator<LocatedFileStatus> Files;
1065
        ArrayList<String> fileNames = new ArrayList<>();
1066

  
1067
        try {
1068
            Path exportPath = new Path(hdfs.getUri() + dir);
1069
            Files = hdfs.listFiles(exportPath, false);
1070
            while (Files.hasNext()) {
1071
                String fileName = Files.next().getPath().toString();
1072
                //log.info("Found hdfs file " + fileName);
1073
                fileNames.add(fileName);
1074
            }
1075
            //hdfs.close();
1076
        } catch (Exception e) {
1077
            log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logPath));
1078
            throw new Exception("HDFS file path with exported data does not exist :   " + logPath, e);
1079
        }
1080

  
1081
        return fileNames;
1082
    }
1083

  
1084
    private String readHDFSFile(String filename) throws Exception {
1085
        String result;
1086
        try {
1087

  
1088
            FileSystem fs = FileSystem.get(new Configuration());
1089
            //log.info("reading file : " + filename);
1090

  
1091
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(filename))));
1092

  
1093
            StringBuilder sb = new StringBuilder();
1094
            String line = br.readLine();
1095

  
1096
            while (line != null) {
1097
                if (!line.equals("[]")) {
1098
                    sb.append(line);
1099
                }
1100
                //sb.append(line);
1101
                line = br.readLine();
1102
            }
1103
            result = sb.toString().replace("][{\"idSite\"", ",{\"idSite\"");
1104
            if (result.equals("")) {
1105
                result = "[]";
1106
            }
1107

  
1108
            //fs.close();
1109
        } catch (Exception e) {
1110
            log.error(e);
1111
            throw new Exception(e);
1112
        }
1113

  
1114
        return result;
1115
    }
1116
}
modules/dnet-openaire-usage-stats-export/trunk/src/main/java/eu/dnetlib/usagestats/export/SushiStats.java
1
package eu.dnetlib.usagestats.export;
2

  
3
import java.io.*;
4
//import java.io.BufferedReader;
5
//import java.io.InputStreamReader;
6
import java.net.URL;
7
import java.net.URLConnection;
8
import java.sql.ResultSet;
9
import java.text.SimpleDateFormat;
10
import java.util.Date;
11
import java.util.Calendar;
12

  
13
import java.sql.Connection;
14
import java.sql.DriverManager;
15
import java.sql.PreparedStatement;
16
import java.sql.Statement;
17

  
18
import org.json.simple.JSONArray;
19
import org.json.simple.JSONObject;
20
import org.json.simple.parser.JSONParser;
21

  
22
import org.apache.log4j.Logger;
23

  
24
/**
25
 * Created by tsampikos on 28/3/2017.
26
 */
27
public class SushiStats {
28
     private final String dbUrl;
29
    private final String dbSchema;
30
    private final String dbUserName;
31
    private final String dbPassword;
32

  
33
    private Connection conn = null;
34
    private Statement stmt = null;
35

  
36
    private final Logger log = Logger.getLogger(this.getClass());
37

  
38
    public SushiStats(String dbUrl, String dbUsername, String dbPassword) throws Exception {
39
        this.dbUrl = dbUrl;
40
        this.dbSchema = "shadow";
41
        this.dbUserName = dbUsername;
42
        this.dbPassword = dbPassword;
43

  
44
        connectDB();
45
        createTables();
46
    }
47

  
48
    private void connectDB() throws Exception {
49
        try {
50
            Class.forName("org.postgresql.Driver");
51
            conn = DriverManager.getConnection(dbUrl, dbUserName, dbPassword);
52

  
53
            stmt = conn.createStatement();
54
            String sqlSetSearchPath = "SET search_path TO " + dbSchema + ";";
55
            stmt.executeUpdate(sqlSetSearchPath);
56

  
57
            log.info("Opened database successfully");
58

  
59
        } catch (Exception e) {
60
            log.error("Connect to db failed: " + e);
61
            throw new Exception("Failed to connect to db: " + e.toString(), e);
62
        }
63
    }
64

  
65
    private void createTables() throws Exception {
66
        try {
67

  
68
            stmt = conn.createStatement();
69
            String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, oai TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, oai, date, metric_type));";
70
            stmt.executeUpdate(sqlCreateTableSushiLog);
71

  
72
            String sqlCopyPublicSushiLog="INSERT INTO sushilog SELECT * FROM public.sushilog;";
73
            stmt.executeUpdate(sqlCopyPublicSushiLog);
74

  
75
            String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " +
76
                " ON INSERT TO sushilog " +
77
                " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository," +
78
                "sushilog.oai, sushilog.date " +
79
                "FROM sushilog " +
80
                "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.oai = new.oai AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
81
            stmt.executeUpdate(sqlcreateRuleSushiLog);
82

  
83
            stmt.close();
84
            conn.close();
85
            log.info("Sushi Tables Created");
86
        } catch (Exception e) {
87
            log.error("Failed to create tables: " + e);
88
            throw new Exception("Failed to create tables: " + e.toString(), e);
89
        }
90
    }
91

  
92
    public void sushiStats() throws Exception {
93
        if (conn.isClosed())
94
            connectDB();
95

  
96
        stmt = conn.createStatement();
97
        conn.setAutoCommit(false);
98

  
99
        //String sql = "INSERT INTO sushi_result_downloads SELECT s.source, d.id AS repository, ro.id, s.date, s.count FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total'";
100
        //String sql = "SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count INTO downloads_stats FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total'";
101
        //String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total';";
102
        String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total';";
103
        stmt.executeUpdate(sql);
104

  
105

  
106
        stmt.close();
107
        conn.commit();
108
        conn.close();
109
    }
110

  
111
    public void processIrusRRReport() throws Exception {
112
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
113
        String reportUrl = "http://www.irus.mimas.ac.uk/api/sushilite/v1_7/GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate=" + simpleDateFormat.format(new Date()) + "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
114
        String text = getJson(reportUrl, "", "");
115

  
116
        JSONParser parser = new JSONParser();
117
        JSONObject jsonObject = (JSONObject) parser.parse(text);
118
        jsonObject = (JSONObject) jsonObject.get("ReportResponse");
119
        jsonObject = (JSONObject) jsonObject.get("Report");
120
        jsonObject = (JSONObject) jsonObject.get("Report");
121
        jsonObject = (JSONObject) jsonObject.get("Customer");
122
        JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
123
        int i = 0;
124
        for (Object aJsonArray : jsonArray) {
125
            JSONObject jsonObjectRow = (JSONObject) aJsonArray;
126
            JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
127
            for (Object identifier : itemIdentifier) {
128
                JSONObject opendoar = (JSONObject) identifier;
129
                if(opendoar.get("Type").toString().equals("OpenDOAR")){
130
                    //System.out.println(i + ": " + opendoar.get("Value").toString());
131
                    log.info(i + ": " + opendoar.get("Value").toString());
132
                    i++;
133
                    processIrusIRReport(opendoar.get("Value").toString());
134
                    break;
135
                }
136
            }
137
            //break;
138
        }
139
    }
140

  
141
    private void processIrusIRReport(String opendoar) throws Exception {
142
        if (conn.isClosed())
143
            connectDB();
144

  
145
        conn.setAutoCommit(false);
146

  
147
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
148

  
149
        Calendar start = Calendar.getInstance();
150
        start.set(Calendar.YEAR, 2016);
151
        start.set(Calendar.MONTH, Calendar.JANUARY);
152
        //start.setTime(simpleDateFormat.parse("2016-01"));
153

  
154
        Calendar end = Calendar.getInstance();
155
        end.add(Calendar.DAY_OF_MONTH, -1);
156

  
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff