Project

General

Profile

1
package eu.dnetlib.usagestats.export;
2

    
3
import org.apache.hadoop.conf.Configuration;
4
import org.apache.hadoop.fs.FSDataOutputStream;
5
import org.apache.hadoop.fs.Path;
6
import org.apache.hadoop.fs.FileSystem;
7
import org.apache.log4j.Logger;
8

    
9
import java.io.*;
10
import java.net.URL;
11
import java.net.URLConnection;
12
import java.sql.PreparedStatement;
13
import java.sql.ResultSet;
14
import java.sql.Statement;
15
import java.text.SimpleDateFormat;
16
import java.util.Date;
17
import java.util.Calendar;
18

    
19
public class PiwikDownloadLogs {
20

    
21
    private final String piwikUrl;
22
    private Date startDate;
23
    private final String tokenAuth;
24

    
25
    /*
26
       The Piwik's API method 
27
     */
28
    private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
29
    private final String format = "&format=json";
30

    
31
    private final Logger log = Logger.getLogger(this.getClass());
32

    
33
    public PiwikDownloadLogs(String piwikUrl, String tokenAuth) {
34
        this.piwikUrl = piwikUrl;
35
        this.tokenAuth = tokenAuth;
36

    
37
    }
38

    
39
    private String getPiwikLogUrl() {
40
        return "https://" + piwikUrl + "/";
41
    }
42

    
43
    private String getJson(String url) throws Exception {
44
        try {
45
            URL website = new URL(url);
46
            URLConnection connection = website.openConnection();
47

    
48
            //connection.setRequestProperty ("Authorization", "Basic "+encoded);
49
            StringBuilder response;
50
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
51
                response = new StringBuilder();
52
                String inputLine;
53
                while ((inputLine = in.readLine()) != null) {
54
                    response.append(inputLine);
55
                    response.append("\n");
56
                }
57
            }
58
            return response.toString();
59
        } catch (Exception e) {
60
            log.error("Failed to get URL: " + e);
61
            throw new Exception("Failed to get URL: " + e.toString(), e);
62
        }
63
    }
64

    
65
    public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
66

    
67
        Statement statement = ConnectDB.getConnection().createStatement();
68
                
69
        ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from public.datasource where piwik_id is not null order by piwik_id;");
70
        while (rs.next()) {
71
            int siteId = rs.getInt(1);
72
            SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
73

    
74
            Calendar start = Calendar.getInstance();
75
            start.set(Calendar.YEAR, 2016);
76
            start.set(Calendar.MONTH, Calendar.MARCH);
77
            //start.setTime(simpleDateFormat.parse("2016-01"));
78

    
79
            Calendar end = Calendar.getInstance();
80
            end.add(Calendar.DAY_OF_MONTH, -1);
81

    
82
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
83
            PreparedStatement st = ConnectDB.DB_CONNECTION.prepareStatement("SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;");
84
            st.setInt(1, siteId);
85

    
86
            ResultSet rs_date = st.executeQuery();
87
            while (rs_date.next()) {
88
                if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
89
                    start.setTime(sdf.parse(rs_date.getString(1)));
90
                }
91
            }
92
            rs_date.close();
93

    
94
            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
95
                log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
96

    
97
                String period = "&period=day&date=" + sdf.format(date);
98
                String outFolder = "";
99
                //portal siteId = 109;
100
                if (siteId == Integer.parseInt(portalMatomoID)) {
101
                    outFolder = portalLogPath;
102
                } else {
103
                    outFolder = repoLogsPath;
104
                }
105
                FileSystem fs = FileSystem.get(new Configuration());
106
                FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true);
107

    
108
                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
109
                String content = "";
110

    
111
                int i = 0;
112

    
113
                while (!content.equals("[]\n")) {
114
                    String apiUrl = baseApiUrl;
115

    
116
                    if (i > 0) {
117
                        apiUrl += "&filter_offset=" + (i * 1000);
118
                    }
119

    
120
                    content = getJson(apiUrl);
121

    
122
                    fin.write(content.getBytes());
123

    
124
                    i++;
125
                }
126
                fin.close();
127

    
128
            }
129

    
130
        }
131
}
(4-4/9)