Project

General

Profile

1
package eu.dnetlib.usagestats.export;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Calendar;

/**
 * Downloads raw visit logs from a Piwik (Matomo) instance and stores them as
 * JSON files on HDFS, one file per tracked site per day.
 *
 * <p>For every distinct {@code piwik_id} found in the {@code datasource} table,
 * the class resumes from the latest {@code timestamp} already present in the
 * {@code piwiklog} table and downloads each missing day up to (but not
 * including) yesterday, paging through the Live API 1000 visits at a time.
 *
 * <p>Not thread-safe: relies on the shared {@code ConnectDB.DB_CONNECTION}
 * (presumably a single JDBC connection — confirm against ConnectDB).
 */
public class PiwikDownloadLogs {

    /** Host name of the Piwik instance, e.g. {@code analytics.example.org}. */
    private final String piwikUrl;
    /** API token appended to every request as {@code token_auth}. */
    private final String tokenAuth;

    /*
       The Piwik's API method
     */
    private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
    private final String format = "&format=json";

    private final Logger log = Logger.getLogger(this.getClass());

    /**
     * @param piwikUrl  host of the Piwik instance (scheme is added internally)
     * @param tokenAuth Piwik API authentication token
     * @throws Exception kept for backward compatibility with existing callers
     */
    public PiwikDownloadLogs(String piwikUrl, String tokenAuth) throws Exception {
        this.piwikUrl = piwikUrl;
        this.tokenAuth = tokenAuth;
    }

    /** Base URL of the Piwik instance (always HTTPS). */
    private String getPiwikLogUrl() {
        return "https://" + piwikUrl + "/";
    }

    /**
     * Fetches {@code url} and returns the response body with a newline appended
     * after every line. NOTE: the trailing {@code "\n"} is load-bearing — the
     * pagination loop stops on the exact string {@code "[]\n"}.
     *
     * @throws Exception wrapping any I/O failure, with the cause preserved
     */
    private String getJson(String url) throws Exception {
        try {
            URL website = new URL(url);
            URLConnection connection = website.openConnection();

            StringBuilder response;
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                response = new StringBuilder();
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    response.append(inputLine);
                    response.append("\n");
                }
            }
            return response.toString();
        } catch (Exception e) {
            log.error("Failed to get URL: " + e);
            throw new Exception("Failed to get URL: " + e.toString(), e);
        }
    }

    /**
     * Downloads the missing daily logs for every Piwik site id registered in
     * the {@code datasource} table.
     *
     * <p>For each site, the start date is the most recent {@code timestamp}
     * already stored in {@code piwiklog} (sites with no rows are skipped by the
     * {@code HAVING} clause); the end date is yesterday. The portal site
     * ({@code portalMatomoID}) is written to {@code portalLogPath}, every other
     * site to {@code repoLogsPath}.
     *
     * @param portalMatomoID numeric Piwik id of the portal site, as a string
     * @throws Exception on database, parsing, HTTP or HDFS failures
     */
    public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {

        // One formatter and one HDFS handle for the whole run instead of one
        // per site/day iteration (the original re-created both inside loops).
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        FileSystem fs = FileSystem.get(new Configuration());
        int portalSiteId = Integer.parseInt(portalMatomoID);

        // try-with-resources: the original leaked all JDBC resources (and the
        // HDFS output stream) whenever an exception escaped the loop.
        try (Statement statement = ConnectDB.DB_CONNECTION.createStatement();
             ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from datasource order by piwik_id;");
             PreparedStatement st = ConnectDB.DB_CONNECTION.prepareStatement("SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;")) {

            while (rs.next()) {
                int siteId = rs.getInt(1);
                st.setInt(1, siteId);
                try (ResultSet rsDate = st.executeQuery()) {
                    while (rsDate.next()) {
                        Date dateMax = sdf.parse(rsDate.getString(1));
                        Calendar start = Calendar.getInstance();
                        start.setTime(dateMax);
                        // End of range is yesterday; today's logs are still growing.
                        Calendar end = Calendar.getInstance();
                        end.add(Calendar.DAY_OF_MONTH, -1);

                        for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
                            String day = sdf.format(date);
                            log.info("Downloading logs for repoid " + siteId + " and for " + day);
                            String outFolder = (siteId == portalSiteId) ? portalLogPath : repoLogsPath;
                            downloadSiteDay(fs, outFolder, siteId, day);
                        }
                    }
                }
            }
        }
    }

    /**
     * Downloads one site's visits for one day into
     * {@code <outFolder>/<siteId>_Piwiklog<day>.json}, paging through the Live
     * API in chunks of 1000 until an empty page ({@code "[]\n"}) is returned.
     * The empty terminal page is written to the file, matching the original
     * behavior exactly.
     */
    private void downloadSiteDay(FileSystem fs, String outFolder, int siteId, String day) throws Exception {
        String period = "&period=day&date=" + day;
        String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;

        try (FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + day + ".json"), true)) {
            String content = "";
            int i = 0;
            while (!content.equals("[]\n")) {
                String apiUrl = baseApiUrl;
                if (i > 0) {
                    // Subsequent pages: offset into the day's visit list.
                    apiUrl += "&filter_offset=" + (i * 1000);
                }
                content = getJson(apiUrl);
                fin.write(content.getBytes());
                i++;
            }
        }
    }
}
(2-2/5)