Revision 58415
Added by Antonis Lempesis about 4 years ago
SarcStats.java | ||
---|---|---|
1 |
package eu.dnetlib.usagestats.export; |
|
2 |
|
|
3 |
import java.io.*; |
|
4 |
//import java.io.BufferedReader; |
|
5 |
//import java.io.InputStreamReader; |
|
6 |
import java.net.URL; |
|
7 |
import java.net.URLConnection; |
|
8 |
import java.sql.ResultSet; |
|
9 |
import java.text.SimpleDateFormat; |
|
10 |
import java.util.Calendar; |
|
11 |
import java.sql.PreparedStatement; |
|
12 |
import java.sql.Statement; |
|
13 |
import org.json.simple.JSONArray; |
|
14 |
import org.json.simple.JSONObject; |
|
15 |
import org.json.simple.parser.JSONParser; |
|
16 |
|
|
17 |
import org.apache.log4j.Logger; |
|
18 |
|
|
19 |
/** |
|
20 |
* Created by dpie |
|
21 |
*/ |
|
22 |
public class SarcStats { |
|
23 |
|
|
24 |
private Statement stmt = null; |
|
25 |
|
|
26 |
private final Logger log = Logger.getLogger(this.getClass()); |
|
27 |
|
|
28 |
/**
 * Builds the harvester and immediately runs {@link #createTables()}.
 *
 * @throws Exception if {@link #createTables()} fails
 */
public SarcStats() throws Exception {
    this.createTables();
}
|
31 |
|
|
32 |
private void createTables() throws Exception { |
|
33 |
try { |
|
34 |
|
|
35 |
stmt = ConnectDB.getConnection().createStatement(); |
|
36 |
String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));"; |
|
37 |
stmt.executeUpdate(sqlCreateTableSushiLog); |
|
38 |
|
|
39 |
//String sqlCopyPublicSushiLog="INSERT INTO sushilog SELECT * FROM public.sushilog;"; |
|
40 |
//stmt.executeUpdate(sqlCopyPublicSushiLog); |
|
41 |
String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " |
|
42 |
+ " ON INSERT TO sushilog " |
|
43 |
+ " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository," |
|
44 |
+ "sushilog.rid, sushilog.date " |
|
45 |
+ "FROM sushilog " |
|
46 |
+ "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;"; |
|
47 |
stmt.executeUpdate(sqlcreateRuleSushiLog); |
|
48 |
|
|
49 |
stmt.close(); |
|
50 |
ConnectDB.getConnection().close(); |
|
51 |
log.info("Sushi Tables Created"); |
|
52 |
} catch (Exception e) { |
|
53 |
log.error("Failed to create tables: " + e); |
|
54 |
throw new Exception("Failed to create tables: " + e.toString(), e); |
|
55 |
} |
|
56 |
} |
|
57 |
|
|
58 |
public void processSarc() throws Exception { |
|
59 |
processARReport("https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", "1646-107X"); |
|
60 |
processARReport("https://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", "0873-819X"); |
|
61 |
processARReport("https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", "1646-2335"); |
|
62 |
processARReport("https://revistas.rcaap.pt/cct/sushiLite/v1_7/", "2182-3030"); |
|
63 |
processARReport("https://actapediatrica.spp.pt/sushiLite/v1_7/", "0873-9781"); |
|
64 |
processARReport("https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", "0873-6529"); |
|
65 |
processARReport("https://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", "0430-5027"); |
|
66 |
processARReport("https://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", "2182-8474"); |
|
67 |
processARReport("https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", "0871-6099"); |
|
68 |
processARReport("https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", "0871-9187"); |
|
69 |
processARReport("https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", "1646-091X"); |
|
70 |
processARReport("https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", "2183-5799"); |
|
71 |
processARReport("https://revistas.rcaap.pt/ecr/sushiLite/v1_7/", "1647-2098"); |
|
72 |
processARReport("https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", "0872-0754"); |
|
73 |
processARReport("https://revistas.rcaap.pt/cea/sushiLite/v1_7/", "1645-3794"); |
|
74 |
processARReport("https://revistas.rcaap.pt/proelium/sushiLite/v1_7/", "1645-8826"); |
|
75 |
processARReport("https://revistas.rcaap.pt/millenium/sushiLite/v1_7/", "0873-3015"); |
|
76 |
} |
|
77 |
|
|
78 |
public void sarcStats() throws Exception { |
|
79 |
stmt = ConnectDB.getConnection().createStatement(); |
|
80 |
ConnectDB.getConnection().setAutoCommit(false); |
|
81 |
|
|
82 |
//String sql = "SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' INTO downloads_stats FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.rid=ro.orid AND metric_type='ft_total'"; |
|
83 |
String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilog s, public.datasource_oids d, public.datasource_results dr, public.result_pids ro WHERE d.orid LIKE '%' || s.repository || '%' AND dr.id=d.id AND dr.result=ro.id AND s.rid=ro.pid AND ro.type='doi' AND metric_type='ft_total' AND s.source='SARC-OJS';"; |
|
84 |
stmt.executeUpdate(sql); |
|
85 |
|
|
86 |
stmt.close(); |
|
87 |
ConnectDB.getConnection().commit(); |
|
88 |
ConnectDB.getConnection().close(); |
|
89 |
} |
|
90 |
|
|
91 |
public void processARReport(String url, String issn) throws Exception { |
|
92 |
log.info("Processing SARC! issn: " + issn + " with url: " + url); |
|
93 |
ConnectDB.getConnection().setAutoCommit(false); |
|
94 |
|
|
95 |
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM"); |
|
96 |
|
|
97 |
Calendar start = Calendar.getInstance(); |
|
98 |
start.set(Calendar.YEAR, 2016); |
|
99 |
start.set(Calendar.MONTH, Calendar.JANUARY); |
|
100 |
//start.setTime(simpleDateFormat.parse("2016-01")); |
|
101 |
|
|
102 |
Calendar end = Calendar.getInstance(); |
|
103 |
end.add(Calendar.DAY_OF_MONTH, -1); |
|
104 |
|
|
105 |
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); |
|
106 |
PreparedStatement st = ConnectDB.getConnection().prepareStatement("SELECT max(date) FROM sushilog WHERE repository=?;"); |
|
107 |
st.setString(1, issn); |
|
108 |
ResultSet rs_date = st.executeQuery(); |
|
109 |
while (rs_date.next()) { |
|
110 |
if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) { |
|
111 |
start.setTime(sdf.parse(rs_date.getString(1))); |
|
112 |
} |
|
113 |
} |
|
114 |
rs_date.close(); |
|
115 |
|
|
116 |
PreparedStatement preparedStatement = ConnectDB.getConnection().prepareStatement("INSERT INTO sushilog (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)"); |
|
117 |
int batch_size = 0; |
|
118 |
|
|
119 |
while (start.before(end)) { |
|
120 |
//String reportUrl = "http://irus.mimas.ac.uk/api/sushilite/v1_7/GetReport/?Report=IR1&Release=4&RequestorID=OpenAIRE&BeginDate=" + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime()) + "&RepositoryIdentifier=opendoar%3A" + opendoar + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback="; |
|
121 |
String reportUrl = url + "GetReport/?Report=AR1&Format=json&BeginDate=" + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime()); |
|
122 |
//System.out.println(reportUrl); |
|
123 |
start.add(Calendar.MONTH, 1); |
|
124 |
|
|
125 |
String text = getJson(reportUrl); |
|
126 |
if (text == null) { |
|
127 |
continue; |
|
128 |
} |
|
129 |
|
|
130 |
/* |
|
131 |
PrintWriter wr = new PrintWriter(new FileWriter("logs/" + simpleDateFormat.format(start.getTime()) + ".json")); |
|
132 |
wr.print(text); |
|
133 |
wr.close(); |
|
134 |
*/ |
|
135 |
JSONParser parser = new JSONParser(); |
|
136 |
JSONObject jsonObject = (JSONObject) parser.parse(text); |
|
137 |
jsonObject = (JSONObject) jsonObject.get("sc:ReportResponse"); |
|
138 |
jsonObject = (JSONObject) jsonObject.get("sc:Report"); |
|
139 |
if (jsonObject == null) { |
|
140 |
continue; |
|
141 |
} |
|
142 |
jsonObject = (JSONObject) jsonObject.get("c:Report"); |
|
143 |
jsonObject = (JSONObject) jsonObject.get("c:Customer"); |
|
144 |
Object obj = jsonObject.get("c:ReportItems"); |
|
145 |
JSONArray jsonArray = new JSONArray(); |
|
146 |
if (obj instanceof JSONObject) { |
|
147 |
jsonArray.add(obj); |
|
148 |
} else { |
|
149 |
jsonArray = (JSONArray) obj; |
|
150 |
//jsonArray = (JSONArray) jsonObject.get("c:ReportItems"); |
|
151 |
} |
|
152 |
if (jsonArray == null) { |
|
153 |
continue; |
|
154 |
} |
|
155 |
|
|
156 |
String rid = ""; |
|
157 |
for (Object aJsonArray : jsonArray) { |
|
158 |
JSONObject jsonObjectRow = (JSONObject) aJsonArray; |
|
159 |
JSONArray itemIdentifier = new JSONArray(); |
|
160 |
obj = jsonObjectRow.get("c:ItemIdentifier"); |
|
161 |
if (obj instanceof JSONObject) { |
|
162 |
itemIdentifier.add(obj); |
|
163 |
} else { |
|
164 |
//JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("c:ItemIdentifier"); |
|
165 |
itemIdentifier = (JSONArray) obj; |
|
166 |
} |
|
167 |
for (Object identifier : itemIdentifier) { |
|
168 |
JSONObject doi = (JSONObject) identifier; |
|
169 |
if (doi.get("c:Type").toString().equals("DOI")) { |
|
170 |
rid = doi.get("c:Value").toString(); |
|
171 |
//System.out.println("DOI: " + rid); |
|
172 |
break; |
|
173 |
} |
|
174 |
} |
|
175 |
if (rid.isEmpty()) { |
|
176 |
continue; |
|
177 |
} |
|
178 |
|
|
179 |
JSONObject itemPerformance = (JSONObject) jsonObjectRow.get("c:ItemPerformance"); |
|
180 |
//for (Object perf : itemPerformance) { |
|
181 |
JSONObject performance = (JSONObject) itemPerformance; |
|
182 |
JSONObject periodObj = (JSONObject) performance.get("c:Period"); |
|
183 |
String period = periodObj.get("c:Begin").toString(); |
|
184 |
JSONObject instanceObj = (JSONObject) performance.get("c:Instance"); |
|
185 |
String type = instanceObj.get("c:MetricType").toString(); |
|
186 |
String count = instanceObj.get("c:Count").toString(); |
|
187 |
//System.out.println(rid + " : " + period + " : " + count); |
|
188 |
|
|
189 |
preparedStatement.setString(1, "SARC-OJS"); |
|
190 |
preparedStatement.setString(2, issn); |
|
191 |
//preparedStatement.setString(2, url); |
|
192 |
preparedStatement.setString(3, rid); |
|
193 |
preparedStatement.setString(4, period); |
|
194 |
preparedStatement.setString(5, type); |
|
195 |
preparedStatement.setInt(6, Integer.parseInt(count)); |
|
196 |
preparedStatement.addBatch(); |
|
197 |
batch_size++; |
|
198 |
if (batch_size == 10000) { |
|
199 |
preparedStatement.executeBatch(); |
|
200 |
ConnectDB.getConnection().commit(); |
|
201 |
batch_size = 0; |
|
202 |
} |
|
203 |
//} |
|
204 |
|
|
205 |
//break; |
|
206 |
} |
|
207 |
//break; |
|
208 |
} |
|
209 |
|
|
210 |
preparedStatement.executeBatch(); |
|
211 |
ConnectDB.getConnection().commit(); |
|
212 |
ConnectDB.getConnection().close(); |
|
213 |
} |
|
214 |
|
|
215 |
private String getJson(String url) { |
|
216 |
//String cred=username+":"+password; |
|
217 |
//String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes()); |
|
218 |
try { |
|
219 |
URL website = new URL(url); |
|
220 |
URLConnection connection = website.openConnection(); |
|
221 |
//connection.setRequestProperty ("Authorization", "Basic "+encoded); |
|
222 |
StringBuilder response; |
|
223 |
try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) { |
|
224 |
response = new StringBuilder(); |
|
225 |
String inputLine; |
|
226 |
while ((inputLine = in.readLine()) != null) { |
|
227 |
response.append(inputLine); |
|
228 |
response.append("\n"); |
|
229 |
} |
|
230 |
} |
|
231 |
return response.toString(); |
|
232 |
} catch (Exception e) { |
|
233 |
log.error("Failed to get URL: " + e); |
|
234 |
//System.out.println("Failed to get URL: " + e); |
|
235 |
return null; |
|
236 |
//throw new Exception("Failed to get URL: " + e.toString(), e); |
|
237 |
} |
|
238 |
} |
|
239 |
} |
Also available in: Unified diff
a ton of fixes. Close to running smoothly in all cases