package eu.dnetlib.data.collector.plugins.opentrial;

/**
 * Created by miriam on 07/03/2017.
 */
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
import java.io.InputStream;
import java.net.*;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.concurrent.ArrayBlockingQueue;
//import java.util.function.Consumer;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.json.*;

public class OpenTrialIterator implements Iterable<String> {
|
21
|
|
22
|
private final String base_url;
|
23
|
private int total ;
|
24
|
private ArrayBlockingQueue<String> trials = new ArrayBlockingQueue<String>(100);
|
25
|
private int current = 0;
|
26
|
private static final Log log = LogFactory.getLog(OpenTrialIterator.class);
|
27
|
|
28
|
public OpenTrialIterator(String base_url, String from_date, String to_date)throws CollectorServiceException{
|
29
|
try {
|
30
|
String q = "per_page=100";
|
31
|
if (!(from_date == null)) {
|
32
|
if (!(to_date == null)) {
|
33
|
q = "q=registration_date%3A%5B" + from_date + "%20TO%20" + to_date + "%5D&" + q;
|
34
|
|
35
|
} else
|
36
|
q = "q=registration_date%3A%5B" + from_date + "%20TO%20*%5D&" + q;
|
37
|
}
|
38
|
this.base_url = base_url+ q;
|
39
|
log.info("url from which to collect " + this.base_url);
|
40
|
prepare();
|
41
|
}catch(Exception ex){
|
42
|
throw new CollectorServiceException(ex);
|
43
|
}
|
44
|
}
|
45
|
|
46
|
private void prepare()throws Exception {
|
47
|
JSONObject json = new JSONObject(getPage(1));
|
48
|
total = json.getInt("total_count");
|
49
|
log.info("Total number of entries to collect: " + total);
|
50
|
fillTrials(json);
|
51
|
}
|
52
|
|
53
|
|
54
|
@Override
|
55
|
public Iterator<String> iterator() {
|
56
|
return new Iterator<String>(){
|
57
|
|
58
|
private int page_number = 2;
|
59
|
|
60
|
|
61
|
@Override
|
62
|
public void remove(){
|
63
|
|
64
|
}
|
65
|
|
66
|
@Override
|
67
|
public String next() {
|
68
|
try {
|
69
|
if (trials.isEmpty()) {
|
70
|
JSONObject json = new JSONObject(getPage(page_number));
|
71
|
fillTrials(json);
|
72
|
page_number++;
|
73
|
}
|
74
|
return trials.poll();
|
75
|
}catch(Exception ex){
|
76
|
throw new CollectorServiceRuntimeException(ex);
|
77
|
}
|
78
|
}
|
79
|
|
80
|
@Override
|
81
|
public boolean hasNext(){
|
82
|
log.debug("More entries to collect: (" + current + "<" + total + "=" + (current < total));
|
83
|
return (current < total || !trials.isEmpty());
|
84
|
}
|
85
|
|
86
|
|
87
|
};
|
88
|
|
89
|
}
|
90
|
|
91
|
private void fillTrials(JSONObject json)throws CollectorServiceException{
|
92
|
|
93
|
JSONArray entries = json.getJSONArray("items");
|
94
|
for(Object entry: entries) {
|
95
|
try {
|
96
|
trials.put(XML.toString(entry));
|
97
|
}catch(Exception ex){
|
98
|
throw new CollectorServiceException(ex);
|
99
|
}
|
100
|
current++;
|
101
|
}
|
102
|
|
103
|
}
|
104
|
private String getPage(int page_number)throws CollectorServiceException {
|
105
|
|
106
|
try {
|
107
|
URL url = new URL(base_url + "&page=" + page_number);
|
108
|
URLConnection conn = url.openConnection();
|
109
|
conn.setRequestProperty("User-Agent", "Mozilla/5.0");
|
110
|
return (IOUtils.toString(conn.getInputStream()));
|
111
|
}catch(Exception ex){
|
112
|
throw new CollectorServiceException(ex);
|
113
|
}
|
114
|
}
|
115
|
|
116
|
|
117
|
}
|