1 |
48028
|
claudio.at
|
package eu.dnetlib.data.collector.plugins.opentrial;
|
2 |
|
|
|
3 |
|
|
/**
 * Created by miriam on 07/03/2017.
 */
|
6 |
|
|
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
7 |
|
|
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
|
8 |
|
|
import org.apache.commons.io.IOUtils;
|
9 |
|
|
import java.net.*;
|
10 |
|
|
import java.util.Iterator;
|
11 |
|
|
import java.util.concurrent.ArrayBlockingQueue;
|
12 |
|
|
//import java.util.function.Consumer;
|
13 |
|
|
|
14 |
|
|
import org.apache.commons.logging.Log;
|
15 |
|
|
import org.apache.commons.logging.LogFactory;
|
16 |
|
|
import org.json.*;
|
17 |
|
|
|
18 |
|
|
|
19 |
|
|
|
20 |
|
|
public class OpenTrialIterator implements Iterable<String> {
|
21 |
|
|
|
22 |
|
|
private final String base_url;
|
23 |
|
|
private int total ;
|
24 |
|
|
private ArrayBlockingQueue<String> trials = new ArrayBlockingQueue<String>(100);
|
25 |
|
|
private int current = 0;
|
26 |
|
|
private static final Log log = LogFactory.getLog(OpenTrialIterator.class);
|
27 |
|
|
|
28 |
|
|
public OpenTrialIterator(String base_url, String from_date, String to_date)throws CollectorServiceException{
|
29 |
|
|
try {
|
30 |
|
|
String q = "per_page=100";
|
31 |
|
|
if (!(from_date == null)) {
|
32 |
|
|
if (!(to_date == null)) {
|
33 |
|
|
q = "q=registration_date%3A%5B" + from_date + "%20TO%20" + to_date + "%5D&" + q;
|
34 |
|
|
|
35 |
|
|
} else
|
36 |
|
|
q = "q=registration_date%3A%5B" + from_date + "%20TO%20*%5D&" + q;
|
37 |
|
|
}
|
38 |
|
|
this.base_url = base_url+ q;
|
39 |
|
|
log.info("url from which to collect " + this.base_url);
|
40 |
|
|
prepare();
|
41 |
|
|
}catch(Exception ex){
|
42 |
|
|
throw new CollectorServiceException(ex);
|
43 |
|
|
}
|
44 |
|
|
}
|
45 |
|
|
|
46 |
|
|
private void prepare()throws Exception {
|
47 |
|
|
JSONObject json = new JSONObject(getPage(1));
|
48 |
|
|
total = json.getInt("total_count");
|
49 |
|
|
log.info("Total number of entries to collect: " + total);
|
50 |
|
|
fillTrials(json);
|
51 |
|
|
}
|
52 |
|
|
|
53 |
|
|
|
54 |
|
|
@Override
|
55 |
|
|
public Iterator<String> iterator() {
|
56 |
|
|
return new Iterator<String>(){
|
57 |
|
|
|
58 |
|
|
private int page_number = 2;
|
59 |
|
|
|
60 |
|
|
|
61 |
|
|
@Override
|
62 |
|
|
public void remove(){
|
63 |
|
|
|
64 |
|
|
}
|
65 |
|
|
|
66 |
|
|
@Override
|
67 |
|
|
public String next() {
|
68 |
|
|
try {
|
69 |
|
|
if (trials.isEmpty()) {
|
70 |
|
|
JSONObject json = new JSONObject(getPage(page_number));
|
71 |
|
|
fillTrials(json);
|
72 |
|
|
page_number++;
|
73 |
|
|
}
|
74 |
|
|
return trials.poll();
|
75 |
|
|
}catch(Exception ex){
|
76 |
|
|
throw new CollectorServiceRuntimeException(ex);
|
77 |
|
|
}
|
78 |
|
|
}
|
79 |
|
|
|
80 |
|
|
@Override
|
81 |
|
|
public boolean hasNext(){
|
82 |
|
|
log.debug("More entries to collect: (" + current + "<" + total + "=" + (current < total));
|
83 |
|
|
return (current < total || !trials.isEmpty());
|
84 |
|
|
}
|
85 |
|
|
|
86 |
|
|
|
87 |
|
|
};
|
88 |
|
|
|
89 |
|
|
}
|
90 |
|
|
|
91 |
|
|
private void fillTrials(JSONObject json)throws CollectorServiceException{
|
92 |
|
|
|
93 |
|
|
JSONArray entries = json.getJSONArray("items");
|
94 |
|
|
for(Object entry: entries) {
|
95 |
|
|
try {
|
96 |
|
|
trials.put(XML.toString(entry));
|
97 |
|
|
}catch(Exception ex){
|
98 |
|
|
throw new CollectorServiceException(ex);
|
99 |
|
|
}
|
100 |
|
|
current++;
|
101 |
|
|
}
|
102 |
|
|
|
103 |
|
|
}
|
104 |
|
|
private String getPage(int page_number)throws CollectorServiceException {
|
105 |
|
|
|
106 |
|
|
try {
|
107 |
|
|
URL url = new URL(base_url + "&page=" + page_number);
|
108 |
|
|
URLConnection conn = url.openConnection();
|
109 |
|
|
conn.setRequestProperty("User-Agent", "Mozilla/5.0");
|
110 |
|
|
return (IOUtils.toString(conn.getInputStream()));
|
111 |
|
|
}catch(Exception ex){
|
112 |
|
|
throw new CollectorServiceException(ex);
|
113 |
|
|
}
|
114 |
|
|
}
|
115 |
|
|
|
116 |
|
|
|
117 |
|
|
}
|