Revision 42339
Added by Alessia Bardi over 8 years ago
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/projects/wt/WTProjectsIterator.java | ||
---|---|---|
3 | 3 |
import java.net.URL; |
4 | 4 |
import java.util.Iterator; |
5 | 5 |
import java.util.List; |
6 |
import java.util.NoSuchElementException; |
|
6 |
import java.util.Queue; |
|
7 |
import java.util.concurrent.PriorityBlockingQueue; |
|
7 | 8 |
|
8 |
import com.google.common.collect.Lists; |
|
9 | 9 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
10 | 10 |
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException; |
11 | 11 |
import org.apache.commons.io.IOUtils; |
... | ... | |
23 | 23 |
private static String queryURL = "http://www.ebi.ac.uk/europepmc/GristAPI/rest/get/query=ga:%22Wellcome%20Trust%22&resultType=core"; |
24 | 24 |
private int pageNumber = 0; |
25 | 25 |
private SAXReader reader = new SAXReader(); |
26 |
private int listCounter = 0; |
|
27 |
private List<String> projectsInCurrentPage = Lists.newArrayList(); |
|
26 |
private Queue<String> projects = new PriorityBlockingQueue<String>(); |
|
27 |
private boolean morePages = true; |
|
28 |
//The following is for debug only |
|
29 |
private int nextCounter = 0; |
|
28 | 30 |
|
29 | 31 |
@Override |
30 | 32 |
public boolean hasNext() { |
... | ... | |
33 | 35 |
} catch (CollectorServiceException e) { |
34 | 36 |
throw new CollectorServiceRuntimeException(e); |
35 | 37 |
} |
36 |
return listCounter < projectsInCurrentPage.size();
|
|
38 |
return !projects.isEmpty();
|
|
37 | 39 |
} |
38 | 40 |
|
39 | 41 |
@Override |
40 | 42 |
public String next() { |
43 |
nextCounter++; |
|
44 |
log.debug(String.format("Calling next %s times. projects queue has %s elements", nextCounter, projects.size())); |
|
41 | 45 |
try { |
42 | 46 |
fillProjectListIfNeeded(); |
47 |
return projects.poll(); |
|
43 | 48 |
} catch (CollectorServiceException e) { |
44 | 49 |
throw new CollectorServiceRuntimeException(e); |
45 | 50 |
} |
46 |
String project = null; |
|
47 |
try { |
|
48 |
project = projectsInCurrentPage.get(listCounter); |
|
49 |
} catch (IndexOutOfBoundsException e) { |
|
50 |
throw new NoSuchElementException(); |
|
51 |
} |
|
52 |
listCounter++; |
|
53 |
return project; |
|
54 | 51 |
} |
55 | 52 |
|
56 |
/** |
|
57 |
* Re-fill the list of projects if it is empty |
|
58 |
* |
|
59 |
* @return true if the list has been re-filled, false if the list was not empty |
|
60 |
* @throws CollectorServiceException |
|
61 |
*/ |
|
62 | 53 |
private boolean fillProjectListIfNeeded() throws CollectorServiceException { |
63 |
if (projectsInCurrentPage.isEmpty() || listCounter >= projectsInCurrentPage.size()) {
|
|
54 |
if (morePages && projects.isEmpty()) {
|
|
64 | 55 |
String resultPage = getNextPage(); |
65 | 56 |
Document doc = null; |
66 | 57 |
try { |
67 | 58 |
doc = reader.read(IOUtils.toInputStream(resultPage)); |
68 | 59 |
List<Element> records = doc.selectNodes("//RecordList/Record"); |
69 | 60 |
if (records != null && !records.isEmpty()) { |
61 |
log.debug(String.format("Found %s projects in page", records.size())); |
|
70 | 62 |
for (Element p : records) { |
71 |
projectsInCurrentPage.add(p.asXML());
|
|
63 |
projects.add(p.asXML()); |
|
72 | 64 |
} |
73 |
listCounter = 0;
|
|
65 |
return true;
|
|
74 | 66 |
} else { |
75 | 67 |
log.info("No more projects to read at page nr. " + pageNumber); |
68 |
morePages = false; |
|
69 |
return false; |
|
76 | 70 |
} |
77 |
return true; |
|
78 | 71 |
} catch (DocumentException e) { |
79 | 72 |
throw new CollectorServiceException(e); |
80 | 73 |
} |
Also available in: Unified diff
Fixed collection of WT projects; now using a queue instead of an array list