Revision 52783
Added by Miriam Baglioni almost 6 years ago
Gtr2ProjectsIterable.java | ||
---|---|---|
19 | 19 |
import org.joda.time.DateTime; |
20 | 20 |
import org.joda.time.format.DateTimeFormat; |
21 | 21 |
import org.joda.time.format.DateTimeFormatter; |
22 |
import eu.dnetlib.data.collector.plugins.HttpConnector; |
|
22 | 23 |
|
23 | 24 |
/** |
24 | 25 |
* Created by alessia on 28/11/16. |
... | ... | |
46 | 47 |
//private boolean finished = false; |
47 | 48 |
private final ExecutorService es = Executors.newFixedThreadPool(PAGE_SZIE); |
48 | 49 |
private String nextElement = null; |
50 |
private HttpConnector connector; |
|
49 | 51 |
|
50 | 52 |
public Gtr2ProjectsIterable(final String baseUrl, final String fromDate) throws CollectorServiceException { |
51 | 53 |
prepare(baseUrl, fromDate); |
... | ... | |
60 | 62 |
} |
61 | 63 |
|
62 | 64 |
private void prepare(final String baseUrl, final String fromDate) { |
65 |
connector = new HttpConnector(); |
|
63 | 66 |
queryURL = baseUrl + "/projects"; |
64 | 67 |
vg = new VTDGen(); |
65 | 68 |
this.incremental = StringUtils.isNotBlank(fromDate); |
... | ... | |
78 | 81 |
private void fillInfo(final boolean all) throws CollectorServiceException { |
79 | 82 |
try { |
80 | 83 |
// log.debug("Getting hit count from: " + queryURL); |
81 |
vg.parseHttpUrl(queryURL, false); |
|
84 |
byte[] bytes = connector.getInputSource(queryURL).getBytes("UTF-8"); |
|
85 |
vg.setDoc(bytes); |
|
86 |
vg.parse(false); |
|
87 |
//vg.parseHttpUrl(queryURL, false); |
|
82 | 88 |
initParser(); |
83 | 89 |
String hitCount = vn.toNormalizedString(vn.getAttrVal("totalSize")); |
84 | 90 |
String totalPages = vn.toNormalizedString(vn.getAttrVal("totalPages")); |
... | ... | |
230 | 236 |
log.debug("Page: " + resultPageUrl); |
231 | 237 |
// clear VGen before processing the next file |
232 | 238 |
vg.clear(); |
233 |
vg.parseHttpUrl(resultPageUrl, false); |
|
239 |
byte[] bytes = connector.getInputSource(resultPageUrl).getBytes("UTF-8"); |
|
240 |
vg.setDoc(bytes); |
|
241 |
vg.parse(false); |
|
242 |
//vg.parseHttpUrl(resultPageUrl, false); |
|
234 | 243 |
initParser(); |
235 | 244 |
ap.selectXPath("//project"); |
236 | 245 |
int res; |
... | ... | |
273 | 282 |
public ParseProject(final String projectHref) { |
274 | 283 |
projectRef = projectHref; |
275 | 284 |
vg1 = new VTDGen(); |
276 |
vg1.parseHttpUrl(projectRef, false); |
|
277 |
vn1 = vg1.getNav(); |
|
285 |
try { |
|
286 |
byte[] bytes = connector.getInputSource(projectRef).getBytes("UTF-8"); |
|
287 |
vg1.setDoc(bytes); |
|
288 |
vg1.parse(false); |
|
289 |
//vg1.parseHttpUrl(projectRef, false); |
|
290 |
vn1 = vg1.getNav(); |
|
291 |
}catch(Throwable e){ |
|
292 |
log.error("Exception processing " + projectRef + "\n" + e.getMessage()); |
|
293 |
} |
|
278 | 294 |
} |
279 | 295 |
|
280 | 296 |
private int projectsUpdate(String attr) throws CollectorServiceException { |
Also available in: Unified diff
Use HttpConnector to download the XML instead of VTDGen's parseHttpUrl method