Revision 52783
Added by Miriam Baglioni over 5 years ago
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/projects/gtr2/Gtr2Test.java | ||
---|---|---|
3 | 3 |
import java.util.Iterator; |
4 | 4 |
|
5 | 5 |
import com.ximpleware.VTDGen; |
6 |
import eu.dnetlib.data.collector.plugins.HttpConnector; |
|
6 | 7 |
import eu.dnetlib.miscutils.functional.xml.TryIndentXmlString; |
7 | 8 |
import org.junit.Before; |
8 | 9 |
import org.junit.Ignore; |
... | ... | |
17 | 18 |
private String baseURL = "http://gtr.rcuk.ac.uk/gtr/api"; |
18 | 19 |
private Gtr2Helper helper; |
19 | 20 |
private Gtr2ProjectsIterable it; |
21 |
private HttpConnector connector; |
|
20 | 22 |
|
21 | 23 |
@Before |
22 | 24 |
public void prepare() { |
23 | 25 |
helper = new Gtr2Helper(); |
26 |
//System.setProperty("jsse.enableSNIExtension","false"); |
|
24 | 27 |
} |
25 | 28 |
|
26 | 29 |
@Test |
27 | 30 |
public void testOne() throws Exception { |
28 | 31 |
System.out.println("one project"); |
29 | 32 |
VTDGen vg_tmp = new VTDGen(); |
30 |
vg_tmp.parseHttpUrl("http://gtr.rcuk.ac.uk/gtr/api/projects/E178742B-571B-498F-8402-122F17C47546", false); |
|
33 |
connector = new HttpConnector(); |
|
34 |
byte[] bytes = connector.getInputSource("http://gtr.rcuk.ac.uk/gtr/api/projects/E178742B-571B-498F-8402-122F17C47546").getBytes("UTF-8"); |
|
35 |
//vg_tmp.parseHttpUrl("https://gtr.rcuk.ac.uk/gtr/api/projects/E178742B-571B-498F-8402-122F17C47546", false); |
|
36 |
vg_tmp.setDoc(bytes); |
|
37 |
vg_tmp.parse(false); |
|
31 | 38 |
String s = helper.processProject(vg_tmp.getNav(), "xmlns:ns=\"http:///afgshs\""); |
32 | 39 |
System.out.println(s); |
33 | 40 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/projects/gtr2/Gtr2ProjectsIterable.java | ||
---|---|---|
19 | 19 |
import org.joda.time.DateTime; |
20 | 20 |
import org.joda.time.format.DateTimeFormat; |
21 | 21 |
import org.joda.time.format.DateTimeFormatter; |
22 |
import eu.dnetlib.data.collector.plugins.HttpConnector; |
|
22 | 23 |
|
23 | 24 |
/** |
24 | 25 |
* Created by alessia on 28/11/16. |
... | ... | |
46 | 47 |
//private boolean finished = false; |
47 | 48 |
private final ExecutorService es = Executors.newFixedThreadPool(PAGE_SZIE); |
48 | 49 |
private String nextElement = null; |
50 |
private HttpConnector connector; |
|
49 | 51 |
|
50 | 52 |
public Gtr2ProjectsIterable(final String baseUrl, final String fromDate) throws CollectorServiceException { |
51 | 53 |
prepare(baseUrl, fromDate); |
... | ... | |
60 | 62 |
} |
61 | 63 |
|
62 | 64 |
private void prepare(final String baseUrl, final String fromDate) { |
65 |
connector = new HttpConnector(); |
|
63 | 66 |
queryURL = baseUrl + "/projects"; |
64 | 67 |
vg = new VTDGen(); |
65 | 68 |
this.incremental = StringUtils.isNotBlank(fromDate); |
... | ... | |
78 | 81 |
private void fillInfo(final boolean all) throws CollectorServiceException { |
79 | 82 |
try { |
80 | 83 |
// log.debug("Getting hit count from: " + queryURL); |
81 |
vg.parseHttpUrl(queryURL, false); |
|
84 |
byte[] bytes = connector.getInputSource(queryURL).getBytes("UTF-8"); |
|
85 |
vg.setDoc(bytes); |
|
86 |
vg.parse(false); |
|
87 |
//vg.parseHttpUrl(queryURL, false); |
|
82 | 88 |
initParser(); |
83 | 89 |
String hitCount = vn.toNormalizedString(vn.getAttrVal("totalSize")); |
84 | 90 |
String totalPages = vn.toNormalizedString(vn.getAttrVal("totalPages")); |
... | ... | |
230 | 236 |
log.debug("Page: " + resultPageUrl); |
231 | 237 |
// clear VGen before processing the next file |
232 | 238 |
vg.clear(); |
233 |
vg.parseHttpUrl(resultPageUrl, false); |
|
239 |
byte[] bytes = connector.getInputSource(resultPageUrl).getBytes("UTF-8"); |
|
240 |
vg.setDoc(bytes); |
|
241 |
vg.parse(false); |
|
242 |
//vg.parseHttpUrl(resultPageUrl, false); |
|
234 | 243 |
initParser(); |
235 | 244 |
ap.selectXPath("//project"); |
236 | 245 |
int res; |
... | ... | |
273 | 282 |
public ParseProject(final String projectHref) { |
274 | 283 |
projectRef = projectHref; |
275 | 284 |
vg1 = new VTDGen(); |
276 |
vg1.parseHttpUrl(projectRef, false); |
|
277 |
vn1 = vg1.getNav(); |
|
285 |
try { |
|
286 |
byte[] bytes = connector.getInputSource(projectRef).getBytes("UTF-8"); |
|
287 |
vg1.setDoc(bytes); |
|
288 |
vg1.parse(false); |
|
289 |
//vg1.parseHttpUrl(projectRef, false); |
|
290 |
vn1 = vg1.getNav(); |
|
291 |
}catch(Throwable e){ |
|
292 |
log.error("Exception processing " + projectRef + "\n" + e.getMessage()); |
|
293 |
} |
|
278 | 294 |
} |
279 | 295 |
|
280 | 296 |
private int projectsUpdate(String attr) throws CollectorServiceException { |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/projects/gtr2/Gtr2Helper.java | ||
---|---|---|
7 | 7 |
import com.ximpleware.AutoPilot; |
8 | 8 |
import com.ximpleware.VTDGen; |
9 | 9 |
import com.ximpleware.VTDNav; |
10 |
import eu.dnetlib.data.collector.plugins.HttpConnector; |
|
10 | 11 |
import org.apache.commons.logging.Log; |
11 | 12 |
import org.apache.commons.logging.LogFactory; |
12 | 13 |
import org.apache.commons.lang3.*; |
... | ... | |
18 | 19 |
private VTDNav mainVTDNav; |
19 | 20 |
private AutoPilot mainAutoPilot; |
20 | 21 |
private StringWriter writer; |
22 |
private HttpConnector connector; |
|
21 | 23 |
//private BlockingQueue<String> fragment = new ArrayBlockingQueue<String>(20); |
22 | 24 |
|
23 | 25 |
public String processProject(final VTDNav vn, final String namespaces) throws Exception { |
... | ... | |
70 | 72 |
|
71 | 73 |
private VTDNav setNavigator(final String httpUrl) { |
72 | 74 |
VTDGen vg_tmp = new VTDGen(); |
73 |
vg_tmp.parseHttpUrl(httpUrl, false); |
|
74 |
return vg_tmp.getNav(); |
|
75 |
connector = new HttpConnector(); |
|
76 |
try { |
|
77 |
byte[] bytes = connector.getInputSource(httpUrl).getBytes("UTF-8"); |
|
78 |
vg_tmp.setDoc(bytes); |
|
79 |
vg_tmp.parse(false); |
|
80 |
//vg_tmp.parseHttpUrl(httpUrl, false); |
|
81 |
return vg_tmp.getNav(); |
|
82 |
}catch (Throwable e){ |
|
83 |
return null; |
|
84 |
} |
|
75 | 85 |
} |
76 | 86 |
|
77 | 87 |
private int evalXpath(final VTDNav fragmentVTDNav, final String xPath) throws Exception { |
Also available in: Unified diff
Use HttpConnector to download the XML instead of VTDGen's parseHttpUrl method