1 |
8441
|
manos.karv
|
import java.util.Vector;
|
2 |
|
|
|
3 |
|
|
import org.apache.log4j.BasicConfigurator;
|
4 |
|
|
import org.junit.Test;
|
5 |
|
|
|
6 |
|
|
import eu.dnetlib.data.utility.resource_discovery.crawler.Crawler;
|
7 |
|
|
import eu.dnetlib.data.utility.resource_discovery.crawler.ResourceExtractor;
|
8 |
|
|
import eu.dnetlib.data.utility.resource_discovery.url_filter.UrlFilter;
|
9 |
|
|
|
10 |
|
|
|
11 |
|
|
public class SuperTester {
|
12 |
|
|
|
13 |
|
|
@Test
|
14 |
|
|
public void TestCrawlingAndExtraction() throws Exception {
|
15 |
|
|
BasicConfigurator.configure();
|
16 |
|
|
|
17 |
|
|
Crawler crawler = new Crawler();
|
18 |
|
|
ResourceExtractor extractor = new ResourceExtractor();
|
19 |
|
|
|
20 |
|
|
String idUrl = UrlFilter.resolveRedirections("http://www.di.uoa.gr");
|
21 |
|
|
System.out.println("Now processing " + idUrl);
|
22 |
|
|
Vector<String> urls = crawler.getLinks(idUrl);
|
23 |
|
|
System.out.println("Retrieved links are: "+ urls);
|
24 |
|
|
System.out.println("Resources seem to be available in: " + extractor.extractResource(urls));
|
25 |
|
|
System.out.println();
|
26 |
|
|
}
|
27 |
|
|
|
28 |
|
|
}
|