1
|
import java.util.Vector;
|
2
|
|
3
|
import org.apache.log4j.BasicConfigurator;
|
4
|
import org.junit.Test;
|
5
|
|
6
|
import eu.dnetlib.data.utility.resource_discovery.crawler.Crawler;
|
7
|
import eu.dnetlib.data.utility.resource_discovery.crawler.ResourceExtractor;
|
8
|
import eu.dnetlib.data.utility.resource_discovery.url_filter.UrlFilter;
|
9
|
|
10
|
|
11
|
public class SuperTester {
|
12
|
|
13
|
@Test
|
14
|
public void TestCrawlingAndExtraction() throws Exception {
|
15
|
BasicConfigurator.configure();
|
16
|
|
17
|
Crawler crawler = new Crawler();
|
18
|
ResourceExtractor extractor = new ResourceExtractor();
|
19
|
|
20
|
String idUrl = UrlFilter.resolveRedirections("http://www.di.uoa.gr");
|
21
|
System.out.println("Now processing " + idUrl);
|
22
|
Vector<String> urls = crawler.getLinks(idUrl);
|
23
|
System.out.println("Retrieved links are: "+ urls);
|
24
|
System.out.println("Resources seem to be available in: " + extractor.extractResource(urls));
|
25
|
System.out.println();
|
26
|
}
|
27
|
|
28
|
}
|