Project

General

Profile

« Previous | Next » 

Revision 47463

copying to move to latest codebase

View differences:

modules/uoa-resource-discovery/trunk/build/ivy.xml
1
<ivy-module version="1.0">
	<!-- Module identity: organisation "driver", module "uoa-resource-discovery". -->
	<info organisation="driver"
		module="uoa-resource-discovery"
		status="integration" />

	<!-- Unless a dependency says otherwise, each configuration maps to the
	     dependency's "default" configuration. -->
	<configurations defaultconfmapping="*->default">
		<conf name="default" />
		<!-- Private configuration that extends "default"; referenced by the junit dependency. -->
		<conf name="junit" visibility="private" extends="default" />
	</configurations>

	<!-- Two artifacts are published under the module name: a jar and a "src" zip. -->
	<publications>
		<artifact name="uoa-resource-discovery" type="jar" />
		<artifact name="uoa-resource-discovery" type="src" ext="zip" />
	</publications>

	<!-- NOTE(review): most revisions below are dynamic ("+" / "x.y+"), so the resolved
	     versions depend on what the repository holds at build time. Confirm this is intended. -->
	<dependencies>
		<!-- Logging, HTML tidying and test support -->
		<dependency org="apache" name="commons-logging" rev="1.0.4" />
		<dependency org="org.w3c" name="tidy" rev="+" />
		<dependency org="log4j" name="log4j" rev="+" />
		<dependency org="junit" name="junit" rev="+" conf="junit" />

		<dependency org="net.matuschek" name="jobo" rev="1.4+" />
		<dependency org="DLS" name="jOAI" rev="2.0.9.3+" />

		<!-- Modules from the same "driver" organisation -->
		<dependency org="driver" name="unibi-commons" rev="+" />
		<dependency org="driver" name="unibi-data-utility-featureextraction-plugins" rev="+" />

		<dependency org="com.thoughtworks" name="xstream" rev="+" />
		<dependency org="org.cyberneko" name="nekohtml" rev="+" />
		<dependency org="com.jira" name="heritrix-commons" rev="+" />
		<dependency org="com.jira" name="heritrix-modules" rev="+" />
		<dependency org="org.kryo" name="kryo" rev="+" />
		<dependency org="edu.indiana" name="xpp3" rev="+" />
	</dependencies>
</ivy-module>
modules/uoa-resource-discovery/trunk/build/ivysettings.xml
1
<ivysettings>
  <!-- Load the shared settings file from the directory named by ${yvy.build.dir}. -->
  <include file="${yvy.build.dir}/ivysettings.xml"/>

  <!-- NOTE(review): Ivy documents "default" and "defaultCacheDir" as attributes of
       <caches>, not <settings>. Confirm that they take effect here before relying on them. -->
  <settings defaultResolver="yvy-resolver"
            default="yvy.default.cache"
            defaultCacheDir="${yvy.cache.dir}"/>
</ivysettings>
modules/uoa-resource-discovery/trunk/build/build.properties
1
# Source directories to build.
source.dirs = main

# Project identity.
project.name = uoa-resource-discovery
project.version = 0.0.7
# The label is intentionally left empty.
project.label =
6

  
modules/uoa-resource-discovery/trunk/build/build.xml
1
<project name="uoa-resource-discovery" default="build">
	<!--
	Build file for module "uoa-resource-discovery" of organisation "driver".
	-->

	<!-- Loaded first: Ant properties cannot be redefined once set, so values
	     from local.properties take precedence over the files below. -->
	<property file="local.properties"/>

	<!-- Layout path properties, looked up at three different directory depths. -->
	<property file="../../build/yvy-setup.properties"/>
	<property file="../../../../build/yvy-trunk.properties"/>
	<property file="../../../../../build/yvy-tag.properties"/>

	<!-- Module properties, then the location of the shared build definitions. -->
	<property file="build.properties"/>
	<property name="yvy.build.common"
	          value="${yvy.root.dir}/build/build-common.xml"/>

	<!-- Shared build definitions. -->
	<import file="${yvy.build.common}"/>

	<!-- Default target of this project: delegates to "jar". -->
	<target name="build" depends="jar"/>

	<!-- Invokes the target named by ${yvy.target.build}, passing on all properties and references. -->
	<target name="default">
		<antcall target="${yvy.target.build}" inheritAll="true" inheritRefs="true"/>
	</target>
</project>
modules/uoa-resource-discovery/trunk/test/junit/record2.xml
1
<?xml version="1.0"?>  
2
  <record rank="0.4898979">
3
    <result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr">
4
      <header>
5
        <dri:objIdentifier>7a22e67a-364b-4a2d-bcc1-cd6cc8a4e9d0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dspace.library.uu.nl:1874/29575</dri:objIdentifier>
6

  
7
        <dri:dateOfCollection>2009-12-30T00:16:01Z</dri:dateOfCollection>
8
      </header>
9
      <metadata>
10
        <dr:CobjContentSynthesis/>
11
        <dr:CobjTypology>Textual</dr:CobjTypology>
12
        <dr:CobjIdentifier>Journal of Personality and Social Psychology 89, 696-716 (2005)</dr:CobjIdentifier>
13
        <dr:CobjModel>OAI</dr:CobjModel>
14

  
15
        <dr:CobjMDFormats>oai_dc</dr:CobjMDFormats>
16
        <dr:CobjDescriptionSynthesis/>
17
        <dr:repositoryName>DSpace at Utrecht University</dr:repositoryName>
18
        <dr:repositoryLink>http://www.igitur.nl/</dr:repositoryLink>
19
        <dr:repositoryCountry>NL</dr:repositoryCountry>
20
        <dr:repositoryInstitution/>
21
        <dc:creator>Wicherts, J.M.</dc:creator>
22

  
23
        <dc:creator>Dolan, C.V.</dc:creator>
24
        <dc:creator>Hessen, D.J.</dc:creator>
25
        <dc:title>Stereotype threat and group differences in test performance: A question of measurement invariance.</dc:title>
26
        <dc:subject>ethnic differences</dc:subject>
27
        <dc:subject>sex differences</dc:subject>
28
        <dc:subject>measurement invariance</dc:subject>
29

  
30
        <dc:subject>Sociale Wetenschappen</dc:subject>
31
        <dc:subject>stereotype threat</dc:subject>
32
        <dc:subject>test performance</dc:subject>
33
        <dr:CobjCategory>0001</dr:CobjCategory>
34
        <dc:language>eng</dc:language>
35
        <dc:dateAccepted>2005-01-01</dc:dateAccepted>
36

  
37
        <dc:identifier>http://igitur-archive.library.uu.nl/fss/2008-0807-201603/UUindex.html</dc:identifier>
38
        <dc:publisher>American Psychological Association</dc:publisher>
39
        <dc:source/>
40
        <dc:contributor/>
41
        <dc:relation>0022-3514</dc:relation>
42
        <dc:description>Studies into the effects of stereotype threat (ST) on test performance have shed new light on race and sex differences in achievement and intelligence test scores. In this article,the authors relate ST theory to the psychometric concept of measurement invariance and show that ST effects may be viewed as a source of measurement bias. As such,ST effects are detectable by means of multigroup confirmatory factor analysis. This enables research into the generalizability of ST effects to real-life or high-stakes testing. The modeling approach is described in detail and applied to 3 experiments in which the amount of ST for minorities and women was manipulated. Results indicate that ST results in measurement bias of intelligence and mathematics tests.</dc:description>
43
      </metadata>
44

  
45
    </result>
46
  </record>
modules/uoa-resource-discovery/trunk/test/junit/TestXML.java
1
import java.util.Arrays;
2
import java.util.List;
3

  
4
import org.apache.log4j.BasicConfigurator;
5
import org.junit.Test;
6

  
7
import eu.dnetlib.common.utils.*;
8
import eu.dnetlib.data.utility.resource_discovery.plugin.ResourceDescriptionRecord;
9
import eu.dnetlib.data.utility.resource_discovery.plugin.ResourceUrls;
10
public class TestXML {
11
	
12
	@Test
13
	public void TestCrawlingAndExtraction() throws Exception {
14
		BasicConfigurator.configure();
15
		
16
		XMLSerializer<ResourceDescriptionRecord> handler = new XMLSerializer<ResourceDescriptionRecord>(ResourceDescriptionRecord.class);
17
		ResourceDescriptionRecord descrRecord = new ResourceDescriptionRecord();
18
		descrRecord.setObjectIdentifier("0239184018501");
19
		List<String> urlList = Arrays.asList("res1", "res2");
20

  
21
		ResourceUrls urls = new ResourceUrls();
22
		urls.setUrls(urlList);
23
		descrRecord.setResourceUrls(urls);
24
		
25
		System.out.println(handler.getAsXml(descrRecord));
26
	}
27
	
28
}
modules/uoa-resource-discovery/trunk/test/junit/record3.xml
1
<?xml version="1.0"?>
2
  <record rank="0.4618802">
3
    <result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr">
4
      <header>
5
        <dri:objIdentifier>7a22e67a-364b-4a2d-bcc1-cd6cc8a4e9d0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dspace.library.uu.nl:1874/26815</dri:objIdentifier>
6
        <dri:dateOfCollection>2009-12-30T00:13:45Z</dri:dateOfCollection>
7
      </header>
8

  
9
      <metadata>
10
        <dr:CobjContentSynthesis/>
11
        <dr:CobjTypology>Textual</dr:CobjTypology>
12
        <dr:CobjIdentifier>Logic Group Preprint Series 172 (2008)</dr:CobjIdentifier>
13
        <dr:CobjModel>OAI</dr:CobjModel>
14
        <dr:CobjMDFormats>oai_dc</dr:CobjMDFormats>
15
        <dr:CobjDescriptionSynthesis/>
16

  
17
        <dr:repositoryName>DSpace at Utrecht University</dr:repositoryName>
18
        <dr:repositoryLink>http://www.igitur.nl/</dr:repositoryLink>
19
        <dr:repositoryCountry>NL</dr:repositoryCountry>
20
        <dr:repositoryInstitution/>
21
        <dc:creator>Hollenberg, M.</dc:creator>
22
        <dc:title>Equational axioms of test algebra</dc:title>
23

  
24
        <dc:subject>Wijsbegeerte</dc:subject>
25
        <dr:CobjCategory>0000</dr:CobjCategory>
26
        <dc:language>eng</dc:language>
27
        <dc:dateAccepted>1996-12-09</dc:dateAccepted>
28
        <dc:identifier>http://igitur-archive.library.uu.nl/lg/2008-0326-201100/UUindex.html</dc:identifier>
29
        <dc:publisher/>
30

  
31
        <dc:source/>
32
        <dc:contributor/>
33
        <dc:relation/>
34
        <dc:description>We present a complete axiomatization of test algebra ([24,18,29]), the two-sorted algebraic variant of Propositional Dynamic Logic (PDL,[21,7]). The axiomatization consists of adding a finite number of equations to any axiomatization of Kleene algebra ([15,26,17,4]) and algebraic translations of the Segerberg ([27]) axioms for PDL. Kleene algebras are not finitely axiomatizable ([25,6]), so our result does not give us a finite axiomatization of test algebra: in fact, no finite equational axiomatization exists. We also present a single-sorted version of test algebra, using the notion of dynamic negation ([9,2,11]), to which the previous results carry over.</dc:description>
35
      </metadata>
36
    </result>
37
  </record>
modules/uoa-resource-discovery/trunk/test/junit/record4.xml
1
<?xml version="1.0"?>  
2
  <record rank="0.45643544">
3

  
4
    <result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr">
5
      <header>
6
        <dri:objIdentifier>3297df8d-c100-44a2-8aa9-64729c406e05_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:www.tara.tcd.ie:2262/32968</dri:objIdentifier>
7
        <dri:dateOfCollection>2010-01-05T18:27:02Z</dri:dateOfCollection>
8
      </header>
9
      <metadata>
10
        <dr:CobjContentSynthesis/>
11
        <dr:CobjTypology>Textual</dr:CobjTypology>
12

  
13
        <dr:CobjIdentifier>Wilson, S., Flood, B., Goyal, S., Mosher, J., Bergin, S., O'Brien, J., Kennedy, R. ...Parameter estimation for a model with both imperfect test and repair... in Proceedings of the IEEE VLSI Test Symposium, Berkeley, CA, 6-10 May 2007, IEEE, 2007, pp 271-276</dr:CobjIdentifier>
14
        <dr:CobjIdentifier>Y</dr:CobjIdentifier>
15
        <dr:CobjIdentifier>Y</dr:CobjIdentifier>
16
        <dr:CobjModel>OAI</dr:CobjModel>
17
        <dr:CobjMDFormats>oai_dc</dr:CobjMDFormats>
18
        <dr:CobjDescriptionSynthesis/>
19

  
20
        <dr:repositoryName>TARA</dr:repositoryName>
21
        <dr:repositoryLink>http://www.tara.tcd.ie/</dr:repositoryLink>
22
        <dr:repositoryCountry>IE</dr:repositoryCountry>
23
        <dr:repositoryInstitution/>
24
        <dc:creator>WILSON, SIMON PAUL</dc:creator>
25
        <dc:title>Parameter estimation for a model with both imperfect test and repair</dc:title>
26

  
27
        <dc:subject>Statistics</dc:subject>
28
        <dr:CobjCategory>0004</dr:CobjCategory>
29
        <dc:language>eng</dc:language>
30
        <dc:dateAccepted>2009-09-18</dc:dateAccepted>
31
        <dc:identifier>http://hdl.handle.net/2262/32968</dc:identifier>
32

  
33
        <dc:publisher>IEEE</dc:publisher>
34
        <dc:source/>
35
        <dc:contributor/>
36
        <dc:relation/>
37
        <dc:description>The involvement of BLI researchers is supported by a grant from the Industrial Development Agency of Ireland. The involvement of CTVR researchers is supported by Science Foundation Ireland grant 03/CE3/I405.</dc:description>
38
        <dc:description>We describe estimation of the parameters of a manufacturing test and repair model using data available from that test. The model allows imperfect testing and imperfect repair. The principal problem that we address is of parameter identification, given insufficient data, that we address by making conservative assumptions on the property being measured and the associated parameter values. Several cases of commonly occurring test types, in the manufacture of electronic products, are considered.</dc:description>
39
        <dc:description>PUBLISHED</dc:description>
40

  
41
      </metadata>
42
    </result>
43
  </record>
modules/uoa-resource-discovery/trunk/test/junit/record5.xml
1
<?xml version="1.0"?>  
2
  <record rank="0.42426404">
3
    <result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr">
4
      <header>
5
        <dri:objIdentifier>7a22e67a-364b-4a2d-bcc1-cd6cc8a4e9d0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dspace.library.uu.nl:1874/34935</dri:objIdentifier>
6
        <dri:dateOfCollection>2009-12-30T00:22:10Z</dri:dateOfCollection>
7

  
8
      </header>
9
      <metadata>
10
        <dr:CobjContentSynthesis/>
11
        <dr:CobjTypology>Textual</dr:CobjTypology>
12
        <dr:CobjIdentifier/>
13
        <dr:CobjModel>OAI</dr:CobjModel>
14
        <dr:CobjMDFormats>oai_dc</dr:CobjMDFormats>
15

  
16
        <dr:CobjDescriptionSynthesis/>
17
        <dr:repositoryName>DSpace at Utrecht University</dr:repositoryName>
18
        <dr:repositoryLink>http://www.igitur.nl/</dr:repositoryLink>
19
        <dr:repositoryCountry>NL</dr:repositoryCountry>
20
        <dr:repositoryInstitution/>
21
        <dc:creator>Hoofd, M.G.V. van het</dc:creator>
22
        <dc:title>Exercise and depressionafter stroke, a systematic review Shuttle Walk Test in patients who suffered a stroke, a feasibility study</dc:title>
23

  
24
        <dc:subject>exercise</dc:subject>
25
        <dc:subject>Fysiotherapiewetenschap</dc:subject>
26
        <dc:subject>feasibility study</dc:subject>
27
        <dc:subject>systematic review Shuttle Walk Test in patients who suffered a stroke, a feasibility study: stroke</dc:subject>
28
        <dc:subject>Exercise and depressionafter stroke, a systematic review: stroke</dc:subject>
29
        <dc:subject>Shuttle Walk Test</dc:subject>
30

  
31
        <dc:subject>Geneeskunde</dc:subject>
32
        <dc:subject>depression</dc:subject>
33
        <dc:subject>aerobic capacity</dc:subject>
34
        <dr:CobjCategory>0007</dr:CobjCategory>
35
        <dc:language>eng</dc:language>
36
        <dc:dateAccepted>2009-06-30</dc:dateAccepted>
37

  
38
        <dc:identifier>http://igitur-archive.library.uu.nl/student-theses/2009-0807-200622/UUindex.html</dc:identifier>
39
        <dc:publisher/>
40
        <dc:source/>
41
        <dc:contributor>Brussel, M. van</dc:contributor>
42
        <dc:contributor>Port, I.G.L. van de</dc:contributor>
43
        <dc:contributor>Takken, T.</dc:contributor>
44
        <dc:relation/>
45

  
46
        <dc:description>Exercise and depression after stroke Purpose-Aim of this review is to summarize the evidence from (randomized) controlled trials regarding the effects of exercise on depression or depressive symptoms in patients who had suffered a stroke. Methods-Studies that included patients who suffered a stroke and measured an outcome concerning depression were systematically reviewed. After determining the methodological quality by the Pedro-scale, a best evidence synthesis was applied. Results-Two out of seven studies showed significant differences between both groups, in favor of the intervention group. Best evidence synthesis showed insufficient evidence for positive effects of exercise on depression in patients who suffered a stroke. Conclusion-From the studies included in the present review it cannot be concluded that exercise interventions had a positive effect on depression in patients who suffered a stroke. Key Words: stroke ... depression ... exercise ... systematic review Shuttle Walk Test in patients who suffered a stroke Objective-To evaluate the feasibility of the Shuttle Walk Test (SWT) and the Shuttle Run Test for children with cerebral palsy at GMFCS level II (SRT-II) in patients who suffered a stroke. Methods-Fifteen patients who suffered a stroke completed both the SWT and SRT-II to evaluate aerobic capacity. Results-Significant differences were found in maximum heart rate and test duration in favor of the SRT-II. No significant difference was found in perceived exertion. Conclusion-The SRT-II is more feasible to assess aerobic capacity in patients who suffered a stroke compared to the SWT. Key Words: stroke ... aerobic capacity ... feasibility study ... Shuttle Walk Test Nederlandse samenvatting De invloed van bewegen op depressie bij mensen met een CVA Doel-Dit review heeft als doel om te onderzoeken wat de invloed van bewegen is op depressie bij mensen met een CVA. 
Methode-Een systematische zoektocht is gedaan naar (gerandomiseerde), gecontroleerde studies waarin patiënten met een CVA geïncludeerd waren en depressie als uitkomstmaat gemeten werd. Nadat de methodologische kwaliteit werd bepaald met de PEDro-schaal, is een best evidence synthese opgesteld. Resultaten-Twee van zeven studies lieten een significant, positief effect zien van bewegen op depressie bij mensen met een CVA. De best evidence synthese toonde onvoldoende bewijs voor positieve effecten van bewegen bij mensen met een CVA. Conclusie-Uit dit review kan niet geconcludeerd worden dat bewegen een positief effect heeft op depressie bij mensen met een CVA. Trefwoorden: CVA ... depressie ... bewegen ... systematisch review Een shuttle wandeltest bij mensen met een CVA Doel-In deze studie wordt de toepasbaarheid onderzocht van de Shuttle Wandel Test (SWT) en de Shuttle Run Test voor kinderen met een cerebrale parese met GMFCS-niveau II (SRT-II) bij mensen met een cerebrovasculair accident (CVA). Methode-Vijftien patiënten met een CVA voerden de SWT en de SRT-II uit om het maximale inspanningsvermogen te meten. Resultaten-Significante verschillen werden gevonden wat betreft maximale hartslag en testduur tussen beide testen ten gunste van SRT-II. Er werd geen significant verschil gevonden in ervaren vermoeidheid (Borg-schaal). Conclusie-De SRT-II is beter toepasbaar om het maximale inspanningsvermogen te meten van mensen met een CVA dan de SWT. Trefwoorden: CVA ... maximaal inspanningsvermogen ... haalbaarheidsstudie ... Shuttle Walk Test</dc:description>
47
      </metadata>
48
    </result>
49
  </record>
modules/uoa-resource-discovery/trunk/test/junit/record6.xml
1
<?xml version="1.0"?>  
2
  <record rank="0.42426404">
3
    <result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr">
4
      <header>
5
        <dri:objIdentifier>7a22e67a-364b-4a2d-bcc1-cd6cc8a4e9d0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dspace.library.uu.nl:1874/36190</dri:objIdentifier>
6

  
7
        <dri:dateOfCollection>2009-12-30T00:23:45Z</dri:dateOfCollection>
8
      </header>
9
      <metadata>
10
        <dr:CobjContentSynthesis/>
11
        <dr:CobjTypology>Textual</dr:CobjTypology>
12
        <dr:CobjIdentifier/>
13
        <dr:CobjModel>OAI</dr:CobjModel>
14

  
15
        <dr:CobjMDFormats>oai_dc</dr:CobjMDFormats>
16
        <dr:CobjDescriptionSynthesis/>
17
        <dr:repositoryName>DSpace at Utrecht University</dr:repositoryName>
18
        <dr:repositoryLink>http://www.igitur.nl/</dr:repositoryLink>
19
        <dr:repositoryCountry>NL</dr:repositoryCountry>
20
        <dr:repositoryInstitution/>
21
        <dc:creator>Junte, R.D.</dc:creator>
22

  
23
        <dc:title>Acaricide resistance in the blue cattle tick in South Africa; A comparison of three assays for determining tick resistance</dc:title>
24
        <dc:subject>Acaricide resistance, 3 South African Provinces, blue cattle tick, Riphicephalus (Boophilus) decoloratus, comparison, Adult Immersion Test, Shaw Larval Test, Larval Packet Test, cattle, resistant</dc:subject>
25
        <dc:subject>Diergeneeskunde</dc:subject>
26
        <dc:subject>Diergeneeskunde</dc:subject>
27
        <dr:CobjCategory>0007</dr:CobjCategory>
28
        <dc:language>eng</dc:language>
29

  
30
        <dc:dateAccepted>2007-08-21</dc:dateAccepted>
31
        <dc:identifier>http://igitur-archive.library.uu.nl/student-theses/2009-1015-200152/UUindex.html</dc:identifier>
32
        <dc:publisher/>
33
        <dc:source/>
34
        <dc:contributor>Dr. E. van Dalen, Prof. Dr. F. Jongejan</dc:contributor>
35
        <dc:relation/>
36
        <dc:description>The susceptibility of engorged females and larvae of Boophilus decoloratus ticks,- collected from cattle on breeding farms in Kwazula Natal, Eastern Cape and Limpopo provinces in South Africa-, for amitraz, cypermetrin and chlorfenvinphos. Was examined by means of the Adult Immersion test (AIT), the Shaw Larval Immersion Test (SLIT) and the Larval Packet Test (LPT). The results indicated resistance of Boophilus ticks to cypermetrin on all nine farms examined. On six farms ticks had developed resistance against amitraz demonstrated by all three tests procedures. On one farm ticks were still susceptible for amitraz in all tests, whereas in 2 remaining farms tests were not in agreement. Furthermore ticks were found susceptible for chlorfinvenphos on three farms in all three tests, whereas in 5 farms showed they appeared to be resistant. In general the results obtained with the different tests were in agreement.</dc:description>
37

  
38
      </metadata>
39
    </result>
40
  </record>
modules/uoa-resource-discovery/trunk/test/junit/record7.xml
1
<?xml version="1.0"?>  
2
  <record rank="0.42426404">
3
    <result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr">
4
      <header>
5
        <dri:objIdentifier>9dffbf71-6914-40fe-b110-8e41a977ba90_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dro.dur.ac.uk.OAI2:1691</dri:objIdentifier>
6
        <dri:dateOfCollection>2009-12-29T22:13:17Z</dri:dateOfCollection>
7

  
8
      </header>
9
      <metadata>
10
        <dr:CobjContentSynthesis/>
11
        <dr:CobjTypology>Textual</dr:CobjTypology>
12
        <dr:CobjIdentifier>Gott, R. and Roberts, R. (2004) 'A written test for procedural understanding : a way forward for assessment in the UK science curriculum ?', Research in science &amp; technological education., 22 (1). pp. 5-21.</dr:CobjIdentifier>
13
        <dr:CobjModel>OAI</dr:CobjModel>
14
        <dr:CobjMDFormats>oai_dc</dr:CobjMDFormats>
15

  
16
        <dr:CobjDescriptionSynthesis/>
17
        <dr:repositoryName>Durham Research Online</dr:repositoryName>
18
        <dr:repositoryLink>http://dro.dur.ac.uk</dr:repositoryLink>
19
        <dr:repositoryCountry>UK</dr:repositoryCountry>
20
        <dr:repositoryInstitution/>
21
        <dc:creator>Gott, R.</dc:creator>
22
        <dc:creator>Roberts, R.</dc:creator>
23

  
24
        <dc:title>A written test for procedural understanding : a way forward for assessment in the UK science curriculum ?</dc:title>
25
        <dc:subject/>
26
        <dr:CobjCategory>0001</dr:CobjCategory>
27
        <dc:language>eng</dc:language>
28
        <dc:dateAccepted>2004-05-01</dc:dateAccepted>
29
        <dc:identifier>http://dx.doi.org/10.1080/0263514042000187511</dc:identifier>
30
        <dc:publisher>Routledge</dc:publisher>
31

  
32
        <dc:source/>
33
        <dc:contributor/>
34
        <dc:relation>http://igitur-archive.library.uu.nl/lg/2008-0326-201100/preprint172.pdf</dc:relation>
35
        <dc:relation>http://dx.doi.org/10.1080/0263514042000187511</dc:relation>
36
        <dc:description>A recent UK House of Commons report on Science 14-19 identified problems with coursework and argued for a greater emphasis on teaching and assessment of scientific literacy. This paper describes a written test for procedural understanding, given to 15 year olds, that addresses both of these issues. Comparisons are made between the scores on a written test of procedural understanding with both assessments made of subject knowledge and pupil accounts of investigations. The potential advantages of assessing procedural understanding by written tests are discussed.</dc:description>
37
      </metadata>
38
    </result>
39

  
40
  </record>
modules/uoa-resource-discovery/trunk/test/junit/SuperTester.java
1
import java.util.Vector;
2

  
3
import org.apache.log4j.BasicConfigurator;
4
import org.junit.Test;
5

  
6
import eu.dnetlib.data.utility.resource_discovery.crawler.Crawler;
7
import eu.dnetlib.data.utility.resource_discovery.crawler.ResourceExtractor;
8
import eu.dnetlib.data.utility.resource_discovery.url_filter.UrlFilter;
9

  
10

  
11
public class SuperTester {
12

  
13
	@Test
14
	public void TestCrawlingAndExtraction() throws Exception {
15
		BasicConfigurator.configure();
16
		
17
		Crawler crawler = new Crawler();
18
		ResourceExtractor extractor = new ResourceExtractor();
19
		
20
		String idUrl = UrlFilter.resolveRedirections("http://www.di.uoa.gr");
21
		System.out.println("Now processing " + idUrl);
22
		Vector<String> urls = crawler.getLinks(idUrl);
23
		System.out.println("Retrieved links are: "+ urls);
24
		System.out.println("Resources seem to be available in: " + extractor.extractResource(urls));
25
		System.out.println();
26
	}
27
	
28
}
modules/uoa-resource-discovery/trunk/test/junit/record8.xml
1
<?xml version="1.0"?>    
2
  <record rank="0.42426404">
3
    <result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr">
4
      <header>
5
        <dri:objIdentifier>9dffbf71-6914-40fe-b110-8e41a977ba90_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dro.dur.ac.uk.OAI2:1856</dri:objIdentifier>
6
        <dri:dateOfCollection>2009-12-29T22:13:24Z</dri:dateOfCollection>
7
      </header>
8
      <metadata>
9

  
10
        <dr:CobjContentSynthesis/>
11
        <dr:CobjTypology>Textual</dr:CobjTypology>
12
        <dr:CobjIdentifier>Remedios, R. and Ritchie, K. and Lieberman, D. A. (2005) 'I used to like it but now I don't : the effect of the transfer test in Northern Ireland on pupils' intrinsic motivation.', British journal of educational psychology., 75 (3). pp. 435-452.</dr:CobjIdentifier>
13
        <dr:CobjModel>OAI</dr:CobjModel>
14
        <dr:CobjMDFormats>oai_dc</dr:CobjMDFormats>
15
        <dr:CobjDescriptionSynthesis/>
16
        <dr:repositoryName>Durham Research Online</dr:repositoryName>
17

  
18
        <dr:repositoryLink>http://dro.dur.ac.uk</dr:repositoryLink>
19
        <dr:repositoryCountry>UK</dr:repositoryCountry>
20
        <dr:repositoryInstitution/>
21
        <dc:creator>Remedios, R.</dc:creator>
22
        <dc:creator>Lieberman, D. A.</dc:creator>
23
        <dc:creator>Ritchie, K.</dc:creator>
24

  
25
        <dc:title>I used to like it but now I don't : the effect of the transfer test in Northern Ireland on pupils' intrinsic motivation.</dc:title>
26
        <dc:subject/>
27
        <dr:CobjCategory>0001</dr:CobjCategory>
28
        <dc:language>eng</dc:language>
29
        <dc:dateAccepted>2005-09-01</dc:dateAccepted>
30
        <dc:identifier/>
31
        <dc:publisher>British Psychological Society</dc:publisher>
32

  
33
        <dc:source/>
34
        <dc:contributor/>
35
        <dc:relation>http://dro.dur.ac.uk/1856/</dc:relation>
36
        <dc:relation>http://dx.doi.org/10.1348/000709904X24771</dc:relation>
37
        <dc:description>Background. Research has suggested that the pressure of exams could undermine pupils' interest in their subjects, but almost all of this research has been conducted in laboratory settings. The Transfer Test in Northern Ireland provides an unusual opportunity to assess the effects of exam pressure in real life because some 10- and 11-year-olds sit a Transfer Test to be admitted to grammar school while others are not tested until they are 14. Aim. To assess the effect of exams on pupils' interest in their subjects both during the period before the exam and after the results are known. Sample. The sample comprised 66 pupils preparing to sit the Transfer Test and 55 not preparing for the test. Method. Pupils' interest in their school subjects was assessed by questionnaires administered 2 weeks before the Transfer Test and then again 2 weeks after the results were announced. Results. Surprisingly, prior to sitting the test, there was no significant difference in motivation between the test and no-test pupils. However, after sitting the test, the motivation of the test pupils decreased significantly relative to their no-test counterparts, despite the fact that most achieved the grades they needed for admission to grammar school. Conclusions. Exams provide a valuable tool for assessing academic progress, but under some circumstances they can reduce pupils' interest in the subjects they are studying. </dc:description>
38
      </metadata>
39
    </result>
40

  
41
  </record>
modules/uoa-resource-discovery/trunk/test/junit/MyResultDao.java
1
import java.util.ArrayList;
2
import java.util.List;
3

  
4
import eu.dnetlib.data.utility.featureextraction.dao.IResultDao;
5

  
6

  
7
public class MyResultDao implements IResultDao {
8
	
9
	ArrayList<String> results;
10
	
11
	public MyResultDao() {
12
		this.results = new ArrayList<String>();
13
	}
14

  
15
	@Override
16
	public void addResults(List<String> results) {
17
		for(String result : results)
18
			this.results.add(result);
19
	}
20

  
21
	@Override
22
	public void close() {
23
		// TODO Auto-generated method stub
24

  
25
	}
26

  
27
	@Override
28
	public int getNumberOfElements() {
29
		return results.size();
30
	}
31

  
32
	@Override
33
	public List<String> getResults(int from, int to) {
34
		return results.subList(from-1, to);
35
	}
36

  
37
}
modules/uoa-resource-discovery/trunk/test/junit/gr/uoa/di/resourcediscovery/test/Sample.java
1
package gr.uoa.di.resourcediscovery.test;
2

  
3
import gr.uoa.di.resourcediscovery.MethodProvider;
4
import gr.uoa.di.resourcediscovery.MethodProviderFileStorageImpl;
5
import gr.uoa.di.resourcediscovery.methods.ResourceDiscoveryMethod;
6
import gr.uoa.di.resourcediscovery.methods.XPathAndCrawl;
7

  
8
import java.net.URL;
9
import java.util.ArrayList;
10
import java.util.List;
11

  
12
import org.apache.log4j.BasicConfigurator;
13

  
14
public class Sample {
15
	
16
	public static void main(String[] args) throws Exception {
17
		BasicConfigurator.configure();
18
		
19
		URL conUrl = new URL("http://rudie.di.uoa.gr:8080/files/");
20
		
21
		MethodProvider provider = new MethodProviderFileStorageImpl("method-map.xml");
22
		ResourceDiscoveryMethod method = provider.getMethod(conUrl);
23
		
24
		if(method == null) {
25
			List<String> mimeTypes = new ArrayList<String>();
26
			mimeTypes.add("application/pdf");
27

  
28
			XPathAndCrawl xpath = new XPathAndCrawl(mimeTypes, "http://rudie.di.uoa.gr:8080/robots.txt");
29

  
30
			method = xpath;
31
		}
32
		
33
		System.out.println("resources found in: " + method.getResources(conUrl, provider));
34
	}
35

  
36
}
modules/uoa-resource-discovery/trunk/test/junit/MySourceDataProvider.java
1
import java.util.ArrayList;
2
import java.util.List;
3

  
4
import eu.dnetlib.data.utility.featureextraction.FeatureExtractionException;
5
import eu.dnetlib.data.utility.featureextraction.dataprovider.SourceDataProvider;
6

  
7

  
8
public class MySourceDataProvider implements SourceDataProvider {
9
	
10
	ArrayList<String> DMFRecords;
11
	
12
	public MySourceDataProvider() {
13
		this.DMFRecords = new ArrayList<String>();
14
	}
15
	
16
	public void addDMFRecord(String record) {
17
		DMFRecords.add(record);
18
	}
19

  
20
	@Override
21
	public List<String> getRecords(int from, int to)
22
			throws FeatureExtractionException {
23
		try {
24
			if(to >= this.DMFRecords.size())
25
				to = this.DMFRecords.size();
26
			return DMFRecords.subList(from-1, to);
27
		}
28
		catch(Exception e) {
29
			throw new FeatureExtractionException(e);
30
		}
31
	}
32

  
33
	@Override
34
	public int getSize() throws FeatureExtractionException {
35
		return this.DMFRecords.size();
36
	}
37

  
38
	@Override
39
	public String getStatus() throws FeatureExtractionException {
40
		// TODO Auto-generated method stub
41
		return null;
42
	}
43

  
44
}
modules/uoa-resource-discovery/trunk/test/junit/TestResourceDiscoverer.java
1
import java.util.Calendar;
2

  
3
import org.junit.BeforeClass;
4
import org.junit.Test;
5

  
6
import eu.dnetlib.data.utility.resource_discovery.crawler.Crawler;
7
import eu.dnetlib.data.utility.resource_discovery.crawler.ResourceExtractor;
8
import eu.dnetlib.data.utility.resource_discovery.plugin.crawler.ResourceDiscoverer;
9

  
10

  
11
public class TestResourceDiscoverer {
12

  
13
	@BeforeClass
14
	public static void Config() {
15
		//BasicConfigurator.configure();
16
	}
17
	
18
	@Test
19
	public void test() throws Exception {
20
		String url = "http://elib.uni-stuttgart.de/opus/volltexte/1999/1/";
21
		Calendar cal = Calendar.getInstance();
22
		System.out.println(cal.getTime());
23
		ResourceDiscoverer discoverer = new ResourceDiscoverer();
24
		System.out.println(discoverer.getResources(url));
25
		cal = Calendar.getInstance();
26
		System.out.println(cal.getTime());
27
		Crawler crawler = new Crawler();
28
		ResourceExtractor extractor = new ResourceExtractor();
29
		System.out.println(extractor.extractResource(crawler.getLinks(url)));
30
		cal = Calendar.getInstance();
31
		System.out.println(cal.getTime());
32
	}
33
}
modules/uoa-resource-discovery/trunk/test/junit/TestPlugin.java
1
import java.io.BufferedReader;
2
import java.io.InputStreamReader;
3

  
4
import org.apache.log4j.BasicConfigurator;
5
import org.junit.*;
6

  
7
import eu.dnetlib.data.utility.resource_discovery.plugin.ResourceDiscoveryPlugin;
8

  
9

  
10
public class TestPlugin {
11
	
12
	MySourceDataProvider provider;
13
	static private int N = 7;
14
	
15
	@BeforeClass
16
	public static void Config() {
17
		BasicConfigurator.configure();
18
	}
19
	
20
	@Before
21
	public void InitializeProvider() throws Exception {
22
		provider = new MySourceDataProvider();
23
		for(int i=7;i<=N;i++) {
24
			BufferedReader br = new BufferedReader(new InputStreamReader(TestPlugin.class.getResourceAsStream("record"+i+".xml")));
25
			
26
			String line;
27
			String record = "";
28
			while((line = br.readLine()) != null)
29
				record += line;
30
			provider.addDMFRecord(record);
31
		}
32
	}
33
	
34
	@Test
35
	public void TestResourcePlugin() throws Exception {
36
		ResourceDiscoveryPlugin plugin = new ResourceDiscoveryPlugin();
37
		plugin.setDao(new MyResultDao());
38
		plugin.setSourceDataProvider(provider);
39
		plugin.init();
40
		
41
		//System.out.println(provider.getRecords(1, 2));
42
		//System.out.println(provider.getRecords(1, 2).size());
43
		plugin.execute();
44
		System.out.println("\n\n" + plugin.getDao().getResults(1, 1));
45
	}
46
	
47
}
modules/uoa-resource-discovery/trunk/test/junit/record1.xml
1
<?xml version="1.0"?>
2
  <record rank="0.99999994">
3
    <result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr">
4
      <header>
5
        <dri:objIdentifier>7a22e67a-364b-4a2d-bcc1-cd6cc8a4e9d0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dspace.library.uu.nl:1874/34916</dri:objIdentifier>
6
        <dri:dateOfCollection>2009-12-30T00:22:07Z</dri:dateOfCollection>
7
      </header>
8
      <metadata>
9

  
10
        <dr:CobjContentSynthesis/>
11
        <dr:CobjTypology>Textual</dr:CobjTypology>
12
        <dr:CobjIdentifier/>
13
        <dr:CobjModel>OAI</dr:CobjModel>
14
        <dr:CobjMDFormats>oai_dc</dr:CobjMDFormats>
15
        <dr:CobjDescriptionSynthesis/>
16
        <dr:repositoryName>DSpace at Utrecht University</dr:repositoryName>
17

  
18
        <dr:repositoryLink>http://www.igitur.nl/</dr:repositoryLink>
19
        <dr:repositoryCountry>NL</dr:repositoryCountry>
20
        <dr:repositoryInstitution/>
21
        <dc:creator>Westerlaken, J.</dc:creator>
22
        <dc:title>Test</dc:title>
23
        <dc:subject>test</dc:subject>
24

  
25
        <dc:subject>Geneeskunde</dc:subject>
26
        <dc:subject>Verplegingswetenschap</dc:subject>
27
        <dr:CobjCategory>0007</dr:CobjCategory>
28
        <dc:language>dut/nla</dc:language>
29
        <dc:dateAccepted>2009-07-31</dc:dateAccepted>
30
        <dc:identifier>http://igitur-archive.library.uu.nl/dissertations/2005-1018-200018/index.htm</dc:identifier>
31

  
32
        <dc:publisher/>
33
        <dc:source/>
34
        <dc:contributor>Teat, test</dc:contributor>
35
        <dc:contributor>Test, Test</dc:contributor>
36
        <dc:relation/>
37
        <dc:description>Test</dc:description>
38
      </metadata>
39

  
40
    </result>
41
  </record>
modules/uoa-resource-discovery/trunk/.project
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>uoa-resource-discovery</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.jdt.core.javabuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
	</buildSpec>
14
	<natures>
15
		<nature>org.eclipse.jdt.core.javanature</nature>
16
	</natures>
17
</projectDescription>
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/UnknownMethodException.java
1
package gr.uoa.di.resourcediscovery;
2

  
3
/**
 * Checked exception declared by {@code MethodProvider.getMethod}; it carries
 * no message or cause of its own.
 */
public class UnknownMethodException extends Exception {

	private static final long serialVersionUID = 760327436365242998L;

	/** Creates the exception without a detail message. */
	public UnknownMethodException() {
		super();
	}

}
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/MethodProviderFileStorageImpl.java
1
package gr.uoa.di.resourcediscovery;
2

  
3
import gr.uoa.di.resourcediscovery.methods.ResourceDiscoveryMethod;
4

  
5
import java.io.File;
6
import java.io.FileNotFoundException;
7
import java.io.FileReader;
8
import java.io.FileWriter;
9
import java.io.IOException;
10
import java.net.URL;
11
import java.util.HashMap;
12

  
13
import com.thoughtworks.xstream.XStream;
14

  
15
public class MethodProviderFileStorageImpl implements MethodProvider {
16

  
17
	private String pathToFile = null;
18

  
19
	HashMap<URL, ResourceDiscoveryMethod> map = new HashMap<URL, ResourceDiscoveryMethod>();
20

  
21
	public MethodProviderFileStorageImpl() {
22

  
23
	}
24

  
25
	@SuppressWarnings("unchecked")
26
	public MethodProviderFileStorageImpl(String pathToFile) throws FileNotFoundException {
27
		XStream xstream = new XStream();
28
		if(!(new File(pathToFile).exists()))
29
			map = new  HashMap<URL, ResourceDiscoveryMethod>();
30
		else
31
			map = (HashMap<URL, ResourceDiscoveryMethod>) xstream.fromXML(new FileReader(new File(pathToFile)));
32
		this.pathToFile = pathToFile;
33
	}
34

  
35
	@Override
36
	public ResourceDiscoveryMethod getMethod(URL baseUrl) throws MalformedConfigurationException, UnknownMethodException, IOException {
37
		baseUrl = new URL(Toolkit.getRedirectedUrl(baseUrl.toString(), 500));
38
		ResourceDiscoveryMethod ret = map.get(new URL(baseUrl.getProtocol()+"://"+baseUrl.getHost()));
39
		return ret;
40
	}
41

  
42
	@Override
43
	public void setMethod(URL baseUrl, ResourceDiscoveryMethod method) {
44
		map.put(baseUrl, method);
45
		try {
46
			store();
47
		} catch (IOException e) {
48
			e.printStackTrace();
49
		}
50
	}
51

  
52
	public String getPathToFile() {
53
		return pathToFile;
54
	}
55

  
56
	public void setPathToFile(String pathToFile) {
57
		this.pathToFile = pathToFile;
58
	}
59

  
60
	public void store() throws IOException {
61
		XStream xstream = new XStream();
62
		xstream.toXML(map, new FileWriter(new File(pathToFile)));
63
	}
64

  
65
}
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/methods/ResourceDiscoveryMethod.java
1
package gr.uoa.di.resourcediscovery.methods;
2

  
3
import gr.uoa.di.resourcediscovery.MethodProvider;
4

  
5
import java.io.IOException;
6
import java.net.URL;
7
import java.util.List;
8

  
9
import org.xml.sax.SAXException;
10

  
11
public interface ResourceDiscoveryMethod {
12

  
13
	public List<String> getResources(URL upageUrl, MethodProvider provider) throws SAXException, IOException;
14
}
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/methods/XPathAndCrawl.java
1
package gr.uoa.di.resourcediscovery.methods;
2

  
3
import gr.uoa.di.resourcediscovery.MalformedConfigurationException;
4
import gr.uoa.di.resourcediscovery.MethodProvider;
5
import gr.uoa.di.resourcediscovery.Toolkit;
6

  
7
import java.io.BufferedReader;
8
import java.io.FileNotFoundException;
9
import java.io.IOException;
10
import java.io.InputStreamReader;
11
import java.net.MalformedURLException;
12
import java.net.URL;
13
import java.util.ArrayList;
14
import java.util.List;
15

  
16
import org.apache.log4j.Logger;
17
import org.archive.modules.net.RobotsDirectives;
18
import org.archive.modules.net.Robotstxt;
19
import org.cyberneko.html.parsers.DOMParser;
20
import org.w3c.dom.Document;
21
import org.w3c.dom.Node;
22
import org.w3c.dom.traversal.DocumentTraversal;
23
import org.w3c.dom.traversal.NodeFilter;
24
import org.w3c.dom.traversal.NodeIterator;
25
import org.xml.sax.SAXException;
26

  
27
public class XPathAndCrawl implements ResourceDiscoveryMethod {
28

  
29
	transient Logger logger = Logger.getLogger(XPathAndCrawl.class);
30

  
31
	private boolean resolveFrames = true;
32
	private boolean skipFirstPage = false;
33
	private long sleepMillis = 100;
34
	private boolean ignoreRobotsTxt = false;
35
	private String agentName = "OpenAIRE_Harvester";
36
	private List<String> mimeTypes = new ArrayList<String>();
37
	private boolean fallback = true;
38
	private String robotstxtUrl = null;
39

  
40
	transient private Robotstxt robot = null;
41
	transient private RobotsDirectives directives = null;
42

  
43
	private List<String> xpaths = new ArrayList<String>();
44

  
45
	/** Creates a method that ignores robots.txt; everything else keeps its field default. */
	public XPathAndCrawl() {
		this.ignoreRobotsTxt = true;
	}

	/**
	 * Creates a method for one repository (you need one per repository!).
	 *
	 * @param mimeTypes    MIME types that count as a resource; copied into this instance
	 * @param robotstxtUrl location of the repository's robots.txt, or {@code null}
	 *                     to ignore robots.txt
	 * @throws IOException if robots.txt exists but cannot be read
	 */
	public XPathAndCrawl(List<String> mimeTypes, String robotstxtUrl) throws FileNotFoundException, IOException {
		this.mimeTypes.addAll(mimeTypes);
		// remember the location: readResolve() needs it to reload the directives
		// after this instance has been persisted and read back
		this.robotstxtUrl = robotstxtUrl;
		loadRobotsDirectives(robotstxtUrl);
	}

	/**
	 * Sets the robots.txt location and loads its directives for the current
	 * agent name.
	 *
	 * @param robotstxtUrl location of robots.txt, or {@code null} to ignore robots.txt
	 * @throws IOException if robots.txt exists but cannot be read
	 */
	public void setRobotstxt(String robotstxtUrl) throws FileNotFoundException, IOException {
		this.robotstxtUrl = robotstxtUrl;
		loadRobotsDirectives(robotstxtUrl);
	}

	/**
	 * Fetches and parses robots.txt. A {@code null} location or a missing
	 * file switches this instance to ignoring robots.txt.
	 */
	private void loadRobotsDirectives(String location) throws IOException {
		if (location == null) {
			ignoreRobotsTxt = true;
			return;
		}

		URL url = new URL(location);
		BufferedReader in = null;
		try {
			in = new BufferedReader(new InputStreamReader(url.openStream()));
			this.robot = new Robotstxt(in);
			this.directives = this.robot.getDirectivesFor(agentName);
		} catch (FileNotFoundException ex) {
			logger.debug("Robots.txt was not found at " + location);
			ignoreRobotsTxt = true;
		} finally {
			// the stream was previously left open
			if (in != null)
				in.close();
		}
	}

	public String getRobotstxtUrl() {
		return robotstxtUrl;
	}
88

  
89
	@Override
90
	public List<String> getResources(URL upageUrl, MethodProvider provider) throws SAXException, IOException {
91

  
92
		String pageUrl = upageUrl.toString();
93
		
94
		logger.debug("Known xpaths: "+this.xpaths);
95
		
96
		pageUrl = Toolkit.getRedirectedUrl(pageUrl, this.sleepMillis);
97
		
98
		logger.debug("Resolved possible redirections. Url: "+pageUrl);
99
		
100
		List<String> ret = new ArrayList<String>();
101
		List<String> urls = new ArrayList<String>();
102
		urls.add(pageUrl);
103
		
104
		// check if url is a redirection
105
		
106
		
107
		if(this.mimeTypes.contains(Toolkit.getMimeType(pageUrl, this.sleepMillis))) {
108
			ret.add(Toolkit.makeAbsolute(pageUrl, new URL(pageUrl)));
109
			return ret;
110
		}
111

  
112
		if (this.resolveFrames) {
113
			DOMParser parser = new DOMParser();
114
			parser.parse(pageUrl);
115
			Document doc = parser.getDocument();
116
			urls.addAll(resolveFrames(doc, new URL(pageUrl)));
117
			logger.debug("urls after resolving frames: " + urls);
118
		}
119

  
120
		if (this.skipFirstPage) {
121
			List<String> addme = new ArrayList<String>();
122
			for (String url : urls) {
123
				DOMParser parser = new DOMParser();
124
				parser.parse(url);
125
				Document doc = parser.getDocument();
126
				addme.addAll(oneDepthDown(doc, new URL(url)));
127
			}
128

  
129
			urls.remove(pageUrl);
130

  
131
			if (this.resolveFrames) {
132
				for (String url : urls) {
133
					DOMParser parser = new DOMParser();
134
					parser.parse(url);
135
					Document doc = parser.getDocument();
136
					addme.addAll(resolveFrames(doc, new URL(url)));
137
				}
138
			}
139

  
140
			urls.addAll(addme);
141
			logger.debug("urls after skipping 1st page and resolving frames: " + urls);
142
		}
143

  
144
		for (String url : urls) {
145
			logger.debug("looking for resource in: " + url);
146
			try {
147
				url = Toolkit.makeAbsolute(url, new URL(pageUrl));
148
			} catch (Exception e) {
149
				e.printStackTrace();
150
				continue;
151
			}
152
			URL startingUrl = new URL(url);
153

  
154
			if (!this.ignoreRobotsTxt)
155
				if (!this.directives.allows(Toolkit.makeRelative(startingUrl))) {
156
					logger.debug("Skipping " + startingUrl + ". Disallowed by robots.txt directives.");
157
					continue;
158
				}
159

  
160
			if (this.xpaths.size() == 0) {
161
				logger.debug("No xpath information, crawling");
162
				// this for the first time
163
				DOMParser parser = new DOMParser();
164
				parser.parse(startingUrl.toString());
165
				Document doc = parser.getDocument();
166

  
167
				List<Node> resourceNodes = findNodesWithResource(doc, startingUrl);
168

  
169
				for (Node resourceNode : resourceNodes) {
170
					String xp = getXpathToRoot(resourceNode);
171
					xpaths.add(xp);
172
					logger.debug(xp);
173
				}
174
				
175
				try {
176
					URL methodUrl = new URL(pageUrl);
177
					provider.setMethod(new URL(methodUrl.getProtocol()+"://"+methodUrl.getHost()), this);
178
				} catch(MalformedConfigurationException e) {
179
					logger.error("Error updating xpath information", e);
180
				}
181

  
182
				for (String xp : xpaths) {
183
					String resourceUrl = getResourceUrl(xp, doc, startingUrl);
184
					if (resourceUrl != null) {
185
						logger.debug(resourceUrl);
186
						ret.add(resourceUrl);
187
					}
188
				}
189
			} else {
190
				// this is for the rest of the pages of the repo
191
				DOMParser parser = new DOMParser();
192
				parser.parse(startingUrl.toString());
193
				Document doc = parser.getDocument();
194

  
195
				for (String xp : xpaths) {
196
					String resourceUrl = getResourceUrl(xp, doc, startingUrl);
197
					if (resourceUrl != null) {
198
						logger.debug(resourceUrl);
199
						ret.add(resourceUrl);
200
					}
201
				}
202
			}
203
		}
204

  
205
		if (ret.size() == 0 && this.fallback) {
206
			// if no xpath contained the resource, try to find it and add
207
			// all the xpaths
208
			for (String url : urls) {
209
				logger.debug("looking for resource in (not found in xpath): " + url);
210

  
211
				try {
212
					url = Toolkit.makeAbsolute(url, new URL(pageUrl));
213
				} catch (Exception e) {
214
					e.printStackTrace();
215
					continue;
216
				}
217
				URL startingUrl = new URL(url);
218

  
219
				if (!this.ignoreRobotsTxt)
220
					if (!this.directives.allows(Toolkit.makeRelative(startingUrl))) {
221
						logger.debug("Skipping " + startingUrl + ". Disallowed by robots.txt directives.");
222
						continue;
223
					}
224

  
225
				DOMParser parser = new DOMParser();
226
				parser.parse(startingUrl.toString());
227
				Document doc = parser.getDocument();
228
				List<Node> resourceNodes = findNodesWithResource(doc, startingUrl);
229
				for (Node resourceNode : resourceNodes) {
230
					String xp = getXpathToRoot(resourceNode);
231
					xpaths.add(xp);
232
					logger.debug(xp);
233
				}
234
				
235
				try {
236
					URL methodUrl = new URL(pageUrl);
237
					provider.setMethod(new URL(methodUrl.getProtocol()+"://"+methodUrl.getHost()), this);
238
				} catch(MalformedConfigurationException e) {
239
					logger.error("Error updating xpath information", e);
240
				}
241
				
242
				for (String xp : xpaths) {
243
					String resourceUrl = getResourceUrl(xp, doc, startingUrl);
244
					if (resourceUrl != null) {
245
						logger.debug(resourceUrl);
246
						ret.add(resourceUrl);
247
					}
248
				}
249
			}
250
		}
251

  
252
		return ret;
253
	}
254

  
255
	private List<String> resolveFrames(Document doc, URL connectionUrl) {
256
		List<String> ret = new ArrayList<String>();
257

  
258
		DocumentTraversal traversal = (DocumentTraversal) doc;
259

  
260
		NodeIterator iterator = null;
261
		try {
262
			iterator = traversal.createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, null, true);
263
		} catch (Exception e) {
264
			e.printStackTrace();
265
			return ret;
266
		}
267

  
268
		for (Node n = iterator.nextNode(); n != null; n = iterator.nextNode()) {
269
			if (n.getNodeName().equals("FRAME") || n.getNodeName().equals("IFRAME")) {
270
				String url = n.getAttributes().getNamedItem("src").getNodeValue();
271
				try {
272
					url = Toolkit.makeAbsolute(url, connectionUrl);
273
					ret.add(url);
274
				} catch (MalformedURLException ex) {
275
					continue;
276
				}
277
			}
278
		}
279
		return ret;
280
	}
281

  
282
	private List<String> oneDepthDown(Document doc, URL connectionUrl) throws IOException {
283
		List<String> ret = new ArrayList<String>();
284

  
285
		DocumentTraversal traversal = (DocumentTraversal) doc;
286

  
287
		NodeIterator iterator = null;
288
		try {
289
			iterator = traversal.createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, null, true);
290
		} catch (Exception e) {
291
			e.printStackTrace();
292
			return ret;
293
		}
294

  
295
		for (Node n = iterator.nextNode(); n != null; n = iterator.nextNode()) {
296
			if (n.getNodeName().equals("A")) {
297
				String url = n.getAttributes().getNamedItem("href").getNodeValue();
298
				try {
299
					url = Toolkit.makeAbsolute(url, connectionUrl);
300
					if (Toolkit.getMimeType(url, this.sleepMillis).trim().contains("text/html"))
301
						ret.add(url);
302
				} catch (MalformedURLException ex) {
303
					continue;
304
				}
305
			}
306
		}
307
		return ret;
308
	}
309

  
310
	private String getXpathToRoot(Node node) {
311
		String xpath = "";
312
		do {
313
			if (node.getNodeName().equals("HTML")) {
314
				int before = 1;
315
				while ((node = node.getPreviousSibling()) != null)
316
					before++;
317
				return "/HTML["+before+"]" + xpath;
318
			}
319
			int before = 0;
320
			Node current = node;
321
			while ((current = current.getPreviousSibling()) != null)
322
				if (current.getNodeName().equals(node.getNodeName()))
323
					before++;
324
			xpath = "/" + node.getNodeName() + "[" + (before + 1) + "]" + xpath;
325
		} while ((node = node.getParentNode()) != null);
326
		return xpath;
327
	}
328

  
329
	private List<Node> findNodesWithResource(Document doc, URL connectionUrl) throws IOException {
330
		List<Node> ret = new ArrayList<Node>();
331

  
332
		DocumentTraversal traversal = (DocumentTraversal) doc;
333

  
334
		NodeIterator iterator = null;
335
		try {
336
			iterator = traversal.createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, null, true);
337
		} catch (Exception e) {
338
			e.printStackTrace();
339
			return ret;
340
		}
341

  
342
		for (Node n = iterator.nextNode(); n != null; n = iterator.nextNode()) {
343
			if (n.getNodeName().equals("A")) {
344
				String url = null;
345
				try {
346
					url = n.getAttributes().getNamedItem("href").getNodeValue();
347
				} catch(NullPointerException e) {
348
					// anchor without href
349
					continue;
350
				}
351
				if (url == null)
352
					continue;
353
				try {
354
					url = Toolkit.makeAbsolute(url, connectionUrl);
355
					if (this.mimeTypes.contains(Toolkit.getMimeType(url, this.sleepMillis).trim()))
356
						ret.add(n);
357
				} catch (MalformedURLException ex) {
358
					continue;
359
				}
360
			}
361
		}
362
		return ret;
363
	}
364

  
365
	private String getResourceUrl(String xpath, Document doc, URL url) throws MalformedURLException {
366
		try {
367
			Node current = doc.getFirstChild();
368
			String[] elements = xpath.split("/");
369
			for (String element : elements) {
370
				if (element.trim().equals(""))
371
					continue;
372
				int position = Integer.parseInt(element.substring(element.indexOf('[')).replaceAll("\\[", "").replaceAll("\\]", ""));
373
				String name = element.substring(0, element.indexOf('['));
374
				int found = 0;
375
				do {
376
					if (current.getNodeName().equals(name)) {
377
						found++;
378
						if (found == position) {
379
							current = current.getFirstChild();
380
							break;
381
						}
382
					}
383
				} while ((current = current.getNextSibling()) != null);
384

  
385
			}
386
			String ret = current.getParentNode().getAttributes().getNamedItem("href").getNodeValue();
387
			return Toolkit.makeAbsolute(ret, url);
388
		} catch (Exception e) {
389
			return null;
390
		}
391
	}
392

  
393
	private Object readResolve() throws IOException {
394
		if (robotstxtUrl != null) {
395
			URL url = new URL(robotstxtUrl);
396
			BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream()));
397
			this.robot = new Robotstxt(in);
398
			this.directives = this.robot.getDirectivesFor(agentName);
399
		} else {
400
			ignoreRobotsTxt = true;
401
		}
402
		logger = Logger.getLogger(XPathAndCrawl.class);
403
		return this;
404
	}
405

  
406
	public boolean isResolveFrames() {
407
		return resolveFrames;
408
	}
409

  
410
	public void setResolveFrames(boolean resolveFrames) {
411
		this.resolveFrames = resolveFrames;
412
	}
413

  
414
	public boolean isSkipFirstPage() {
415
		return skipFirstPage;
416
	}
417

  
418
	public void setSkipFirstPage(boolean skipFirstPage) {
419
		this.skipFirstPage = skipFirstPage;
420
	}
421

  
422
	public long getSleepMillis() {
423
		return sleepMillis;
424
	}
425

  
426
	public void setSleepMillis(long sleepMillis) {
427
		this.sleepMillis = sleepMillis;
428
	}
429

  
430
	public List<String> getMimeTypes() {
431
		return mimeTypes;
432
	}
433

  
434
	public void setMimeTypes(List<String> mimeTypes) {
435
		this.mimeTypes = mimeTypes;
436
	}
437

  
438
	public List<String> getXpaths() {
439
		return xpaths;
440
	}
441

  
442
	public void setXpaths(List<String> xpaths) {
443
		this.xpaths = xpaths;
444
	}
445

  
446
	public void setIgnoreRobotsTxt(boolean ignoreRobotsTxt) {
447
		this.ignoreRobotsTxt = ignoreRobotsTxt;
448
	}
449

  
450
	public boolean isIgnoreRobotsTxt() {
451
		return ignoreRobotsTxt;
452
	}
453

  
454
	public void setAgentName(String agentName) {
455
		this.agentName = agentName;
456
		this.directives = this.robot.getDirectivesFor(agentName);
457
	}
458

  
459
	public String getAgentName() {
460
		return agentName;
461
	}
462

  
463
	public void setFallback(boolean fallback) {
464
		this.fallback = fallback;
465
	}
466

  
467
	public boolean isFallback() {
468
		return fallback;
469
	}
470

  
471
}
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/methods/URLTransformation.java
1
package gr.uoa.di.resourcediscovery.methods;
2

  
3
import gr.uoa.di.resourcediscovery.MethodProvider;
4

  
5
import java.net.URL;
6
import java.util.ArrayList;
7
import java.util.List;
8

  
9
public class URLTransformation implements ResourceDiscoveryMethod {
10

  
11
	private String regex = null, replacement = "";
12
	private String addToEnd = "";
13

  
14
	@Override
15
	public List<String> getResources(URL upageUrl, MethodProvider provider) {
16
		String pageUrl = upageUrl.toString();
17
		String trsf = pageUrl;
18
		if (regex != null && !regex.trim().equals(""))
19
			trsf = pageUrl.replaceAll(regex, replacement);
20

  
21
		trsf = trsf + addToEnd;
22

  
23
		List<String> ret = new ArrayList<String>();
24
		ret.add(trsf);
25

  
26
		return ret;
27
	}
28

  
29
	public String getRegex() {
30
		return regex;
31
	}
32

  
33
	public void setRegex(String regex) {
34
		this.regex = regex;
35
	}
36

  
37
	public String getAddToEnd() {
38
		return addToEnd;
39
	}
40

  
41
	public void setAddToEnd(String addToEnd) {
42
		this.addToEnd = addToEnd;
43
	}
44

  
45
	public String getReplacement() {
46
		return replacement;
47
	}
48

  
49
	public void setReplacement(String replacement) {
50
		this.replacement = replacement;
51
	}
52

  
53
}
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/Toolkit.java
1
package gr.uoa.di.resourcediscovery;
2

  
3
import java.io.IOException;
4
import java.net.HttpURLConnection;
5
import java.net.MalformedURLException;
6
import java.net.URL;
7
import java.net.URLConnection;
8

  
9
import org.apache.log4j.Logger;
10

  
11
public class Toolkit {
12
	
13
	static transient Logger logger = Logger.getLogger(Toolkit.class);
14
	static int timeout = 10000;
15

  
16
	static public String makeAbsolute(String url, URL connectionUrl) throws MalformedURLException {
17
		return new URL(connectionUrl, url).toString();
18
	}
19

  
20
	static public String makeRelative(URL connectionUrl) throws MalformedURLException {
21
		return connectionUrl.getPath();
22
	}
23
	
24
	static public String getRedirectedUrl(String resourceURL, long sleepMillis) throws IOException, MalformedURLException {
25
		URL url = null;
26

  
27
		try {
28
			url = new URL(resourceURL);
29
		} catch (MalformedURLException mue) {
30
			logger.error("Error opening first url", mue);
31
			throw mue;
32
		}
33

  
34
		HttpURLConnection.setFollowRedirects(false);
35

  
36
		HttpURLConnection conn = null;
37
		try {
38
			Thread.sleep(sleepMillis);
39
			conn = (HttpURLConnection) url.openConnection();
40
			conn.setConnectTimeout(timeout);
41
			conn.setReadTimeout(timeout);
42
			conn.setAllowUserInteraction(false);         
43
			conn.setDoOutput(true);
44
		} catch (ClassCastException ex) {
45
			throw new MalformedURLException();
46
		} catch (InterruptedException e) {
47
			e.printStackTrace();
48
		}
49

  
50
		conn.setRequestMethod("HEAD");
51

  
52
		try {
53
			conn = openConnectionCheckRedirects(conn, sleepMillis);
54
		} catch (Exception ex) {
55
			throw new MalformedURLException();
56
		}
57

  
58
		try {
59
			Thread.sleep(sleepMillis);
60
		} catch (InterruptedException e) {
61
			e.printStackTrace();
62
		}
63
		int statusCode = conn.getResponseCode();
64
		if (statusCode == 503) {
65
			logger.error("Url " + conn.getURL() + " reported status code 503. Please increase the crawler's sleep time.");
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff