Revision 47463
Added by Antonis Lempesis almost 6 years ago
modules/uoa-resource-discovery/trunk/build/ivy.xml | ||
---|---|---|
1 |
<ivy-module version="1.0"> |
|
2 |
<info organisation="driver" module="uoa-resource-discovery" |
|
3 |
status="integration" /> |
|
4 |
<configurations defaultconfmapping="*->default"> |
|
5 |
<conf name="default" /> |
|
6 |
<conf name="junit" visibility="private" extends="default" /> |
|
7 |
</configurations> |
|
8 |
<publications> |
|
9 |
<artifact name="uoa-resource-discovery" type="jar" /> |
|
10 |
<artifact name="uoa-resource-discovery" type="src" ext="zip" /> |
|
11 |
</publications> |
|
12 |
<dependencies> |
|
13 |
<dependency org="apache" name="commons-logging" rev="1.0.4" /> |
|
14 |
<dependency org="org.w3c" name="tidy" rev="+" /> |
|
15 |
<dependency name="log4j" org="log4j" rev="+"/> |
|
16 |
<dependency org="junit" name="junit" rev="+" conf="junit"/> |
|
17 |
|
|
18 |
<dependency org="net.matuschek" name="jobo" rev="1.4+" /> |
|
19 |
<dependency org="DLS" name="jOAI" rev="2.0.9.3+" /> |
|
20 |
|
|
21 |
<dependency org="driver" name="unibi-commons" rev="+" /> |
|
22 |
<dependency org="driver" name="unibi-data-utility-featureextraction-plugins" rev="+" /> |
|
23 |
|
|
24 |
<dependency org="com.thoughtworks" name="xstream" rev="+" /> |
|
25 |
<dependency org="org.cyberneko" name="nekohtml" rev="+" /> |
|
26 |
<dependency org="com.jira" name="heritrix-commons" rev="+" /> |
|
27 |
<dependency org="com.jira" name="heritrix-modules" rev="+" /> |
|
28 |
<dependency org="org.kryo" name="kryo" rev="+" /> |
|
29 |
<dependency org="edu.indiana" name="xpp3" rev="+" /> |
|
30 |
</dependencies> |
|
31 |
</ivy-module> |
modules/uoa-resource-discovery/trunk/build/ivysettings.xml | ||
---|---|---|
1 |
<ivysettings> |
|
2 |
|
|
3 |
<include file="${yvy.build.dir}/ivysettings.xml"/> |
|
4 |
<settings defaultResolver="yvy-resolver" default="yvy.default.cache" defaultCacheDir="${yvy.cache.dir}"/> |
|
5 |
|
|
6 |
</ivysettings> |
modules/uoa-resource-discovery/trunk/build/build.properties | ||
---|---|---|
1 |
source.dirs = main |
|
2 |
|
|
3 |
project.name = uoa-resource-discovery |
|
4 |
project.version = 0.0.7 |
|
5 |
project.label = |
|
6 |
|
modules/uoa-resource-discovery/trunk/build/build.xml | ||
---|---|---|
1 |
<project name="uoa-resource-discovery" default="build"> |
|
2 |
<!-- |
|
3 |
organisation: driver |
|
4 |
module: uoa-resource-discovery |
|
5 |
--> |
|
6 |
|
|
7 |
<property file="local.properties"/> |
|
8 |
|
|
9 |
<!-- Layout paths properties --> |
|
10 |
<property file="../../build/yvy-setup.properties"/> |
|
11 |
<property file="../../../../build/yvy-trunk.properties"/> |
|
12 |
<property file="../../../../../build/yvy-tag.properties"/> |
|
13 |
|
|
14 |
<property file="build.properties"/> |
|
15 |
<property name="yvy.build.common" value="${yvy.root.dir}/build/build-common.xml"/> |
|
16 |
|
|
17 |
<!-- Common build definitions --> |
|
18 |
<import file="${yvy.build.common}"/> |
|
19 |
|
|
20 |
<target name="build" depends="jar"/> |
|
21 |
|
|
22 |
<target name="default"> |
|
23 |
<antcall target="${yvy.target.build}" inheritAll="true" inheritRefs="true"/> |
|
24 |
</target> |
|
25 |
|
|
26 |
</project> |
modules/uoa-resource-discovery/trunk/test/junit/record2.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<record rank="0.4898979"> |
|
3 |
<result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr"> |
|
4 |
<header> |
|
5 |
<dri:objIdentifier>7a22e67a-364b-4a2d-bcc1-cd6cc8a4e9d0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dspace.library.uu.nl:1874/29575</dri:objIdentifier> |
|
6 |
|
|
7 |
<dri:dateOfCollection>2009-12-30T00:16:01Z</dri:dateOfCollection> |
|
8 |
</header> |
|
9 |
<metadata> |
|
10 |
<dr:CobjContentSynthesis/> |
|
11 |
<dr:CobjTypology>Textual</dr:CobjTypology> |
|
12 |
<dr:CobjIdentifier>Journal of Personality and Social Psychology 89, 696-716 (2005)</dr:CobjIdentifier> |
|
13 |
<dr:CobjModel>OAI</dr:CobjModel> |
|
14 |
|
|
15 |
<dr:CobjMDFormats>oai_dc</dr:CobjMDFormats> |
|
16 |
<dr:CobjDescriptionSynthesis/> |
|
17 |
<dr:repositoryName>DSpace at Utrecht University</dr:repositoryName> |
|
18 |
<dr:repositoryLink>http://www.igitur.nl/</dr:repositoryLink> |
|
19 |
<dr:repositoryCountry>NL</dr:repositoryCountry> |
|
20 |
<dr:repositoryInstitution/> |
|
21 |
<dc:creator>Wicherts, J.M.</dc:creator> |
|
22 |
|
|
23 |
<dc:creator>Dolan, C.V.</dc:creator> |
|
24 |
<dc:creator>Hessen, D.J.</dc:creator> |
|
25 |
<dc:title>Stereotype threat and group differences in test performance: A question of measurement invariance.</dc:title> |
|
26 |
<dc:subject>ethnic differences</dc:subject> |
|
27 |
<dc:subject>sex differences</dc:subject> |
|
28 |
<dc:subject>measurement invariance</dc:subject> |
|
29 |
|
|
30 |
<dc:subject>Sociale Wetenschappen</dc:subject> |
|
31 |
<dc:subject>stereotype threat</dc:subject> |
|
32 |
<dc:subject>test performance</dc:subject> |
|
33 |
<dr:CobjCategory>0001</dr:CobjCategory> |
|
34 |
<dc:language>eng</dc:language> |
|
35 |
<dc:dateAccepted>2005-01-01</dc:dateAccepted> |
|
36 |
|
|
37 |
<dc:identifier>http://igitur-archive.library.uu.nl/fss/2008-0807-201603/UUindex.html</dc:identifier> |
|
38 |
<dc:publisher>American Psychological Association</dc:publisher> |
|
39 |
<dc:source/> |
|
40 |
<dc:contributor/> |
|
41 |
<dc:relation>0022-3514</dc:relation> |
|
42 |
<dc:description>Studies into the effects of stereotype threat (ST) on test performance have shed new light on race and sex differences in achievement and intelligence test scores. In this article,the authors relate ST theory to the psychometric concept of measurement invariance and show that ST effects may be viewed as a source of measurement bias. As such,ST effects are detectable by means of multigroup confirmatory factor analysis. This enables research into the generalizability of ST effects to real-life or high-stakes testing. The modeling approach is described in detail and applied to 3 experiments in which the amount of ST for minorities and women was manipulated. Results indicate that ST results in measurement bias of intelligence and mathematics tests.</dc:description> |
|
43 |
</metadata> |
|
44 |
|
|
45 |
</result> |
|
46 |
</record> |
modules/uoa-resource-discovery/trunk/test/junit/TestXML.java | ||
---|---|---|
1 |
import java.util.Arrays; |
|
2 |
import java.util.List; |
|
3 |
|
|
4 |
import org.apache.log4j.BasicConfigurator; |
|
5 |
import org.junit.Test; |
|
6 |
|
|
7 |
import eu.dnetlib.common.utils.*; |
|
8 |
import eu.dnetlib.data.utility.resource_discovery.plugin.ResourceDescriptionRecord; |
|
9 |
import eu.dnetlib.data.utility.resource_discovery.plugin.ResourceUrls; |
|
10 |
public class TestXML { |
|
11 |
|
|
12 |
@Test |
|
13 |
public void TestCrawlingAndExtraction() throws Exception { |
|
14 |
BasicConfigurator.configure(); |
|
15 |
|
|
16 |
XMLSerializer<ResourceDescriptionRecord> handler = new XMLSerializer<ResourceDescriptionRecord>(ResourceDescriptionRecord.class); |
|
17 |
ResourceDescriptionRecord descrRecord = new ResourceDescriptionRecord(); |
|
18 |
descrRecord.setObjectIdentifier("0239184018501"); |
|
19 |
List<String> urlList = Arrays.asList("res1", "res2"); |
|
20 |
|
|
21 |
ResourceUrls urls = new ResourceUrls(); |
|
22 |
urls.setUrls(urlList); |
|
23 |
descrRecord.setResourceUrls(urls); |
|
24 |
|
|
25 |
System.out.println(handler.getAsXml(descrRecord)); |
|
26 |
} |
|
27 |
|
|
28 |
} |
modules/uoa-resource-discovery/trunk/test/junit/record3.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<record rank="0.4618802"> |
|
3 |
<result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr"> |
|
4 |
<header> |
|
5 |
<dri:objIdentifier>7a22e67a-364b-4a2d-bcc1-cd6cc8a4e9d0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dspace.library.uu.nl:1874/26815</dri:objIdentifier> |
|
6 |
<dri:dateOfCollection>2009-12-30T00:13:45Z</dri:dateOfCollection> |
|
7 |
</header> |
|
8 |
|
|
9 |
<metadata> |
|
10 |
<dr:CobjContentSynthesis/> |
|
11 |
<dr:CobjTypology>Textual</dr:CobjTypology> |
|
12 |
<dr:CobjIdentifier>Logic Group Preprint Series 172 (2008)</dr:CobjIdentifier> |
|
13 |
<dr:CobjModel>OAI</dr:CobjModel> |
|
14 |
<dr:CobjMDFormats>oai_dc</dr:CobjMDFormats> |
|
15 |
<dr:CobjDescriptionSynthesis/> |
|
16 |
|
|
17 |
<dr:repositoryName>DSpace at Utrecht University</dr:repositoryName> |
|
18 |
<dr:repositoryLink>http://www.igitur.nl/</dr:repositoryLink> |
|
19 |
<dr:repositoryCountry>NL</dr:repositoryCountry> |
|
20 |
<dr:repositoryInstitution/> |
|
21 |
<dc:creator>Hollenberg, M.</dc:creator> |
|
22 |
<dc:title>Equational axioms of test algebra</dc:title> |
|
23 |
|
|
24 |
<dc:subject>Wijsbegeerte</dc:subject> |
|
25 |
<dr:CobjCategory>0000</dr:CobjCategory> |
|
26 |
<dc:language>eng</dc:language> |
|
27 |
<dc:dateAccepted>1996-12-09</dc:dateAccepted> |
|
28 |
<dc:identifier>http://igitur-archive.library.uu.nl/lg/2008-0326-201100/UUindex.html</dc:identifier> |
|
29 |
<dc:publisher/> |
|
30 |
|
|
31 |
<dc:source/> |
|
32 |
<dc:contributor/> |
|
33 |
<dc:relation/> |
|
34 |
<dc:description>We present a complete axiomatization of test algebra ([24,18,29]), the two-sorted algebraic variant of Propositional Dynamic Logic (PDL,[21,7]). The axiomatization consists of adding a finite number of equations to any axiomatization of Kleene algebra ([15,26,17,4]) and algebraic translations of the Segerberg ([27]) axioms for PDL. Kleene algebras are not finitely axiomatizable ([25,6]), so our result does not give us a finite axiomatization of test algebra: in fact, no finite equational axiomatization exists. We also present a single-sorted version of test algebra, using the notion of dynamic negation ([9,2,11]), to which the previous results carry over.</dc:description> |
|
35 |
</metadata> |
|
36 |
</result> |
|
37 |
</record> |
modules/uoa-resource-discovery/trunk/test/junit/record4.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<record rank="0.45643544"> |
|
3 |
|
|
4 |
<result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr"> |
|
5 |
<header> |
|
6 |
<dri:objIdentifier>3297df8d-c100-44a2-8aa9-64729c406e05_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:www.tara.tcd.ie:2262/32968</dri:objIdentifier> |
|
7 |
<dri:dateOfCollection>2010-01-05T18:27:02Z</dri:dateOfCollection> |
|
8 |
</header> |
|
9 |
<metadata> |
|
10 |
<dr:CobjContentSynthesis/> |
|
11 |
<dr:CobjTypology>Textual</dr:CobjTypology> |
|
12 |
|
|
13 |
<dr:CobjIdentifier>Wilson, S., Flood, B., Goyal, S., Mosher, J., Bergin, S., O'Brien, J., Kennedy, R. ...Parameter estimation for a model with both imperfect test and repair... in Proceedings of the IEEE VLSI Test Symposium, Berkeley, CA, 6-10 May 2007, IEEE, 2007, pp 271-276</dr:CobjIdentifier> |
|
14 |
<dr:CobjIdentifier>Y</dr:CobjIdentifier> |
|
15 |
<dr:CobjIdentifier>Y</dr:CobjIdentifier> |
|
16 |
<dr:CobjModel>OAI</dr:CobjModel> |
|
17 |
<dr:CobjMDFormats>oai_dc</dr:CobjMDFormats> |
|
18 |
<dr:CobjDescriptionSynthesis/> |
|
19 |
|
|
20 |
<dr:repositoryName>TARA</dr:repositoryName> |
|
21 |
<dr:repositoryLink>http://www.tara.tcd.ie/</dr:repositoryLink> |
|
22 |
<dr:repositoryCountry>IE</dr:repositoryCountry> |
|
23 |
<dr:repositoryInstitution/> |
|
24 |
<dc:creator>WILSON, SIMON PAUL</dc:creator> |
|
25 |
<dc:title>Parameter estimation for a model with both imperfect test and repair</dc:title> |
|
26 |
|
|
27 |
<dc:subject>Statistics</dc:subject> |
|
28 |
<dr:CobjCategory>0004</dr:CobjCategory> |
|
29 |
<dc:language>eng</dc:language> |
|
30 |
<dc:dateAccepted>2009-09-18</dc:dateAccepted> |
|
31 |
<dc:identifier>http://hdl.handle.net/2262/32968</dc:identifier> |
|
32 |
|
|
33 |
<dc:publisher>IEEE</dc:publisher> |
|
34 |
<dc:source/> |
|
35 |
<dc:contributor/> |
|
36 |
<dc:relation/> |
|
37 |
<dc:description>The involvement of BLI researchers is supported by a grant from the Industrial Development Agency of Ireland. The involvement of CTVR researchers is supported by Science Foundation Ireland grant 03/CE3/I405.</dc:description> |
|
38 |
<dc:description>We describe estimation of the parameters of a manufacturing test and repair model using data available from that test. The model allows imperfect testing and imperfect repair. The principal problem that we address is of parameter identification, given insufficient data, that we address by making conservative assumptions on the property being measured and the associated parameter values. Several cases of commonly occurring test types, in the manufacture of electronic products, are considered.</dc:description> |
|
39 |
<dc:description>PUBLISHED</dc:description> |
|
40 |
|
|
41 |
</metadata> |
|
42 |
</result> |
|
43 |
</record> |
modules/uoa-resource-discovery/trunk/test/junit/record5.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<record rank="0.42426404"> |
|
3 |
<result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr"> |
|
4 |
<header> |
|
5 |
<dri:objIdentifier>7a22e67a-364b-4a2d-bcc1-cd6cc8a4e9d0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dspace.library.uu.nl:1874/34935</dri:objIdentifier> |
|
6 |
<dri:dateOfCollection>2009-12-30T00:22:10Z</dri:dateOfCollection> |
|
7 |
|
|
8 |
</header> |
|
9 |
<metadata> |
|
10 |
<dr:CobjContentSynthesis/> |
|
11 |
<dr:CobjTypology>Textual</dr:CobjTypology> |
|
12 |
<dr:CobjIdentifier/> |
|
13 |
<dr:CobjModel>OAI</dr:CobjModel> |
|
14 |
<dr:CobjMDFormats>oai_dc</dr:CobjMDFormats> |
|
15 |
|
|
16 |
<dr:CobjDescriptionSynthesis/> |
|
17 |
<dr:repositoryName>DSpace at Utrecht University</dr:repositoryName> |
|
18 |
<dr:repositoryLink>http://www.igitur.nl/</dr:repositoryLink> |
|
19 |
<dr:repositoryCountry>NL</dr:repositoryCountry> |
|
20 |
<dr:repositoryInstitution/> |
|
21 |
<dc:creator>Hoofd, M.G.V. van het</dc:creator> |
|
22 |
<dc:title>Exercise and depressionafter stroke, a systematic review Shuttle Walk Test in patients who suffered a stroke, a feasibility study</dc:title> |
|
23 |
|
|
24 |
<dc:subject>exercise</dc:subject> |
|
25 |
<dc:subject>Fysiotherapiewetenschap</dc:subject> |
|
26 |
<dc:subject>feasibility study</dc:subject> |
|
27 |
<dc:subject>systematic review Shuttle Walk Test in patients who suffered a stroke, a feasibility study: stroke</dc:subject> |
|
28 |
<dc:subject>Exercise and depressionafter stroke, a systematic review: stroke</dc:subject> |
|
29 |
<dc:subject>Shuttle Walk Test</dc:subject> |
|
30 |
|
|
31 |
<dc:subject>Geneeskunde</dc:subject> |
|
32 |
<dc:subject>depression</dc:subject> |
|
33 |
<dc:subject>aerobic capacity</dc:subject> |
|
34 |
<dr:CobjCategory>0007</dr:CobjCategory> |
|
35 |
<dc:language>eng</dc:language> |
|
36 |
<dc:dateAccepted>2009-06-30</dc:dateAccepted> |
|
37 |
|
|
38 |
<dc:identifier>http://igitur-archive.library.uu.nl/student-theses/2009-0807-200622/UUindex.html</dc:identifier> |
|
39 |
<dc:publisher/> |
|
40 |
<dc:source/> |
|
41 |
<dc:contributor>Brussel, M. van</dc:contributor> |
|
42 |
<dc:contributor>Port, I.G.L. van de</dc:contributor> |
|
43 |
<dc:contributor>Takken, T.</dc:contributor> |
|
44 |
<dc:relation/> |
|
45 |
|
|
46 |
<dc:description>Exercise and depression after stroke Purpose-Aim of this review is to summarize the evidence from (randomized) controlled trials regarding the effects of exercise on depression or depressive symptoms in patients who had suffered a stroke. Methods-Studies that included patients who suffered a stroke and measured an outcome concerning depression were systematically reviewed. After determining the methodological quality by the Pedro-scale, a best evidence synthesis was applied. Results-Two out of seven studies showed significant differences between both groups, in favor of the intervention group. Best evidence synthesis showed insufficient evidence for positive effects of exercise on depression in patients who suffered a stroke. Conclusion-From the studies included in the present review it cannot be concluded that exercise interventions had a positive effect on depression in patients who suffered a stroke. Key Words: stroke ... depression ... exercise ... systematic review Shuttle Walk Test in patients who suffered a stroke Objective-To evaluate the feasibility of the Shuttle Walk Test (SWT) and the Shuttle Run Test for children with cerebral palsy at GMFCS level II (SRT-II) in patients who suffered a stroke. Methods-Fifteen patients who suffered a stroke completed both the SWT and SRT-II to evaluate aerobic capacity. Results-Significant differences were found in maximum heart rate and test duration in favor of the SRT-II. No significant difference was found in perceived exertion. Conclusion-The SRT-II is more feasible to assess aerobic capacity in patients who suffered a stroke compared to the SWT. Key Words: stroke ... aerobic capacity ... feasibility study ... Shuttle Walk Test Nederlandse samenvatting De invloed van bewegen op depressie bij mensen met een CVA Doel-Dit review heeft als doel om te onderzoeken wat de invloed van bewegen is op depressie bij mensen met een CVA. 
Methode-Een systematische zoektocht is gedaan naar (gerandomiseerde), gecontroleerde studies waarin pati..nten met een CVA ge..ncludeerd waren en depressie als uitkomstmaat gemeten werd. Nadat de methodologische kwaliteit werd bepaald met de PEDro-schaal, is een best evidence synthese opgesteld. Resultaten-Twee van zeven studies lieten een significant, positief effect zien van bewegen op depressie bij mensen met een CVA. De best evidence synthese toonde onvoldoende bewijs voor positieve effecten van bewegen bij mensen met een CVA. Conclusie-Uit dit review kan niet geconcludeerd worden dat bewegen een positief effect heeft op depressie bij mensen met een CVA. Trefwoorden: CVA ... depressie ... bewegen ... systematisch review Een shuttle wandeltest bij mensen met een CVA Doel-In deze studie wordt de toepasbaarheid onderzocht van de Shuttle Wandel Test (SWT) en de Shuttle Run Test voor kinderen met een cerebrale parese met GMFCS-niveau II (SRT-II) bij mensen met een cerebrovasculair accident (CVA). Methode-Vijftien pati..nten met een CVA voerden de SWT en de SRT-II uit om het maximale inspanningsvermogen te meten. Resultaten-Significante verschillen werden gevonden wat betreft maximale hartslag en testduur tussen beide testen ten gunste van SRT-II. Er werd geen significant verschil gevonden in ervaren vermoeidheid (Borg-schaal). Conclusie-De SRT-II is beter toepasbaar om het maximale inspanningsvermogen te meten van mensen met een CVA dan de SWT. Trefwoorden: CVA ... maximaal inspanningsvermogen ... haalbaarheidsstudie ... Shuttle Walk Test</dc:description> |
|
47 |
</metadata> |
|
48 |
</result> |
|
49 |
</record> |
modules/uoa-resource-discovery/trunk/test/junit/record6.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<record rank="0.42426404"> |
|
3 |
<result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr"> |
|
4 |
<header> |
|
5 |
<dri:objIdentifier>7a22e67a-364b-4a2d-bcc1-cd6cc8a4e9d0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dspace.library.uu.nl:1874/36190</dri:objIdentifier> |
|
6 |
|
|
7 |
<dri:dateOfCollection>2009-12-30T00:23:45Z</dri:dateOfCollection> |
|
8 |
</header> |
|
9 |
<metadata> |
|
10 |
<dr:CobjContentSynthesis/> |
|
11 |
<dr:CobjTypology>Textual</dr:CobjTypology> |
|
12 |
<dr:CobjIdentifier/> |
|
13 |
<dr:CobjModel>OAI</dr:CobjModel> |
|
14 |
|
|
15 |
<dr:CobjMDFormats>oai_dc</dr:CobjMDFormats> |
|
16 |
<dr:CobjDescriptionSynthesis/> |
|
17 |
<dr:repositoryName>DSpace at Utrecht University</dr:repositoryName> |
|
18 |
<dr:repositoryLink>http://www.igitur.nl/</dr:repositoryLink> |
|
19 |
<dr:repositoryCountry>NL</dr:repositoryCountry> |
|
20 |
<dr:repositoryInstitution/> |
|
21 |
<dc:creator>Junte, R.D.</dc:creator> |
|
22 |
|
|
23 |
<dc:title>Acaricide resistance in the blue cattle tick in South Africa; A comparison of three assays for determining tick resistance</dc:title> |
|
24 |
<dc:subject>Acaricide resistance, 3 South African Provinces, blue cattle tick, Riphicephalus (Boophilus) decoloratus, comparison, Adult Immersion Test, Shaw Larval Test, Larval Packet Test, cattle, resistant</dc:subject> |
|
25 |
<dc:subject>Diergeneeskunde</dc:subject> |
|
26 |
<dc:subject>Diergeneeskunde</dc:subject> |
|
27 |
<dr:CobjCategory>0007</dr:CobjCategory> |
|
28 |
<dc:language>eng</dc:language> |
|
29 |
|
|
30 |
<dc:dateAccepted>2007-08-21</dc:dateAccepted> |
|
31 |
<dc:identifier>http://igitur-archive.library.uu.nl/student-theses/2009-1015-200152/UUindex.html</dc:identifier> |
|
32 |
<dc:publisher/> |
|
33 |
<dc:source/> |
|
34 |
<dc:contributor>Dr. E. van Dalen, Prof. Dr. F. Jongejan</dc:contributor> |
|
35 |
<dc:relation/> |
|
36 |
<dc:description>The susceptibility of engorged females and larvae of Boophilus decoloratus ticks,- collected from cattle on breeding farms in Kwazula Natal, Eastern Cape and Limpopo provinces in South Africa-, for amitraz, cypermetrin and chlorfenvinphos. Was examined by means of the Adult Immersion test (AIT), the Shaw Larval Immersion Test (SLIT) and the Larval Packet Test (LPT). The results indicated resistance of Boophilus ticks to cypermetrin on all nine farms examined. On six farms ticks had developed resistance against amitraz demonstrated by all three tests procedures. On one farm ticks were still susceptible for amitraz in all tests, whereas in 2 remaining farms tests were not in agreement. Furthermore ticks were found susceptible for chlorfinvenphos on three farms in all three tests, whereas in 5 farms showed they appeared to be resistant. In general the results obtained with the different tests were in agreement.</dc:description> |
|
37 |
|
|
38 |
</metadata> |
|
39 |
</result> |
|
40 |
</record> |
modules/uoa-resource-discovery/trunk/test/junit/record7.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<record rank="0.42426404"> |
|
3 |
<result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr"> |
|
4 |
<header> |
|
5 |
<dri:objIdentifier>9dffbf71-6914-40fe-b110-8e41a977ba90_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dro.dur.ac.uk.OAI2:1691</dri:objIdentifier> |
|
6 |
<dri:dateOfCollection>2009-12-29T22:13:17Z</dri:dateOfCollection> |
|
7 |
|
|
8 |
</header> |
|
9 |
<metadata> |
|
10 |
<dr:CobjContentSynthesis/> |
|
11 |
<dr:CobjTypology>Textual</dr:CobjTypology> |
|
12 |
<dr:CobjIdentifier>Gott, R. and Roberts, R. (2004) 'A written test for procedural understanding : a way forward for assessment in the UK science curriculum ?', Research in science &amp;amp; technological education., 22 (1). pp. 5-21.</dr:CobjIdentifier> |
|
13 |
<dr:CobjModel>OAI</dr:CobjModel> |
|
14 |
<dr:CobjMDFormats>oai_dc</dr:CobjMDFormats> |
|
15 |
|
|
16 |
<dr:CobjDescriptionSynthesis/> |
|
17 |
<dr:repositoryName>Durham Research Online</dr:repositoryName> |
|
18 |
<dr:repositoryLink>http://dro.dur.ac.uk</dr:repositoryLink> |
|
19 |
<dr:repositoryCountry>UK</dr:repositoryCountry> |
|
20 |
<dr:repositoryInstitution/> |
|
21 |
<dc:creator>Gott, R.</dc:creator> |
|
22 |
<dc:creator>Roberts, R.</dc:creator> |
|
23 |
|
|
24 |
<dc:title>A written test for procedural understanding : a way forward for assessment in the UK science curriculum ?</dc:title> |
|
25 |
<dc:subject/> |
|
26 |
<dr:CobjCategory>0001</dr:CobjCategory> |
|
27 |
<dc:language>eng</dc:language> |
|
28 |
<dc:dateAccepted>2004-05-01</dc:dateAccepted> |
|
29 |
<dc:identifier>http://dx.doi.org/10.1080/0263514042000187511</dc:identifier> |
|
30 |
<dc:publisher>Routledge</dc:publisher> |
|
31 |
|
|
32 |
<dc:source/> |
|
33 |
<dc:contributor/> |
|
34 |
<dc:relation>http://igitur-archive.library.uu.nl/lg/2008-0326-201100/preprint172.pdf</dc:relation> |
|
35 |
<dc:relation>http://dx.doi.org/10.1080/0263514042000187511</dc:relation> |
|
36 |
<dc:description>A recent UK House of Commons report on Science 14-19 identified problems with coursework and argued for a greater emphasis on teaching and assessment of scientific literacy. This paper describes a written test for procedural understanding, given to 15 year olds, that addresses both of these issues. Comparisons are made between the scores on a written test of procedural understanding with both assessments made of subject knowledge and pupil accounts of investigations. The potential advantages of assessing procedural understanding by written tests are discussed.</dc:description> |
|
37 |
</metadata> |
|
38 |
</result> |
|
39 |
|
|
40 |
</record> |
modules/uoa-resource-discovery/trunk/test/junit/SuperTester.java | ||
---|---|---|
1 |
import java.util.Vector; |
|
2 |
|
|
3 |
import org.apache.log4j.BasicConfigurator; |
|
4 |
import org.junit.Test; |
|
5 |
|
|
6 |
import eu.dnetlib.data.utility.resource_discovery.crawler.Crawler; |
|
7 |
import eu.dnetlib.data.utility.resource_discovery.crawler.ResourceExtractor; |
|
8 |
import eu.dnetlib.data.utility.resource_discovery.url_filter.UrlFilter; |
|
9 |
|
|
10 |
|
|
11 |
public class SuperTester { |
|
12 |
|
|
13 |
@Test |
|
14 |
public void TestCrawlingAndExtraction() throws Exception { |
|
15 |
BasicConfigurator.configure(); |
|
16 |
|
|
17 |
Crawler crawler = new Crawler(); |
|
18 |
ResourceExtractor extractor = new ResourceExtractor(); |
|
19 |
|
|
20 |
String idUrl = UrlFilter.resolveRedirections("http://www.di.uoa.gr"); |
|
21 |
System.out.println("Now processing " + idUrl); |
|
22 |
Vector<String> urls = crawler.getLinks(idUrl); |
|
23 |
System.out.println("Retrieved links are: "+ urls); |
|
24 |
System.out.println("Resources seem to be available in: " + extractor.extractResource(urls)); |
|
25 |
System.out.println(); |
|
26 |
} |
|
27 |
|
|
28 |
} |
modules/uoa-resource-discovery/trunk/test/junit/record8.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<record rank="0.42426404"> |
|
3 |
<result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr"> |
|
4 |
<header> |
|
5 |
<dri:objIdentifier>9dffbf71-6914-40fe-b110-8e41a977ba90_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dro.dur.ac.uk.OAI2:1856</dri:objIdentifier> |
|
6 |
<dri:dateOfCollection>2009-12-29T22:13:24Z</dri:dateOfCollection> |
|
7 |
</header> |
|
8 |
<metadata> |
|
9 |
|
|
10 |
<dr:CobjContentSynthesis/> |
|
11 |
<dr:CobjTypology>Textual</dr:CobjTypology> |
|
12 |
<dr:CobjIdentifier>Remedios, R. and Ritchie, K. and Lieberman, D. A. (2005) 'I used to like it but now I don't : the effect of the transfer test in Northern Ireland on pupils' intrinsic motivation.', British journal of educational psychology., 75 (3). pp. 435-452.</dr:CobjIdentifier> |
|
13 |
<dr:CobjModel>OAI</dr:CobjModel> |
|
14 |
<dr:CobjMDFormats>oai_dc</dr:CobjMDFormats> |
|
15 |
<dr:CobjDescriptionSynthesis/> |
|
16 |
<dr:repositoryName>Durham Research Online</dr:repositoryName> |
|
17 |
|
|
18 |
<dr:repositoryLink>http://dro.dur.ac.uk</dr:repositoryLink> |
|
19 |
<dr:repositoryCountry>UK</dr:repositoryCountry> |
|
20 |
<dr:repositoryInstitution/> |
|
21 |
<dc:creator>Remedios, R.</dc:creator> |
|
22 |
<dc:creator>Lieberman, D. A.</dc:creator> |
|
23 |
<dc:creator>Ritchie, K.</dc:creator> |
|
24 |
|
|
25 |
<dc:title>I used to like it but now I don't : the effect of the transfer test in Northern Ireland on pupils' intrinsic motivation.</dc:title> |
|
26 |
<dc:subject/> |
|
27 |
<dr:CobjCategory>0001</dr:CobjCategory> |
|
28 |
<dc:language>eng</dc:language> |
|
29 |
<dc:dateAccepted>2005-09-01</dc:dateAccepted> |
|
30 |
<dc:identifier/> |
|
31 |
<dc:publisher>British Psychological Society</dc:publisher> |
|
32 |
|
|
33 |
<dc:source/> |
|
34 |
<dc:contributor/> |
|
35 |
<dc:relation>http://dro.dur.ac.uk/1856/</dc:relation> |
|
36 |
<dc:relation>http://dx.doi.org/10.1348/000709904X24771</dc:relation> |
|
37 |
<dc:description>Background. Research has suggested that the pressure of exams could undermine pupils' interest in their subjects, but almost all of this research has been conducted in laboratory settings. The Transfer Test in Northern Ireland provides an unusual opportunity to assess the effects of exam pressure in real life because some 10- and 11-year-olds sit a Transfer Test to be admitted to grammar school while others are not tested until they are 14. Aim. To assess the effect of exams on pupils' interest in their subjects both during the period before the exam and after the results are known. Sample. The sample comprised 66 pupils preparing to sit the Transfer Test and 55 not preparing for the test. Method. Pupils' interest in their school subjects was assessed by questionnaires administered 2 weeks before the Transfer Test and then again 2 weeks after the results were announced. Results. Surprisingly, prior to sitting the test, there was no significant difference in motivation between the test and no-test pupils. However, after sitting the test, the motivation of the test pupils decreased significantly relative to their no-test counterparts, despite the fact that most achieved the grades they needed for admission to grammar school. Conclusions. Exams provide a valuable tool for assessing academic progress, but under some circumstances they can reduce pupils' interest in the subjects they are studying. </dc:description> |
|
38 |
</metadata> |
|
39 |
</result> |
|
40 |
|
|
41 |
</record> |
modules/uoa-resource-discovery/trunk/test/junit/MyResultDao.java | ||
---|---|---|
1 |
import java.util.ArrayList; |
|
2 |
import java.util.List; |
|
3 |
|
|
4 |
import eu.dnetlib.data.utility.featureextraction.dao.IResultDao; |
|
5 |
|
|
6 |
|
|
7 |
public class MyResultDao implements IResultDao { |
|
8 |
|
|
9 |
ArrayList<String> results; |
|
10 |
|
|
11 |
public MyResultDao() { |
|
12 |
this.results = new ArrayList<String>(); |
|
13 |
} |
|
14 |
|
|
15 |
@Override |
|
16 |
public void addResults(List<String> results) { |
|
17 |
for(String result : results) |
|
18 |
this.results.add(result); |
|
19 |
} |
|
20 |
|
|
21 |
@Override |
|
22 |
public void close() { |
|
23 |
// TODO Auto-generated method stub |
|
24 |
|
|
25 |
} |
|
26 |
|
|
27 |
@Override |
|
28 |
public int getNumberOfElements() { |
|
29 |
return results.size(); |
|
30 |
} |
|
31 |
|
|
32 |
@Override |
|
33 |
public List<String> getResults(int from, int to) { |
|
34 |
return results.subList(from-1, to); |
|
35 |
} |
|
36 |
|
|
37 |
} |
modules/uoa-resource-discovery/trunk/test/junit/gr/uoa/di/resourcediscovery/test/Sample.java | ||
---|---|---|
1 |
package gr.uoa.di.resourcediscovery.test; |
|
2 |
|
|
3 |
import gr.uoa.di.resourcediscovery.MethodProvider; |
|
4 |
import gr.uoa.di.resourcediscovery.MethodProviderFileStorageImpl; |
|
5 |
import gr.uoa.di.resourcediscovery.methods.ResourceDiscoveryMethod; |
|
6 |
import gr.uoa.di.resourcediscovery.methods.XPathAndCrawl; |
|
7 |
|
|
8 |
import java.net.URL; |
|
9 |
import java.util.ArrayList; |
|
10 |
import java.util.List; |
|
11 |
|
|
12 |
import org.apache.log4j.BasicConfigurator; |
|
13 |
|
|
14 |
public class Sample { |
|
15 |
|
|
16 |
public static void main(String[] args) throws Exception { |
|
17 |
BasicConfigurator.configure(); |
|
18 |
|
|
19 |
URL conUrl = new URL("http://rudie.di.uoa.gr:8080/files/"); |
|
20 |
|
|
21 |
MethodProvider provider = new MethodProviderFileStorageImpl("method-map.xml"); |
|
22 |
ResourceDiscoveryMethod method = provider.getMethod(conUrl); |
|
23 |
|
|
24 |
if(method == null) { |
|
25 |
List<String> mimeTypes = new ArrayList<String>(); |
|
26 |
mimeTypes.add("application/pdf"); |
|
27 |
|
|
28 |
XPathAndCrawl xpath = new XPathAndCrawl(mimeTypes, "http://rudie.di.uoa.gr:8080/robots.txt"); |
|
29 |
|
|
30 |
method = xpath; |
|
31 |
} |
|
32 |
|
|
33 |
System.out.println("resources found in: " + method.getResources(conUrl, provider)); |
|
34 |
} |
|
35 |
|
|
36 |
} |
modules/uoa-resource-discovery/trunk/test/junit/MySourceDataProvider.java | ||
---|---|---|
1 |
import java.util.ArrayList; |
|
2 |
import java.util.List; |
|
3 |
|
|
4 |
import eu.dnetlib.data.utility.featureextraction.FeatureExtractionException; |
|
5 |
import eu.dnetlib.data.utility.featureextraction.dataprovider.SourceDataProvider; |
|
6 |
|
|
7 |
|
|
8 |
public class MySourceDataProvider implements SourceDataProvider { |
|
9 |
|
|
10 |
ArrayList<String> DMFRecords; |
|
11 |
|
|
12 |
public MySourceDataProvider() { |
|
13 |
this.DMFRecords = new ArrayList<String>(); |
|
14 |
} |
|
15 |
|
|
16 |
public void addDMFRecord(String record) { |
|
17 |
DMFRecords.add(record); |
|
18 |
} |
|
19 |
|
|
20 |
@Override |
|
21 |
public List<String> getRecords(int from, int to) |
|
22 |
throws FeatureExtractionException { |
|
23 |
try { |
|
24 |
if(to >= this.DMFRecords.size()) |
|
25 |
to = this.DMFRecords.size(); |
|
26 |
return DMFRecords.subList(from-1, to); |
|
27 |
} |
|
28 |
catch(Exception e) { |
|
29 |
throw new FeatureExtractionException(e); |
|
30 |
} |
|
31 |
} |
|
32 |
|
|
33 |
@Override |
|
34 |
public int getSize() throws FeatureExtractionException { |
|
35 |
return this.DMFRecords.size(); |
|
36 |
} |
|
37 |
|
|
38 |
@Override |
|
39 |
public String getStatus() throws FeatureExtractionException { |
|
40 |
// TODO Auto-generated method stub |
|
41 |
return null; |
|
42 |
} |
|
43 |
|
|
44 |
} |
modules/uoa-resource-discovery/trunk/test/junit/TestResourceDiscoverer.java | ||
---|---|---|
1 |
import java.util.Calendar; |
|
2 |
|
|
3 |
import org.junit.BeforeClass; |
|
4 |
import org.junit.Test; |
|
5 |
|
|
6 |
import eu.dnetlib.data.utility.resource_discovery.crawler.Crawler; |
|
7 |
import eu.dnetlib.data.utility.resource_discovery.crawler.ResourceExtractor; |
|
8 |
import eu.dnetlib.data.utility.resource_discovery.plugin.crawler.ResourceDiscoverer; |
|
9 |
|
|
10 |
|
|
11 |
public class TestResourceDiscoverer { |
|
12 |
|
|
13 |
@BeforeClass |
|
14 |
public static void Config() { |
|
15 |
//BasicConfigurator.configure(); |
|
16 |
} |
|
17 |
|
|
18 |
@Test |
|
19 |
public void test() throws Exception { |
|
20 |
String url = "http://elib.uni-stuttgart.de/opus/volltexte/1999/1/"; |
|
21 |
Calendar cal = Calendar.getInstance(); |
|
22 |
System.out.println(cal.getTime()); |
|
23 |
ResourceDiscoverer discoverer = new ResourceDiscoverer(); |
|
24 |
System.out.println(discoverer.getResources(url)); |
|
25 |
cal = Calendar.getInstance(); |
|
26 |
System.out.println(cal.getTime()); |
|
27 |
Crawler crawler = new Crawler(); |
|
28 |
ResourceExtractor extractor = new ResourceExtractor(); |
|
29 |
System.out.println(extractor.extractResource(crawler.getLinks(url))); |
|
30 |
cal = Calendar.getInstance(); |
|
31 |
System.out.println(cal.getTime()); |
|
32 |
} |
|
33 |
} |
modules/uoa-resource-discovery/trunk/test/junit/TestPlugin.java | ||
---|---|---|
1 |
import java.io.BufferedReader; |
|
2 |
import java.io.InputStreamReader; |
|
3 |
|
|
4 |
import org.apache.log4j.BasicConfigurator; |
|
5 |
import org.junit.*; |
|
6 |
|
|
7 |
import eu.dnetlib.data.utility.resource_discovery.plugin.ResourceDiscoveryPlugin; |
|
8 |
|
|
9 |
|
|
10 |
public class TestPlugin { |
|
11 |
|
|
12 |
MySourceDataProvider provider; |
|
13 |
static private int N = 7; |
|
14 |
|
|
15 |
@BeforeClass |
|
16 |
public static void Config() { |
|
17 |
BasicConfigurator.configure(); |
|
18 |
} |
|
19 |
|
|
20 |
@Before |
|
21 |
public void InitializeProvider() throws Exception { |
|
22 |
provider = new MySourceDataProvider(); |
|
23 |
for(int i=7;i<=N;i++) { |
|
24 |
BufferedReader br = new BufferedReader(new InputStreamReader(TestPlugin.class.getResourceAsStream("record"+i+".xml"))); |
|
25 |
|
|
26 |
String line; |
|
27 |
String record = ""; |
|
28 |
while((line = br.readLine()) != null) |
|
29 |
record += line; |
|
30 |
provider.addDMFRecord(record); |
|
31 |
} |
|
32 |
} |
|
33 |
|
|
34 |
@Test |
|
35 |
public void TestResourcePlugin() throws Exception { |
|
36 |
ResourceDiscoveryPlugin plugin = new ResourceDiscoveryPlugin(); |
|
37 |
plugin.setDao(new MyResultDao()); |
|
38 |
plugin.setSourceDataProvider(provider); |
|
39 |
plugin.init(); |
|
40 |
|
|
41 |
//System.out.println(provider.getRecords(1, 2)); |
|
42 |
//System.out.println(provider.getRecords(1, 2).size()); |
|
43 |
plugin.execute(); |
|
44 |
System.out.println("\n\n" + plugin.getDao().getResults(1, 1)); |
|
45 |
} |
|
46 |
|
|
47 |
} |
modules/uoa-resource-discovery/trunk/test/junit/record1.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<record rank="0.99999994"> |
|
3 |
<result xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr"> |
|
4 |
<header> |
|
5 |
<dri:objIdentifier>7a22e67a-364b-4a2d-bcc1-cd6cc8a4e9d0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:dspace.library.uu.nl:1874/34916</dri:objIdentifier> |
|
6 |
<dri:dateOfCollection>2009-12-30T00:22:07Z</dri:dateOfCollection> |
|
7 |
</header> |
|
8 |
<metadata> |
|
9 |
|
|
10 |
<dr:CobjContentSynthesis/> |
|
11 |
<dr:CobjTypology>Textual</dr:CobjTypology> |
|
12 |
<dr:CobjIdentifier/> |
|
13 |
<dr:CobjModel>OAI</dr:CobjModel> |
|
14 |
<dr:CobjMDFormats>oai_dc</dr:CobjMDFormats> |
|
15 |
<dr:CobjDescriptionSynthesis/> |
|
16 |
<dr:repositoryName>DSpace at Utrecht University</dr:repositoryName> |
|
17 |
|
|
18 |
<dr:repositoryLink>http://www.igitur.nl/</dr:repositoryLink> |
|
19 |
<dr:repositoryCountry>NL</dr:repositoryCountry> |
|
20 |
<dr:repositoryInstitution/> |
|
21 |
<dc:creator>Westerlaken, J.</dc:creator> |
|
22 |
<dc:title>Test</dc:title> |
|
23 |
<dc:subject>test</dc:subject> |
|
24 |
|
|
25 |
<dc:subject>Geneeskunde</dc:subject> |
|
26 |
<dc:subject>Verplegingswetenschap</dc:subject> |
|
27 |
<dr:CobjCategory>0007</dr:CobjCategory> |
|
28 |
<dc:language>dut/nla</dc:language> |
|
29 |
<dc:dateAccepted>2009-07-31</dc:dateAccepted> |
|
30 |
<dc:identifier>http://igitur-archive.library.uu.nl/dissertations/2005-1018-200018/index.htm</dc:identifier> |
|
31 |
|
|
32 |
<dc:publisher/> |
|
33 |
<dc:source/> |
|
34 |
<dc:contributor>Teat, test</dc:contributor> |
|
35 |
<dc:contributor>Test, Test</dc:contributor> |
|
36 |
<dc:relation/> |
|
37 |
<dc:description>Test</dc:description> |
|
38 |
</metadata> |
|
39 |
|
|
40 |
</result> |
|
41 |
</record> |
modules/uoa-resource-discovery/trunk/.project | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<projectDescription> |
|
3 |
<name>uoa-resource-discovery</name> |
|
4 |
<comment></comment> |
|
5 |
<projects> |
|
6 |
</projects> |
|
7 |
<buildSpec> |
|
8 |
<buildCommand> |
|
9 |
<name>org.eclipse.jdt.core.javabuilder</name> |
|
10 |
<arguments> |
|
11 |
</arguments> |
|
12 |
</buildCommand> |
|
13 |
</buildSpec> |
|
14 |
<natures> |
|
15 |
<nature>org.eclipse.jdt.core.javanature</nature> |
|
16 |
</natures> |
|
17 |
</projectDescription> |
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/UnknownMethodException.java | ||
---|---|---|
1 |
package gr.uoa.di.resourcediscovery; |
|
2 |
|
|
3 |
/**
 * Checked exception of the resource discovery module; it is declared by
 * {@code MethodProviderFileStorageImpl.getMethod}.
 *
 * <p>Besides the original no-argument constructor, the standard
 * message/cause constructors are provided so that the reason for the failure
 * and the underlying exception are not lost.
 */
public class UnknownMethodException extends Exception {
    private static final long serialVersionUID = 760327436365242998L;

    /** Creates an exception without message or cause. */
    public UnknownMethodException() {
        super();
    }

    /**
     * @param message description of the failure
     */
    public UnknownMethodException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause the exception that triggered this one
     */
    public UnknownMethodException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param cause the exception that triggered this one
     */
    public UnknownMethodException(Throwable cause) {
        super(cause);
    }

}
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/MethodProviderFileStorageImpl.java | ||
---|---|---|
1 |
package gr.uoa.di.resourcediscovery; |
|
2 |
|
|
3 |
import gr.uoa.di.resourcediscovery.methods.ResourceDiscoveryMethod; |
|
4 |
|
|
5 |
import java.io.File; |
|
6 |
import java.io.FileNotFoundException; |
|
7 |
import java.io.FileReader; |
|
8 |
import java.io.FileWriter; |
|
9 |
import java.io.IOException; |
|
10 |
import java.net.URL; |
|
11 |
import java.util.HashMap; |
|
12 |
|
|
13 |
import com.thoughtworks.xstream.XStream; |
|
14 |
|
|
15 |
public class MethodProviderFileStorageImpl implements MethodProvider { |
|
16 |
|
|
17 |
private String pathToFile = null; |
|
18 |
|
|
19 |
HashMap<URL, ResourceDiscoveryMethod> map = new HashMap<URL, ResourceDiscoveryMethod>(); |
|
20 |
|
|
21 |
public MethodProviderFileStorageImpl() { |
|
22 |
|
|
23 |
} |
|
24 |
|
|
25 |
@SuppressWarnings("unchecked") |
|
26 |
public MethodProviderFileStorageImpl(String pathToFile) throws FileNotFoundException { |
|
27 |
XStream xstream = new XStream(); |
|
28 |
if(!(new File(pathToFile).exists())) |
|
29 |
map = new HashMap<URL, ResourceDiscoveryMethod>(); |
|
30 |
else |
|
31 |
map = (HashMap<URL, ResourceDiscoveryMethod>) xstream.fromXML(new FileReader(new File(pathToFile))); |
|
32 |
this.pathToFile = pathToFile; |
|
33 |
} |
|
34 |
|
|
35 |
@Override |
|
36 |
public ResourceDiscoveryMethod getMethod(URL baseUrl) throws MalformedConfigurationException, UnknownMethodException, IOException { |
|
37 |
baseUrl = new URL(Toolkit.getRedirectedUrl(baseUrl.toString(), 500)); |
|
38 |
ResourceDiscoveryMethod ret = map.get(new URL(baseUrl.getProtocol()+"://"+baseUrl.getHost())); |
|
39 |
return ret; |
|
40 |
} |
|
41 |
|
|
42 |
@Override |
|
43 |
public void setMethod(URL baseUrl, ResourceDiscoveryMethod method) { |
|
44 |
map.put(baseUrl, method); |
|
45 |
try { |
|
46 |
store(); |
|
47 |
} catch (IOException e) { |
|
48 |
e.printStackTrace(); |
|
49 |
} |
|
50 |
} |
|
51 |
|
|
52 |
public String getPathToFile() { |
|
53 |
return pathToFile; |
|
54 |
} |
|
55 |
|
|
56 |
public void setPathToFile(String pathToFile) { |
|
57 |
this.pathToFile = pathToFile; |
|
58 |
} |
|
59 |
|
|
60 |
public void store() throws IOException { |
|
61 |
XStream xstream = new XStream(); |
|
62 |
xstream.toXML(map, new FileWriter(new File(pathToFile))); |
|
63 |
} |
|
64 |
|
|
65 |
} |
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/methods/ResourceDiscoveryMethod.java | ||
---|---|---|
1 |
package gr.uoa.di.resourcediscovery.methods; |
|
2 |
|
|
3 |
import gr.uoa.di.resourcediscovery.MethodProvider; |
|
4 |
|
|
5 |
import java.io.IOException; |
|
6 |
import java.net.URL; |
|
7 |
import java.util.List; |
|
8 |
|
|
9 |
import org.xml.sax.SAXException; |
|
10 |
|
|
11 |
public interface ResourceDiscoveryMethod { |
|
12 |
|
|
13 |
public List<String> getResources(URL upageUrl, MethodProvider provider) throws SAXException, IOException; |
|
14 |
} |
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/methods/XPathAndCrawl.java | ||
---|---|---|
1 |
package gr.uoa.di.resourcediscovery.methods; |
|
2 |
|
|
3 |
import gr.uoa.di.resourcediscovery.MalformedConfigurationException; |
|
4 |
import gr.uoa.di.resourcediscovery.MethodProvider; |
|
5 |
import gr.uoa.di.resourcediscovery.Toolkit; |
|
6 |
|
|
7 |
import java.io.BufferedReader; |
|
8 |
import java.io.FileNotFoundException; |
|
9 |
import java.io.IOException; |
|
10 |
import java.io.InputStreamReader; |
|
11 |
import java.net.MalformedURLException; |
|
12 |
import java.net.URL; |
|
13 |
import java.util.ArrayList; |
|
14 |
import java.util.List; |
|
15 |
|
|
16 |
import org.apache.log4j.Logger; |
|
17 |
import org.archive.modules.net.RobotsDirectives; |
|
18 |
import org.archive.modules.net.Robotstxt; |
|
19 |
import org.cyberneko.html.parsers.DOMParser; |
|
20 |
import org.w3c.dom.Document; |
|
21 |
import org.w3c.dom.Node; |
|
22 |
import org.w3c.dom.traversal.DocumentTraversal; |
|
23 |
import org.w3c.dom.traversal.NodeFilter; |
|
24 |
import org.w3c.dom.traversal.NodeIterator; |
|
25 |
import org.xml.sax.SAXException; |
|
26 |
|
|
27 |
public class XPathAndCrawl implements ResourceDiscoveryMethod { |
|
28 |
|
|
29 |
transient Logger logger = Logger.getLogger(XPathAndCrawl.class); |
|
30 |
|
|
31 |
private boolean resolveFrames = true; |
|
32 |
private boolean skipFirstPage = false; |
|
33 |
private long sleepMillis = 100; |
|
34 |
private boolean ignoreRobotsTxt = false; |
|
35 |
private String agentName = "OpenAIRE_Harvester"; |
|
36 |
private List<String> mimeTypes = new ArrayList<String>(); |
|
37 |
private boolean fallback = true; |
|
38 |
private String robotstxtUrl = null; |
|
39 |
|
|
40 |
transient private Robotstxt robot = null; |
|
41 |
transient private RobotsDirectives directives = null; |
|
42 |
|
|
43 |
private List<String> xpaths = new ArrayList<String>(); |
|
44 |
|
|
45 |
public XPathAndCrawl() { |
|
46 |
this.ignoreRobotsTxt = true; |
|
47 |
} |
|
48 |
|
|
49 |
// you need one per repository! |
|
50 |
public XPathAndCrawl(List<String> mimeTypes, String robotstxtUrl) throws FileNotFoundException, IOException { |
|
51 |
this.mimeTypes.addAll(mimeTypes); |
|
52 |
|
|
53 |
if (robotstxtUrl != null) { |
|
54 |
URL url = new URL(robotstxtUrl); |
|
55 |
try { |
|
56 |
BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream())); |
|
57 |
this.robot = new Robotstxt(in); |
|
58 |
this.directives = this.robot.getDirectivesFor(agentName); |
|
59 |
} catch (FileNotFoundException ex) { |
|
60 |
logger.debug("Robots.txt was not found at " + robotstxtUrl); |
|
61 |
ignoreRobotsTxt = true; |
|
62 |
} |
|
63 |
} else { |
|
64 |
ignoreRobotsTxt = true; |
|
65 |
} |
|
66 |
} |
|
67 |
|
|
68 |
public void setRobotstxt(String robotstxtUrl) throws FileNotFoundException, IOException { |
|
69 |
this.robotstxtUrl = robotstxtUrl; |
|
70 |
if (robotstxtUrl != null) { |
|
71 |
URL url = new URL(robotstxtUrl); |
|
72 |
try { |
|
73 |
BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream())); |
|
74 |
this.robot = new Robotstxt(in); |
|
75 |
this.directives = this.robot.getDirectivesFor(agentName); |
|
76 |
} catch (FileNotFoundException ex) { |
|
77 |
logger.debug("Robots.txt was not found at " + robotstxtUrl); |
|
78 |
ignoreRobotsTxt = true; |
|
79 |
} |
|
80 |
} else { |
|
81 |
ignoreRobotsTxt = true; |
|
82 |
} |
|
83 |
} |
|
84 |
|
|
85 |
public String getRobotstxtUrl() { |
|
86 |
return robotstxtUrl; |
|
87 |
} |
|
88 |
|
|
89 |
@Override |
|
90 |
public List<String> getResources(URL upageUrl, MethodProvider provider) throws SAXException, IOException { |
|
91 |
|
|
92 |
String pageUrl = upageUrl.toString(); |
|
93 |
|
|
94 |
logger.debug("Known xpaths: "+this.xpaths); |
|
95 |
|
|
96 |
pageUrl = Toolkit.getRedirectedUrl(pageUrl, this.sleepMillis); |
|
97 |
|
|
98 |
logger.debug("Resolved possible redirections. Url: "+pageUrl); |
|
99 |
|
|
100 |
List<String> ret = new ArrayList<String>(); |
|
101 |
List<String> urls = new ArrayList<String>(); |
|
102 |
urls.add(pageUrl); |
|
103 |
|
|
104 |
// check if url is a redirection |
|
105 |
|
|
106 |
|
|
107 |
if(this.mimeTypes.contains(Toolkit.getMimeType(pageUrl, this.sleepMillis))) { |
|
108 |
ret.add(Toolkit.makeAbsolute(pageUrl, new URL(pageUrl))); |
|
109 |
return ret; |
|
110 |
} |
|
111 |
|
|
112 |
if (this.resolveFrames) { |
|
113 |
DOMParser parser = new DOMParser(); |
|
114 |
parser.parse(pageUrl); |
|
115 |
Document doc = parser.getDocument(); |
|
116 |
urls.addAll(resolveFrames(doc, new URL(pageUrl))); |
|
117 |
logger.debug("urls after resolving frames: " + urls); |
|
118 |
} |
|
119 |
|
|
120 |
if (this.skipFirstPage) { |
|
121 |
List<String> addme = new ArrayList<String>(); |
|
122 |
for (String url : urls) { |
|
123 |
DOMParser parser = new DOMParser(); |
|
124 |
parser.parse(url); |
|
125 |
Document doc = parser.getDocument(); |
|
126 |
addme.addAll(oneDepthDown(doc, new URL(url))); |
|
127 |
} |
|
128 |
|
|
129 |
urls.remove(pageUrl); |
|
130 |
|
|
131 |
if (this.resolveFrames) { |
|
132 |
for (String url : urls) { |
|
133 |
DOMParser parser = new DOMParser(); |
|
134 |
parser.parse(url); |
|
135 |
Document doc = parser.getDocument(); |
|
136 |
addme.addAll(resolveFrames(doc, new URL(url))); |
|
137 |
} |
|
138 |
} |
|
139 |
|
|
140 |
urls.addAll(addme); |
|
141 |
logger.debug("urls after skipping 1st page and resolving frames: " + urls); |
|
142 |
} |
|
143 |
|
|
144 |
for (String url : urls) { |
|
145 |
logger.debug("looking for resource in: " + url); |
|
146 |
try { |
|
147 |
url = Toolkit.makeAbsolute(url, new URL(pageUrl)); |
|
148 |
} catch (Exception e) { |
|
149 |
e.printStackTrace(); |
|
150 |
continue; |
|
151 |
} |
|
152 |
URL startingUrl = new URL(url); |
|
153 |
|
|
154 |
if (!this.ignoreRobotsTxt) |
|
155 |
if (!this.directives.allows(Toolkit.makeRelative(startingUrl))) { |
|
156 |
logger.debug("Skipping " + startingUrl + ". Disallowed by robots.txt directives."); |
|
157 |
continue; |
|
158 |
} |
|
159 |
|
|
160 |
if (this.xpaths.size() == 0) { |
|
161 |
logger.debug("No xpath information, crawling"); |
|
162 |
// this for the first time |
|
163 |
DOMParser parser = new DOMParser(); |
|
164 |
parser.parse(startingUrl.toString()); |
|
165 |
Document doc = parser.getDocument(); |
|
166 |
|
|
167 |
List<Node> resourceNodes = findNodesWithResource(doc, startingUrl); |
|
168 |
|
|
169 |
for (Node resourceNode : resourceNodes) { |
|
170 |
String xp = getXpathToRoot(resourceNode); |
|
171 |
xpaths.add(xp); |
|
172 |
logger.debug(xp); |
|
173 |
} |
|
174 |
|
|
175 |
try { |
|
176 |
URL methodUrl = new URL(pageUrl); |
|
177 |
provider.setMethod(new URL(methodUrl.getProtocol()+"://"+methodUrl.getHost()), this); |
|
178 |
} catch(MalformedConfigurationException e) { |
|
179 |
logger.error("Error updating xpath information", e); |
|
180 |
} |
|
181 |
|
|
182 |
for (String xp : xpaths) { |
|
183 |
String resourceUrl = getResourceUrl(xp, doc, startingUrl); |
|
184 |
if (resourceUrl != null) { |
|
185 |
logger.debug(resourceUrl); |
|
186 |
ret.add(resourceUrl); |
|
187 |
} |
|
188 |
} |
|
189 |
} else { |
|
190 |
// this is for the rest of the pages of the repo |
|
191 |
DOMParser parser = new DOMParser(); |
|
192 |
parser.parse(startingUrl.toString()); |
|
193 |
Document doc = parser.getDocument(); |
|
194 |
|
|
195 |
for (String xp : xpaths) { |
|
196 |
String resourceUrl = getResourceUrl(xp, doc, startingUrl); |
|
197 |
if (resourceUrl != null) { |
|
198 |
logger.debug(resourceUrl); |
|
199 |
ret.add(resourceUrl); |
|
200 |
} |
|
201 |
} |
|
202 |
} |
|
203 |
} |
|
204 |
|
|
205 |
if (ret.size() == 0 && this.fallback) { |
|
206 |
// if no xpath contained the resource, try to find it and add |
|
207 |
// all the xpaths |
|
208 |
for (String url : urls) { |
|
209 |
logger.debug("looking for resource in (not found in xpath): " + url); |
|
210 |
|
|
211 |
try { |
|
212 |
url = Toolkit.makeAbsolute(url, new URL(pageUrl)); |
|
213 |
} catch (Exception e) { |
|
214 |
e.printStackTrace(); |
|
215 |
continue; |
|
216 |
} |
|
217 |
URL startingUrl = new URL(url); |
|
218 |
|
|
219 |
if (!this.ignoreRobotsTxt) |
|
220 |
if (!this.directives.allows(Toolkit.makeRelative(startingUrl))) { |
|
221 |
logger.debug("Skipping " + startingUrl + ". Disallowed by robots.txt directives."); |
|
222 |
continue; |
|
223 |
} |
|
224 |
|
|
225 |
DOMParser parser = new DOMParser(); |
|
226 |
parser.parse(startingUrl.toString()); |
|
227 |
Document doc = parser.getDocument(); |
|
228 |
List<Node> resourceNodes = findNodesWithResource(doc, startingUrl); |
|
229 |
for (Node resourceNode : resourceNodes) { |
|
230 |
String xp = getXpathToRoot(resourceNode); |
|
231 |
xpaths.add(xp); |
|
232 |
logger.debug(xp); |
|
233 |
} |
|
234 |
|
|
235 |
try { |
|
236 |
URL methodUrl = new URL(pageUrl); |
|
237 |
provider.setMethod(new URL(methodUrl.getProtocol()+"://"+methodUrl.getHost()), this); |
|
238 |
} catch(MalformedConfigurationException e) { |
|
239 |
logger.error("Error updating xpath information", e); |
|
240 |
} |
|
241 |
|
|
242 |
for (String xp : xpaths) { |
|
243 |
String resourceUrl = getResourceUrl(xp, doc, startingUrl); |
|
244 |
if (resourceUrl != null) { |
|
245 |
logger.debug(resourceUrl); |
|
246 |
ret.add(resourceUrl); |
|
247 |
} |
|
248 |
} |
|
249 |
} |
|
250 |
} |
|
251 |
|
|
252 |
return ret; |
|
253 |
} |
|
254 |
|
|
255 |
private List<String> resolveFrames(Document doc, URL connectionUrl) { |
|
256 |
List<String> ret = new ArrayList<String>(); |
|
257 |
|
|
258 |
DocumentTraversal traversal = (DocumentTraversal) doc; |
|
259 |
|
|
260 |
NodeIterator iterator = null; |
|
261 |
try { |
|
262 |
iterator = traversal.createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, null, true); |
|
263 |
} catch (Exception e) { |
|
264 |
e.printStackTrace(); |
|
265 |
return ret; |
|
266 |
} |
|
267 |
|
|
268 |
for (Node n = iterator.nextNode(); n != null; n = iterator.nextNode()) { |
|
269 |
if (n.getNodeName().equals("FRAME") || n.getNodeName().equals("IFRAME")) { |
|
270 |
String url = n.getAttributes().getNamedItem("src").getNodeValue(); |
|
271 |
try { |
|
272 |
url = Toolkit.makeAbsolute(url, connectionUrl); |
|
273 |
ret.add(url); |
|
274 |
} catch (MalformedURLException ex) { |
|
275 |
continue; |
|
276 |
} |
|
277 |
} |
|
278 |
} |
|
279 |
return ret; |
|
280 |
} |
|
281 |
|
|
282 |
private List<String> oneDepthDown(Document doc, URL connectionUrl) throws IOException { |
|
283 |
List<String> ret = new ArrayList<String>(); |
|
284 |
|
|
285 |
DocumentTraversal traversal = (DocumentTraversal) doc; |
|
286 |
|
|
287 |
NodeIterator iterator = null; |
|
288 |
try { |
|
289 |
iterator = traversal.createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, null, true); |
|
290 |
} catch (Exception e) { |
|
291 |
e.printStackTrace(); |
|
292 |
return ret; |
|
293 |
} |
|
294 |
|
|
295 |
for (Node n = iterator.nextNode(); n != null; n = iterator.nextNode()) { |
|
296 |
if (n.getNodeName().equals("A")) { |
|
297 |
String url = n.getAttributes().getNamedItem("href").getNodeValue(); |
|
298 |
try { |
|
299 |
url = Toolkit.makeAbsolute(url, connectionUrl); |
|
300 |
if (Toolkit.getMimeType(url, this.sleepMillis).trim().contains("text/html")) |
|
301 |
ret.add(url); |
|
302 |
} catch (MalformedURLException ex) { |
|
303 |
continue; |
|
304 |
} |
|
305 |
} |
|
306 |
} |
|
307 |
return ret; |
|
308 |
} |
|
309 |
|
|
310 |
private String getXpathToRoot(Node node) { |
|
311 |
String xpath = ""; |
|
312 |
do { |
|
313 |
if (node.getNodeName().equals("HTML")) { |
|
314 |
int before = 1; |
|
315 |
while ((node = node.getPreviousSibling()) != null) |
|
316 |
before++; |
|
317 |
return "/HTML["+before+"]" + xpath; |
|
318 |
} |
|
319 |
int before = 0; |
|
320 |
Node current = node; |
|
321 |
while ((current = current.getPreviousSibling()) != null) |
|
322 |
if (current.getNodeName().equals(node.getNodeName())) |
|
323 |
before++; |
|
324 |
xpath = "/" + node.getNodeName() + "[" + (before + 1) + "]" + xpath; |
|
325 |
} while ((node = node.getParentNode()) != null); |
|
326 |
return xpath; |
|
327 |
} |
|
328 |
|
|
329 |
private List<Node> findNodesWithResource(Document doc, URL connectionUrl) throws IOException { |
|
330 |
List<Node> ret = new ArrayList<Node>(); |
|
331 |
|
|
332 |
DocumentTraversal traversal = (DocumentTraversal) doc; |
|
333 |
|
|
334 |
NodeIterator iterator = null; |
|
335 |
try { |
|
336 |
iterator = traversal.createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, null, true); |
|
337 |
} catch (Exception e) { |
|
338 |
e.printStackTrace(); |
|
339 |
return ret; |
|
340 |
} |
|
341 |
|
|
342 |
for (Node n = iterator.nextNode(); n != null; n = iterator.nextNode()) { |
|
343 |
if (n.getNodeName().equals("A")) { |
|
344 |
String url = null; |
|
345 |
try { |
|
346 |
url = n.getAttributes().getNamedItem("href").getNodeValue(); |
|
347 |
} catch(NullPointerException e) { |
|
348 |
// anchor without href |
|
349 |
continue; |
|
350 |
} |
|
351 |
if (url == null) |
|
352 |
continue; |
|
353 |
try { |
|
354 |
url = Toolkit.makeAbsolute(url, connectionUrl); |
|
355 |
if (this.mimeTypes.contains(Toolkit.getMimeType(url, this.sleepMillis).trim())) |
|
356 |
ret.add(n); |
|
357 |
} catch (MalformedURLException ex) { |
|
358 |
continue; |
|
359 |
} |
|
360 |
} |
|
361 |
} |
|
362 |
return ret; |
|
363 |
} |
|
364 |
|
|
365 |
private String getResourceUrl(String xpath, Document doc, URL url) throws MalformedURLException { |
|
366 |
try { |
|
367 |
Node current = doc.getFirstChild(); |
|
368 |
String[] elements = xpath.split("/"); |
|
369 |
for (String element : elements) { |
|
370 |
if (element.trim().equals("")) |
|
371 |
continue; |
|
372 |
int position = Integer.parseInt(element.substring(element.indexOf('[')).replaceAll("\\[", "").replaceAll("\\]", "")); |
|
373 |
String name = element.substring(0, element.indexOf('[')); |
|
374 |
int found = 0; |
|
375 |
do { |
|
376 |
if (current.getNodeName().equals(name)) { |
|
377 |
found++; |
|
378 |
if (found == position) { |
|
379 |
current = current.getFirstChild(); |
|
380 |
break; |
|
381 |
} |
|
382 |
} |
|
383 |
} while ((current = current.getNextSibling()) != null); |
|
384 |
|
|
385 |
} |
|
386 |
String ret = current.getParentNode().getAttributes().getNamedItem("href").getNodeValue(); |
|
387 |
return Toolkit.makeAbsolute(ret, url); |
|
388 |
} catch (Exception e) { |
|
389 |
return null; |
|
390 |
} |
|
391 |
} |
|
392 |
|
|
393 |
private Object readResolve() throws IOException { |
|
394 |
if (robotstxtUrl != null) { |
|
395 |
URL url = new URL(robotstxtUrl); |
|
396 |
BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream())); |
|
397 |
this.robot = new Robotstxt(in); |
|
398 |
this.directives = this.robot.getDirectivesFor(agentName); |
|
399 |
} else { |
|
400 |
ignoreRobotsTxt = true; |
|
401 |
} |
|
402 |
logger = Logger.getLogger(XPathAndCrawl.class); |
|
403 |
return this; |
|
404 |
} |
|
405 |
|
|
406 |
public boolean isResolveFrames() { |
|
407 |
return resolveFrames; |
|
408 |
} |
|
409 |
|
|
410 |
public void setResolveFrames(boolean resolveFrames) { |
|
411 |
this.resolveFrames = resolveFrames; |
|
412 |
} |
|
413 |
|
|
414 |
public boolean isSkipFirstPage() { |
|
415 |
return skipFirstPage; |
|
416 |
} |
|
417 |
|
|
418 |
public void setSkipFirstPage(boolean skipFirstPage) { |
|
419 |
this.skipFirstPage = skipFirstPage; |
|
420 |
} |
|
421 |
|
|
422 |
public long getSleepMillis() { |
|
423 |
return sleepMillis; |
|
424 |
} |
|
425 |
|
|
426 |
public void setSleepMillis(long sleepMillis) { |
|
427 |
this.sleepMillis = sleepMillis; |
|
428 |
} |
|
429 |
|
|
430 |
public List<String> getMimeTypes() { |
|
431 |
return mimeTypes; |
|
432 |
} |
|
433 |
|
|
434 |
public void setMimeTypes(List<String> mimeTypes) { |
|
435 |
this.mimeTypes = mimeTypes; |
|
436 |
} |
|
437 |
|
|
438 |
public List<String> getXpaths() { |
|
439 |
return xpaths; |
|
440 |
} |
|
441 |
|
|
442 |
public void setXpaths(List<String> xpaths) { |
|
443 |
this.xpaths = xpaths; |
|
444 |
} |
|
445 |
|
|
446 |
public void setIgnoreRobotsTxt(boolean ignoreRobotsTxt) { |
|
447 |
this.ignoreRobotsTxt = ignoreRobotsTxt; |
|
448 |
} |
|
449 |
|
|
450 |
public boolean isIgnoreRobotsTxt() { |
|
451 |
return ignoreRobotsTxt; |
|
452 |
} |
|
453 |
|
|
454 |
public void setAgentName(String agentName) { |
|
455 |
this.agentName = agentName; |
|
456 |
this.directives = this.robot.getDirectivesFor(agentName); |
|
457 |
} |
|
458 |
|
|
459 |
public String getAgentName() { |
|
460 |
return agentName; |
|
461 |
} |
|
462 |
|
|
463 |
public void setFallback(boolean fallback) { |
|
464 |
this.fallback = fallback; |
|
465 |
} |
|
466 |
|
|
467 |
public boolean isFallback() { |
|
468 |
return fallback; |
|
469 |
} |
|
470 |
|
|
471 |
} |
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/methods/URLTransformation.java | ||
---|---|---|
1 |
package gr.uoa.di.resourcediscovery.methods; |
|
2 |
|
|
3 |
import gr.uoa.di.resourcediscovery.MethodProvider; |
|
4 |
|
|
5 |
import java.net.URL; |
|
6 |
import java.util.ArrayList; |
|
7 |
import java.util.List; |
|
8 |
|
|
9 |
public class URLTransformation implements ResourceDiscoveryMethod { |
|
10 |
|
|
11 |
private String regex = null, replacement = ""; |
|
12 |
private String addToEnd = ""; |
|
13 |
|
|
14 |
@Override |
|
15 |
public List<String> getResources(URL upageUrl, MethodProvider provider) { |
|
16 |
String pageUrl = upageUrl.toString(); |
|
17 |
String trsf = pageUrl; |
|
18 |
if (regex != null && !regex.trim().equals("")) |
|
19 |
trsf = pageUrl.replaceAll(regex, replacement); |
|
20 |
|
|
21 |
trsf = trsf + addToEnd; |
|
22 |
|
|
23 |
List<String> ret = new ArrayList<String>(); |
|
24 |
ret.add(trsf); |
|
25 |
|
|
26 |
return ret; |
|
27 |
} |
|
28 |
|
|
29 |
public String getRegex() { |
|
30 |
return regex; |
|
31 |
} |
|
32 |
|
|
33 |
public void setRegex(String regex) { |
|
34 |
this.regex = regex; |
|
35 |
} |
|
36 |
|
|
37 |
public String getAddToEnd() { |
|
38 |
return addToEnd; |
|
39 |
} |
|
40 |
|
|
41 |
public void setAddToEnd(String addToEnd) { |
|
42 |
this.addToEnd = addToEnd; |
|
43 |
} |
|
44 |
|
|
45 |
public String getReplacement() { |
|
46 |
return replacement; |
|
47 |
} |
|
48 |
|
|
49 |
public void setReplacement(String replacement) { |
|
50 |
this.replacement = replacement; |
|
51 |
} |
|
52 |
|
|
53 |
} |
modules/uoa-resource-discovery/trunk/src/main/gr/uoa/di/resourcediscovery/Toolkit.java | ||
---|---|---|
1 |
package gr.uoa.di.resourcediscovery; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.net.HttpURLConnection; |
|
5 |
import java.net.MalformedURLException; |
|
6 |
import java.net.URL; |
|
7 |
import java.net.URLConnection; |
|
8 |
|
|
9 |
import org.apache.log4j.Logger; |
|
10 |
|
|
11 |
public class Toolkit { |
|
12 |
|
|
13 |
static transient Logger logger = Logger.getLogger(Toolkit.class); |
|
14 |
static int timeout = 10000; |
|
15 |
|
|
16 |
static public String makeAbsolute(String url, URL connectionUrl) throws MalformedURLException { |
|
17 |
return new URL(connectionUrl, url).toString(); |
|
18 |
} |
|
19 |
|
|
20 |
static public String makeRelative(URL connectionUrl) throws MalformedURLException { |
|
21 |
return connectionUrl.getPath(); |
|
22 |
} |
|
23 |
|
|
24 |
static public String getRedirectedUrl(String resourceURL, long sleepMillis) throws IOException, MalformedURLException { |
|
25 |
URL url = null; |
|
26 |
|
|
27 |
try { |
|
28 |
url = new URL(resourceURL); |
|
29 |
} catch (MalformedURLException mue) { |
|
30 |
logger.error("Error opening first url", mue); |
|
31 |
throw mue; |
|
32 |
} |
|
33 |
|
|
34 |
HttpURLConnection.setFollowRedirects(false); |
|
35 |
|
|
36 |
HttpURLConnection conn = null; |
|
37 |
try { |
|
38 |
Thread.sleep(sleepMillis); |
|
39 |
conn = (HttpURLConnection) url.openConnection(); |
|
40 |
conn.setConnectTimeout(timeout); |
|
41 |
conn.setReadTimeout(timeout); |
|
42 |
conn.setAllowUserInteraction(false); |
|
43 |
conn.setDoOutput(true); |
|
44 |
} catch (ClassCastException ex) { |
|
45 |
throw new MalformedURLException(); |
|
46 |
} catch (InterruptedException e) { |
|
47 |
e.printStackTrace(); |
|
48 |
} |
|
49 |
|
|
50 |
conn.setRequestMethod("HEAD"); |
|
51 |
|
|
52 |
try { |
|
53 |
conn = openConnectionCheckRedirects(conn, sleepMillis); |
|
54 |
} catch (Exception ex) { |
|
55 |
throw new MalformedURLException(); |
|
56 |
} |
|
57 |
|
|
58 |
try { |
|
59 |
Thread.sleep(sleepMillis); |
|
60 |
} catch (InterruptedException e) { |
|
61 |
e.printStackTrace(); |
|
62 |
} |
|
63 |
int statusCode = conn.getResponseCode(); |
|
64 |
if (statusCode == 503) { |
|
65 |
logger.error("Url " + conn.getURL() + " reported status code 503. Please increase the crawler's sleep time."); |
Also available in: Unified diff
copying to move to latest codebase