Project

General

Profile

« Previous | Next » 

Revision 54895

[maven-release-plugin] copy for tag dnet-download-plugins-2.1.20

View differences:

modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/deploy.info
1
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-download-plugins/trunk/", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-download-plugins"}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/test/java/DateIntervalTest.java
1
import org.joda.time.DateTime;
2
import org.joda.time.Days;
3
import org.junit.Assert;
4
import org.junit.Before;
5
import org.junit.Test;
6

  
7
public class DateIntervalTest {
8

  
9
	@Before
10
	public void setUp() throws Exception {}
11

  
12
	@Test
13
	public void test() {
14
		DateTime beforeDate = new DateTime(2010, 01, 1, 0, 0);
15
		DateTime now = new DateTime();
16

  
17
		Days day = Days.daysBetween(beforeDate, now);
18
		Assert.assertTrue(day.getDays() > 0);
19
	}
20

  
21
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/test/java/eu/dnetlib/download/plugin/PathRetreiverTest.java
1
package eu.dnetlib.download.plugin;
2

  
3
import org.apache.commons.io.IOUtils;
4
import org.apache.commons.lang3.StringUtils;
5
import org.apache.commons.logging.Log;
6
import org.apache.commons.logging.LogFactory;
7
import org.junit.After;
8
import org.junit.Assert;
9
import org.junit.Before;
10
import org.junit.Test;
11
import org.junit.rules.TemporaryFolder;
12

  
13
import java.io.File;
14
import java.io.IOException;
15
import java.util.Arrays;
16

  
17
public class PathRetreiverTest {
18

  
19
    private static final Log log = LogFactory.getLog(PathRetreiverTest.class); // NOPMD by marko on 11/24/08 5:02 PM
20

  
21
    private TemporaryFolder t = new TemporaryFolder();
22

  
23
    private final PathRetreiver pt = new PathRetreiver();
24

  
25
    @Before
26
    public void setUp() throws IOException {
27
        t.create();
28

  
29
        IOUtils.readLines(getClass().getResourceAsStream("pmc_dirs.txt")).forEach(t::newFolder);
30
        for(final File dir : t.getRoot().listFiles(pathname -> pathname.isDirectory())) {
31
            int lower = Integer.parseInt(StringUtils.substringBefore(dir.getName(), "_").replaceAll("PMC", ""));
32
            int upper = Integer.parseInt(StringUtils.substringAfter(dir.getName(), "_").replaceAll("PMC", ""));
33

  
34
            for(int i=lower;i<lower+3 && i<upper;i++) {
35
               t.newFile(dir.getName() + "/" + i + ".xml");
36
            }
37
            for(int i=upper;i>upper-3 && i>lower;i--) {
38
                t.newFile(dir.getName() + "/" + i + ".xml");
39
            }
40
        }
41

  
42
        pt.setBase_path(t.getRoot().getPath());
43
    }
44

  
45

  
46
    @Test
47
    public void testPathRetriever() {
48

  
49
        String pathForPMCID = pt.getPathForPMCID(4676029);
50
        Assert.assertNotNull(pathForPMCID);
51
        log.info(pathForPMCID);
52

  
53
        pathForPMCID = pt.getPathForPMCID(4676028);
54
        Assert.assertNotNull(pathForPMCID);
55
        log.info(pathForPMCID);
56

  
57
        pathForPMCID = pt.getPathForPMCID(4676026);
58
        Assert.assertNull(pathForPMCID);
59
    }
60

  
61
    @After
62
    public void tearDown() {
63
        t.delete();
64
    }
65
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/test/java/eu/dnetlib/download/plugin/ArxivImportFromFileTest.java
1
package eu.dnetlib.download.plugin;
2

  
3
import junit.framework.Assert;
4

  
5
import org.junit.Before;
6
import org.junit.Test;
7

  
8
import com.google.gson.Gson;
9
import com.google.gson.GsonBuilder;
10

  
11
import eu.dnetlib.data.download.rmi.DownloadItem;
12

  
13
public class ArxivImportFromFileTest {
14

  
15
	@Before
16
	public void setUp() throws Exception {}
17

  
18
	@Test
19
	public void testREgEx() {
20
		String regExp = "\\d{4}\\.\\d{4}";
21

  
22
		String input = "1308.0021";
23

  
24
		Assert.assertTrue(input.matches(regExp));
25

  
26
	}
27

  
28
	@Test
29
	public void testPlugin() {
30
		DownloadItem item = new DownloadItem();
31
		item.setOpenAccess("OPEN");
32
		String[] myList = new String[] { "http://arxiv.org/abs/1710.04937", "b" };
33
		Gson g = new GsonBuilder().disableHtmlEscaping().create();
34
		item.setUrl(g.toJson(myList));
35

  
36
		ArxivImportFromFile plugin = new ArxivImportFromFile();
37
		plugin.setBasePath("/tmp");
38

  
39
		plugin.retrieveUrl(item);
40

  
41
        System.out.println(item.getUrl());
42

  
43
		Assert.assertNotNull(item);
44

  
45
	}
46

  
47
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/test/java/eu/dnetlib/download/plugin/DSpacePDFLinkPluginsTest.java
1
package eu.dnetlib.download.plugin;
2

  
3
import eu.dnetlib.data.download.rmi.DownloadPlugin;
4
import org.junit.Assert;
5
import org.junit.Ignore;
6
import org.junit.Test;
7
import org.springframework.beans.factory.annotation.Autowired;
8

  
9
import java.util.ArrayList;
10
import java.util.List;
11

  
12
/**
13
 * Created by sandro on 5/15/17.
14
 */
15
@Ignore
16
public class DSpacePDFLinkPluginsTest {
17

  
18
    @Autowired
19
    DSpacePDFLinkPlugins dp;
20

  
21

  
22
    @Test
23
    public void testURL () {
24
        final DSpacePDFLinkPlugins plugin = new DSpacePDFLinkPlugins();
25

  
26
        final String s1 = plugin.extractURL("http://acikerisim.baskent.edu.tr/handle/11727/2340");
27
        Assert.assertEquals(s1, "http://acikerisim.baskent.edu.tr/bitstream/11727/2340/1/10008302.pdf");
28
    }
29

  
30
//    @Test
31
//    public void testDOI() {
32
//        final DSpacePDFLinkPlugins plugin = new DSpacePDFLinkPlugins();
33
//
34
//        final String s1 = plugin.extractURL("http://hdl.handle.net/10017/29866");
35
//       // Assert.assertEquals(s1, "http://acikerisim.baskent.edu.tr/bitstream/11727/2340/1/10008302.pdf");
36
//        System.out.println(s1);
37
//    }
38

  
39

  
40
    @Test
41
    public void testURLUniTO(){
42
        final DSpacePDFLinkPlugins plugin = new DSpacePDFLinkPlugins();
43
//        List<String> lst = new ArrayList<>();
44
//        lst.add("^http.*pdf$");
45
//        plugin.setRegularExpression(lst);
46
        final String s1 = plugin.extractURL("http://hdl.handle.net/2318/37517");
47
        Assert.assertEquals("https://iris.unito.it/bitstream/2318/37517/1/2002%20JBC%20-%20Integrin-induced%20epidermal%20growth%20factor%20%28EGF%29%20receptor%20activation%20requires%20c-Src%20and%20p130Cas.pdf",s1);
48
    }
49

  
50
    @Test
51
    public void downloadTest(){
52

  
53
    }
54

  
55

  
56

  
57
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/test/resources/eu/dnetlib/download/plugin/pmc_dirs.txt
1
PMC2114664_PMC2135116
2
PMC2135117_PMC2173206
3
PMC2173207_PMC2193606
4
PMC2193607_PMC2228428
5
PMC2228429_PMC2289497
6
PMC2289498_PMC2376858
7
PMC2376859_PMC2453635
8
PMC2453636_PMC2562811
9
PMC2562812_PMC2600168
10
PMC2600169_PMC2639698
11
PMC2639699_PMC2679038
12
PMC2679039_PMC2709009
13
PMC2709010_PMC2739494
14
PMC2739495_PMC2774688
15
PMC2774689_PMC2807107
16
PMC2807108_PMC2837157
17
PMC2837158_PMC2872237
18
PMC2872246_PMC2900260
19
PMC2900261_PMC2931455
20
PMC2931456_PMC2959298
21
PMC2959299_PMC2977153
22
PMC2977154_PMC3000089
23
PMC3000090_PMC3018434
24
PMC3018435_PMC3043758
25
PMC3043759_PMC3066749
26
PMC3066750_PMC3094005
27
PMC3094006_PMC3115464
28
PMC3115476_PMC3139354
29
PMC3139355_PMC3161964
30
PMC3161966_PMC3184122
31
PMC3184123_PMC3205810
32
PMC3205811_PMC3226963
33
PMC3226964_PMC3246723
34
PMC3246724_PMC3267738
35
PMC3267739_PMC3287439
36
PMC3287440_PMC3305035
37
PMC3305036_PMC3324463
38
PMC3324464_PMC3342054
39
PMC3342055_PMC3359980
40
PMC3359981_PMC3379654
41
PMC3379655_PMC3400869
42
PMC3400870_PMC3420321
43
PMC3420322_PMC3440431
44
PMC3440432_PMC3465972
45
PMC3465973_PMC3485114
46
PMC3485115_PMC3504117
47
PMC3504118_PMC3521368
48
PMC3521369_PMC3540995
49
PMC3541002_PMC3559779
50
PMC3559780_PMC3579017
51
PMC3579018_PMC3597585
52
PMC3597586_PMC3616740
53
PMC3616741_PMC3637505
54
PMC3637506_PMC3656847
55
PMC3656848_PMC3674696
56
PMC3674697_PMC3694027
57
PMC3694028_PMC3713561
58
PMC3713562_PMC3734723
59
PMC3734724_PMC3756282
60
PMC3756283_PMC3775526
61
PMC3775527_PMC3794997
62
PMC3794998_PMC3817535
63
PMC3817536_PMC3834982
64
PMC3834984_PMC3851930
65
PMC3851934_PMC3870407
66
PMC3870408_PMC3888268
67
PMC3888269_PMC3905545
68
PMC3905546_PMC3923943
69
PMC3923951_PMC3942209
70
PMC3942210_PMC3962119
71
PMC3962121_PMC3980435
72
PMC3980436_PMC3997492
73
PMC3997493_PMC4016582
74
PMC4016583_PMC4034045
75
PMC4034053_PMC4052667
76
PMC4052668_PMC4069525
77
PMC4069526_PMC4087078
78
PMC4087079_PMC4104043
79
PMC4104070_PMC4123881
80
PMC4123882_PMC4143513
81
PMC4143514_PMC4161060
82
PMC4161143_PMC4179239
83
PMC4179240_PMC4196090
84
PMC4196091_PMC4213380
85
PMC4213381_PMC4229908
86
PMC4229909_PMC4246363
87
PMC4246371_PMC4265290
88
PMC4265291_PMC4283529
89
PMC4283530_PMC4301549
90
PMC4301550_PMC4320116
91
PMC4320117_PMC4336280
92
PMC4336281_PMC4354467
93
PMC4354468_PMC4372225
94
PMC4372226_PMC4389034
95
PMC4389035_PMC4406610
96
PMC4406611_PMC4423035
97
PMC4423048_PMC4439555
98
PMC4439556_PMC4456817
99
PMC4456818_PMC4474724
100
PMC4474725_PMC4491649
101
PMC4491650_PMC4508683
102
PMC4508684_PMC4525193
103
PMC4525194_PMC4543216
104
PMC4543217_PMC4560130
105
PMC4560131_PMC4579196
106
PMC4579197_PMC4595007
107
PMC4595008_PMC4609474
108
PMC4609475_PMC4626773
109
PMC4626774_PMC4643236
110
PMC4643237_PMC4660323
111
PMC4660324_PMC4676029
112
PMC4676030_PMC4693769
113
PMC4693770_PMC4710023
114
PMC4710024_PMC4728071
115
PMC4728075_PMC4743341
116
PMC4743342_PMC4758928
117
PMC4758929_PMC4775014
118
PMC4775015_PMC4793100
119
PMC4793101_PMC4809441
120
PMC4809442_PMC4826653
121
PMC4826654_PMC4844069
122
PMC4844073_PMC4859985
123
PMC4859986_PMC4877359
124
PMC4877365_PMC4894427
125
PMC4894428_PMC4910313
126
PMC4910314_PMC4928029
127
PMC4928030_PMC4943888
128
PMC4943889_PMC4963471
129
PMC4963472_PMC4979068
130
PMC4979069_PMC4996129
131
PMC4996130_PMC5011754
132
PMC5011755_PMC5027112
133
PMC5027113_PMC5041839
134
PMC5041840_PMC5055367
135
PMC5055368_PMC5070294
136
PMC5070295_PMC5084992
137
PMC5084998_PMC5098989
138
PMC5098990_PMC5113742
139
PMC5113743_PMC5128874
140
PMC5128875_PMC5142017
141
PMC5142018_PMC5155182
142
PMC5155183_PMC5168587
143
PMC5168588_PMC5181444
144
PMC5181445_PMC5195159
145
PMC5195160_PMC5207976
146
PMC5207977_PMC5220330
147
PMC5220331_PMC5232993
148
PMC5232994_PMC5245953
149
PMC5245954_PMC5258212
150
PMC5258213_PMC5269906
151
PMC5269907_PMC5281504
152
PMC5281505_PMC5294573
153
PMC5294574_PMC5307639
154
PMC5307640_PMC5320633
155
PMC5320634_PMC5334419
156
PMC5334420_PMC5348122
157
PMC5348123_PMC5362478
158
PMC5362479_PMC5379575
159
PMC5379578_PMC5393849
160
PMC5393850_PMC5409456
161
PMC5409462_PMC5424203
162
PMC5424204_PMC5437630
163
PMC5437631_PMC5452760
164
PMC5452761_PMC5467055
165
PMC5467056_PMC5483227
166
PMC5483229_PMC5497534
167
PMC549050_PMC1240576
168
PMC5497535_PMC5513250
169
PMC5513251_PMC5528017
170
PMC5528018_PMC5544983
171
PMC5544984_PMC5561184
172
PMC5561185_PMC5576750
173
PMC5576751_PMC5591643
174
PMC5591783_PMC5608430
175
PMC5608431_PMC5623045
176
PMC5623046_PMC5637514
177
PMC5637515_PMC5653330
178
PMC5653331_PMC5669206
179
PMC5669215_PMC5686621
180
PMC5686622_PMC5703149
181
PMC5703150_PMC5718512
182
PMC5718513_PMC5732977
183
PMC5732978_PMC5748310
184
PMC5748311_PMC5763634
185
PMC5763635_PMC5779436
186
PMC5779437_PMC5794819
187
PMC5794820_PMC5810839
188
PMC5810840_PMC5827115
189
PMC5827136_PMC5842751
190
PMC5842752_PMC5857142
191
PMC5857143_PMC5872445
192
PMC5872446_PMC5887736
193
PMC5887737_PMC5903694
194
PMC5903695_PMC5920276
195
PMC5920278_PMC5938386
196
PMC5938387_PMC5956503
197
PMC5956504_PMC5975655
198
PMC5975656_PMC5991267
199
PMC5991268_PMC6006820
200
PMC6006821_PMC6022687
201
PMC6022688_PMC6036291
202
PMC6036292_PMC6053207
203
PMC6053208_PMC6069616
204
PMC6069617_PMC6090019
205
PMC6090020_PMC6105291
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/main/java/eu/dnetlib/download/plugin/EuropePMC.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
8
import com.google.gson.Gson;
9
import eu.dnetlib.data.download.rmi.AbstractDownloadPlugin;
10
import eu.dnetlib.data.download.rmi.DownloadItem;
11
import eu.dnetlib.data.download.rmi.DownloadPlugin;
12
import eu.dnetlib.data.download.rmi.DownloadPluginException;
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15

  
16
// TODO: Auto-generated Javadoc
17

  
18
/**
19
 * The Class EuropePMC.
20
 */
21
public class EuropePMC extends AbstractDownloadPlugin implements DownloadPlugin {
22

  
23
    private static final Log log = LogFactory.getLog(EuropePMC.class);
24

  
25
    /**
26
     * The base path.
27
     */
28
    private String basePath;
29

  
30
    // //*[local-name()='metadata']//*[local-name()='identifier' and ./@identifierType='pmc']/text()
31

  
32
    /*
33
     * (non-Javadoc)
34
     *
35
     * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
36
     */
37
    @Override
38
    public String getPluginName() {
39
        return "europePMCDownloadPlugin";
40
    }
41

  
42
    @Override
43
    public Iterable<DownloadItem> retrieveUrls(final Iterable<DownloadItem> urls) throws DownloadPluginException {
44
        return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
45

  
46
            @Override
47
            public DownloadItem apply(final DownloadItem input) {
48
                return retrieveUrl(input);
49
            }
50
        });
51
    }
52

  
53
    /*
54
     * (non-Javadoc)
55
     *
56
     * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
57
     */
58
    @Override
59
    public DownloadItem retrieveUrl(final DownloadItem input) throws DownloadPluginException {
60

  
61
        try {
62
            String url = input.getOriginalUrl();
63
            if ((url == null) || (url.trim().length() == 0)) return input;
64
            @SuppressWarnings("unchecked")
65
            List<String> urls = new Gson().fromJson(url, ArrayList.class);
66
            log.debug(String.format("urls is %s", url));
67
            if ((urls == null) || (urls.size() == 0)) return input;
68
            input.setFileName(input.getIdItemMetadata());
69
            Boolean added = false;
70
            for (String s : urls) {
71
                if (s.startsWith("http")) {
72
                    log.debug(String.format("found url starting with http replace original URL with %s", s));
73
                    input.setOriginalUrl(s);
74
                } else if (s.startsWith("PMC")) {
75
                    String correctUrl = s.replace("PMC", "");
76
                    log.debug(String.format("found url starting with PMC %s", correctUrl));
77
                    added = true;
78
                    String path = PathRetreiver.getInstance(basePath).getPathForPMCID(Integer.parseInt(correctUrl));
79
                    if (path != null) {
80
                        input.setUrl("file://" + path);
81
                    } else {
82
                        input.setUrl(path);
83
                    }
84

  
85
                }
86
            }
87
            if (added == false) {
88
                input.setOriginalUrl(null);
89
                input.setUrl(null);
90
            }
91
            return input;
92
        } catch (Throwable e) {
93
            log.error("Exception on Download Plugin");
94
            log.error(e);
95
            throw new DownloadPluginException(e);
96
        }
97

  
98
    }
99

  
100
    /**
101
     * Gets the base path.
102
     *
103
     * @return the basePath
104
     */
105
    public String getBasePath() {
106
        return basePath;
107
    }
108

  
109
    /**
110
     * Sets the base path.
111
     *
112
     * @param basePath the basePath to set
113
     */
114
    @Override
115
    public void setBasePath(final String basePath) {
116
        this.basePath = basePath;
117
    }
118

  
119
    @Override
120
    public String extractURL(String baseURL) throws DownloadPluginException {
121
        return null;
122
    }
123
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/main/java/eu/dnetlib/download/plugin/ArxivDownloadPlugin.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
8
import com.google.gson.Gson;
9
import eu.dnetlib.data.download.rmi.AbstractDownloadPlugin;
10
import eu.dnetlib.data.download.rmi.DownloadItem;
11
import eu.dnetlib.data.download.rmi.DownloadPlugin;
12
import eu.dnetlib.data.download.rmi.DownloadPluginException;
13

  
14
/**
15
 * @Deprecated: use ArxivImportFromFile instead
16
 */
17
@Deprecated
18
public class ArxivDownloadPlugin extends AbstractDownloadPlugin implements DownloadPlugin {
19

  
20
    @Override
21
    public DownloadItem retrieveUrl(final DownloadItem input) {
22
        if (checkOpenAccess(input) == null) {
23
            return null;
24
        }
25
        String url = input.getOriginalUrl();
26
        if (url == null || url.trim().length() == 0) {
27
            return input;
28
        }
29
        @SuppressWarnings("unchecked")
30
        List<String> urls = new Gson().fromJson(url, ArrayList.class);
31
        if (urls == null || urls.size() == 0) {
32
            return input;
33
        }
34
        for (String s : urls) {
35
            if (s.startsWith("http")) {
36
                input.setOriginalUrl(s);
37
                String correctUrl = s.replace("abs", "pdf");
38
                correctUrl += ".pdf";
39
                input.setUrl(correctUrl);
40
            }
41
        }
42
        return input;
43
    }
44

  
45
    @Override
46
    public Iterable<DownloadItem> retrieveUrls(final Iterable<DownloadItem> urls) {
47
        return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
48

  
49
            @Override
50
            public DownloadItem apply(final DownloadItem input) {
51
                return retrieveUrl(input);
52
            }
53
        });
54
    }
55

  
56
    @Override
57
    public String getPluginName() {
58
        return "ArxivDownloadPlugin";
59
    }
60

  
61
    @Override
62
    public void setBasePath(final String basePath) {
63

  
64
    }
65

  
66
    @Override
67
    public String extractURL(String baseURL) throws DownloadPluginException {
68
        return null;
69
    }
70
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/main/java/eu/dnetlib/download/plugin/EasyPDFDownloadPlugin.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
8
import com.google.gson.Gson;
9
import eu.dnetlib.data.download.rmi.AbstractDownloadPlugin;
10
import eu.dnetlib.data.download.rmi.DownloadItem;
11
import eu.dnetlib.data.download.rmi.DownloadPlugin;
12
import eu.dnetlib.data.download.rmi.DownloadPluginException;
13

  
14
/**
15
 * The Class EasyPDFDownloadPlugin.
16
 */
17
public class EasyPDFDownloadPlugin extends AbstractDownloadPlugin implements DownloadPlugin {
18

  
19
    /*
20
     * (non-Javadoc)
21
     *
22
     * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
23
     */
24
    @Override
25
    public DownloadItem retrieveUrl(final DownloadItem input) {
26
        if (checkOpenAccess(input) == null) {
27
            return null;
28
        }
29
        if (input == null) {
30
            return null;
31
        }
32
        String url = input.getOriginalUrl();
33

  
34
        if (url == null || url.trim().length() == 0) {
35
            return input;
36
        }
37
        @SuppressWarnings("unchecked")
38
        List<String> urls = new Gson().fromJson(url, ArrayList.class);
39
        if (urls == null || urls.size() == 0) {
40
            return input;
41
        }
42
        if (checkUrlsNotNull(input, urls))
43
            return input;
44
        input.setOriginalUrl(null);
45
        input.setUrl(null);
46
        return input;
47
    }
48

  
49
    @Override
50
    public Iterable<DownloadItem> retrieveUrls(final Iterable<DownloadItem> urls) {
51
        return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
52

  
53
            @Override
54
            public DownloadItem apply(final DownloadItem input) {
55
                return retrieveUrl(input);
56
            }
57
        });
58
    }
59

  
60
    /*
61
     * (non-Javadoc)
62
     *
63
     * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
64
     */
65
    @Override
66
    public String getPluginName() {
67
        return "easyPDFDownloadPlugin";
68
    }
69

  
70
    @Override
71
    public void setBasePath(final String basePath) {
72
        // TODO Auto-generated method stub
73

  
74
    }
75

  
76
    @Override
77
    public String extractURL(String baseURL) throws DownloadPluginException {
78
        return baseURL.endsWith(".pdf") ? baseURL : null;
79
    }
80
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/main/java/eu/dnetlib/download/plugin/ELisDownloadPlugin.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
8
import com.google.gson.Gson;
9
import eu.dnetlib.data.download.rmi.AbstractDownloadPlugin;
10
import eu.dnetlib.data.download.rmi.DownloadItem;
11
import eu.dnetlib.data.download.rmi.DownloadPlugin;
12
import eu.dnetlib.data.download.rmi.DownloadPluginException;
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15
import org.jsoup.Jsoup;
16
import org.jsoup.nodes.Document;
17
import org.jsoup.nodes.Element;
18
import org.jsoup.select.Elements;
19

  
20
/**
21
 * The Class ELisDownloadPlugin.
22
 */
23
public class ELisDownloadPlugin extends AbstractDownloadPlugin implements DownloadPlugin {
24

  
25
    /**
26
     * The Constant log.
27
     */
28
    private static final Log log = LogFactory.getLog(ELisDownloadPlugin.class);
29

  
30
    /**
31
     * Extract url.
32
     *
33
     * @param url the url
34
     * @return the string
35
     */
36
    @Override
37
    public String extractURL(final String url) throws DownloadPluginException {
38
        try {
39
            Document doc = Jsoup.connect(url).get();
40
            Elements links = doc.select("a[href$=.pdf]");
41
            for (Element link : links) {
42
                String linkvalue = link.attr("abs:href");
43
                if (!linkvalue.toLowerCase().contains("thumbnailversion")) {
44
                    return linkvalue;
45
                }
46
            }
47
            return null;
48
        } catch (Exception e) {
49
            throw new DownloadPluginException("Error on extract URL", e);
50
        }
51

  
52
    }
53

  
54
    @Override
55
    public Iterable<DownloadItem> retrieveUrls(final Iterable<DownloadItem> urls) {
56
        return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
57

  
58
            @Override
59
            public DownloadItem apply(final DownloadItem input) {
60
                return retrieveUrl(input);
61
            }
62
        });
63
    }
64

  
65
    /*
66
     * (non-Javadoc)
67
     *
68
     * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
69
     */
70
    @Override
71
    public String getPluginName() {
72
        return "ELisDownloadPlugin";
73
    }
74

  
75
    /*
76
     * (non-Javadoc)
77
     *
78
     * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
79
     */
80
    @Override
81
    public DownloadItem retrieveUrl(final DownloadItem input) {
82
        if (checkOpenAccess(input) == null) {
83
            return null;
84
        }
85
        String url = input.getOriginalUrl();
86

  
87
        if (url == null || url.trim().length() == 0) {
88
            return input;
89
        }
90
        @SuppressWarnings("unchecked")
91
        List<String> urls = new Gson().fromJson(url, ArrayList.class);
92
        if (urls == null || urls.size() == 0) {
93
            return input;
94
        }
95
        if (checkUrlsNotNull(input, urls))
96
            return input;
97
        input.setOriginalUrl(null);
98
        input.setUrl(null);
99
        return input;
100
    }
101

  
102
    @Override
103
    public void setBasePath(final String basePath) {
104
        // TODO Auto-generated method stub
105

  
106
    }
107

  
108
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/main/java/eu/dnetlib/download/plugin/PathRetreiver.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.io.File;
4
import java.io.FileFilter;
5
import java.util.*;
6

  
7
import org.apache.commons.lang3.StringUtils;
8
import org.apache.commons.logging.Log;
9
import org.apache.commons.logging.LogFactory;
10

  
11
/**
12
 * The Class PathRetreiver.
13
 */
14
public class PathRetreiver {
15

  
16
	private static final Log log = LogFactory.getLog(PathRetreiver.class);
17

  
18
	/** The instance. */
19
	private static PathRetreiver instance;
20

  
21
	/**
22
	 * Gets the single instance of PathRetreiver.
23
	 *
24
	 * @param base_path
25
	 *            the base_path
26
	 * @return single instance of PathRetreiver
27
	 */
28
	public static PathRetreiver getInstance(final String base_path) {
29
		if (instance == null) {
30
			instance = new PathRetreiver();
31
			instance.setBase_path(base_path);
32
		}
33
		return instance;
34
	}
35

  
36
	/** The base_path. */
37
	private String base_path;
38

  
39
	/** The values. */
40
	private TreeMap<Integer, InfoPath> values;
41

  
42
	/**
43
	 * Bootstrap.
44
	 */
45
	private void bootstrap() {
46
		values = new TreeMap<>();
47
		File basePath = new File(this.base_path);
48
		File[] selectedFiles = basePath.listFiles(pathname -> pathname.isDirectory());
49

  
50
		for (File f : selectedFiles) {
51
			String lower = StringUtils.substringAfter(StringUtils.substringBefore(f.getName(), "_"), "PMC");
52
			String upper = StringUtils.substringAfter(StringUtils.substringAfter(f.getName(), "_"), "PMC");
53
			String path = f.getPath();
54
			InfoPath i = new InfoPath();
55
			i.setLower(Integer.parseInt(lower));
56
			i.setUpper(Integer.parseInt(upper));
57
			i.setPath(path);
58
			values.put(i.getLower(), i);
59
		}
60

  
61

  
62
		if (log.isDebugEnabled()) {
63
			for (InfoPath p : values.values()) {
64
				log.debug(String.format("%s -- %s : %s", p.getLower(), p.getUpper(), p.getPath()));
65
			}
66
		}
67
	}
68

  
69
	/**
70
	 * Gets the path for pmcid.
71
	 *
72
	 * @param pmcID
73
	 *            the pmc id
74
	 * @return the path for pmcid
75
	 */
76
	public String getPathForPMCID(final int pmcID) {
77
		if (values == null) {
78
			bootstrap();
79
		}
80

  
81
		Map.Entry<Integer, InfoPath> infoPath = values.floorEntry(pmcID);
82
		if (infoPath != null) {
83

  
84
			final String currentPath = infoPath.getValue().getPath() + "/" + pmcID + ".xml";
85
			final File f = new File(currentPath);
86
			log.debug(String.format("try to search in path %s", currentPath));
87
			String s = null;
88
			if (f.exists()) {
89
				s = f.getPath();
90
				log.debug(String.format("found in %s", s));
91
			} else {
92
				log.debug(String.format("not found in %s", s));
93
			}
94
			return s;
95
		}
96
		log.debug(String.format("PMC with ID: %s not found", pmcID));
97
		return null;
98
	}
99

  
100
	/**
101
	 * Sets the base_path.
102
	 *
103
	 * @param base_path
104
	 *            the new base_path
105
	 */
106
	public void setBase_path(final String base_path) {
107
		this.base_path = base_path;
108
	}
109

  
110
	/**
111
	 * Gets the base_path.
112
	 *
113
	 * @return the base_path
114
	 */
115
	public String getBase_path() {
116
		return this.base_path;
117
	}
118
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/main/java/eu/dnetlib/download/plugin/ArxivImportFromFile.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.io.File;
4
import java.nio.file.Path;
5
import java.nio.file.Paths;
6
import java.util.List;
7

  
8
import com.google.common.collect.Iterables;
9
import com.google.gson.Gson;
10
import eu.dnetlib.data.download.rmi.AbstractDownloadPlugin;
11
import eu.dnetlib.data.download.rmi.DownloadItem;
12
import eu.dnetlib.data.download.rmi.DownloadPlugin;
13
import eu.dnetlib.data.download.rmi.DownloadPluginException;
14
import org.apache.commons.lang3.StringUtils;
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17

  
18
public class ArxivImportFromFile extends AbstractDownloadPlugin implements DownloadPlugin {
19

  
20
    private static final Log log = LogFactory.getLog(ArxivImportFromFile.class); // NOPMD by marko on 11/24/08 5:02 PM
21

  
22
    /**
23
     * The base path.
24
     */
25
    private String basePath;
26

  
27
    @Override
28
    public DownloadItem retrieveUrl(final DownloadItem item) {
29
        if (checkOpenAccess(item) == null) return null;
30
        final String baseURLs = item.getUrl();
31
        final List<String> urlsList = new Gson().fromJson(baseURLs, List.class);
32
        for (final String baseURL : urlsList) {
33

  
34
            if (baseURL.isEmpty() == false && baseURL.trim().startsWith("http://") == true) {
35
                final String name = StringUtils.substringAfter(baseURL, "abs/").trim();
36
                if (name == null) {
37
                    item.setUrl(null);
38
                    return item;
39
                }
40

  
41
                final String fileURL = createPath(name);
42
                if (StringUtils.isBlank(fileURL)) {
43
                    item.setUrl(null);
44
                    return item;
45
                }
46

  
47
                final File f = new File(fileURL);
48
                if (f.exists()) {
49
                    if (log.isDebugEnabled()) {
50
                        log.debug("found path associated to " + item.getIdItemMetadata() + " with path : " + fileURL);
51
                    }
52
                    item.setUrl("file://" + fileURL);
53
                } else {
54
                    if (log.isDebugEnabled()) {
55
                        log.debug("NOT found path associated to " + item.getIdItemMetadata()+": "+fileURL);
56
                    }
57
                    item.setUrl(null);
58
                }
59
                item.setOriginalUrl(baseURL);
60
                return item;
61

  
62
            } else {
63
                item.setUrl(null);
64
            }
65

  
66
        }
67
        return item;
68
    }
69

  
70
    public String createPath(final String name) {
71
        final String regExp = "\\d+\\.\\d+";
72
        if (name.matches(regExp)) {
73
            final String[] values = name.split("\\.");
74
            final Path bsPath = Paths.get(basePath);
75
            final Path filePath = Paths.get(String.format("%s/%s.pdf", values[0], name));
76

  
77
            final String fileURL = bsPath.resolve(filePath).toString();
78
            return fileURL;
79
        } else {
80
            if (name.contains("/")) {
81
                final String[] values = name.split("/");
82
                if (values.length != 2) return null;
83
                if (values[1].length() > 4) {
84
                    final String middle = values[1].substring(0, 4);
85
                    final Path bsPath = Paths.get(basePath);
86
                    final Path filePath = Paths.get(String.format("%s/%s.pdf", middle, name.replace("/", "")));
87
                    final String fileURL = bsPath.resolve(filePath).toString();
88
                    return fileURL;
89
                }
90
            }
91
        }
92
        return null;
93
    }
94

  
95
    @Override
96
    public Iterable<DownloadItem> retrieveUrls(final Iterable<DownloadItem> items) {
97
        return Iterables.transform(items, input -> retrieveUrl(input));
98
    }
99

  
100
    @Override
101
    public String getPluginName() {
102
        return "ArxivImportFromFile";
103
    }
104

  
105
    /**
106
     * @return the basePath
107
     */
108
    public String getBasePath() {
109
        return basePath;
110
    }
111

  
112
    /**
113
     * @param basePath the basePath to set
114
     */
115
    @Override
116
    public void setBasePath(final String basePath) {
117
        this.basePath = basePath;
118
    }
119

  
120
    @Override
121
    public String extractURL(String baseURL) throws DownloadPluginException {
122
        return null;
123
    }
124
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/main/java/eu/dnetlib/download/plugin/InfoPath.java
1
package eu.dnetlib.download.plugin;
2

  
3
/**
 * Mutable bean that groups a path with two integer bounds.
 *
 * NOTE(review): presumably {@code lower} and {@code upper} delimit a numeric range that is
 * associated with {@code path}; whether the bounds are inclusive is not visible here —
 * confirm against the callers.
 */
public class InfoPath {

	/** The lower bound. */
	private int lower;

	/** The upper bound. */
	private int upper;

	/** The path associated with the bounds. */
	private String path;

	/**
	 * Gets the lower bound.
	 *
	 * @return the lower bound
	 */
	public int getLower() {
		return lower;
	}

	/**
	 * Sets the lower bound.
	 *
	 * @param lower
	 *            the new lower bound
	 */
	public void setLower(final int lower) {
		this.lower = lower;
	}

	/**
	 * Gets the upper bound.
	 *
	 * @return the upper bound
	 */
	public int getUpper() {
		return upper;
	}

	/**
	 * Sets the upper bound.
	 *
	 * @param upper
	 *            the new upper bound
	 */
	public void setUpper(final int upper) {
		this.upper = upper;
	}

	/**
	 * Gets the path.
	 *
	 * @return the path, or {@code null} if none has been set
	 */
	public String getPath() {
		return path;
	}

	/**
	 * Sets the path.
	 *
	 * @param path
	 *            the new path
	 */
	public void setPath(final String path) {
		this.path = path;
	}

	/**
	 * Renders the three fields for log and debug output.
	 *
	 * @return a string of the form {@code InfoPath[lower=L, upper=U, path=P]}
	 */
	@Override
	public String toString() {
		return "InfoPath[lower=" + lower + ", upper=" + upper + ", path=" + path + "]";
	}
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/main/java/eu/dnetlib/download/plugin/DLibPlugin.java
1
/**
2
 *
3
 */
4
package eu.dnetlib.download.plugin;
5

  
6

  
7
import com.google.common.base.Function;
8
import com.google.common.collect.Iterables;
9
import eu.dnetlib.data.download.rmi.DownloadItem;
10
import eu.dnetlib.data.download.rmi.DownloadPluginException;
11

  
12
/**
13
 * The Class EasyPDFDownloadPlugin.
14
 */
15
public class DLibPlugin extends HALPdfDocumentPlugin {
16

  
17

  
18
    @Override
19
    public Iterable<DownloadItem> retrieveUrls(final Iterable<DownloadItem> urls) {
20
        return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
21

  
22
            @Override
23
            public DownloadItem apply(final DownloadItem input) {
24
                return retrieveUrl(input);
25
            }
26
        });
27
    }
28

  
29
    /*
30
     * (non-Javadoc)
31
     *
32
     * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
33
     */
34
    @Override
35
    public String getPluginName() {
36
        return "DLIBDownloadPlugin";
37
    }
38

  
39
    @Override
40
    public void setBasePath(final String basePath) {
41
        // TODO Auto-generated method stub
42

  
43
    }
44

  
45
    @Override
46
    public String extractURL(String baseURL) throws DownloadPluginException {
47
        return baseURL.trim().endsWith(".html") ? baseURL : null;
48
    }
49
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/main/java/eu/dnetlib/download/plugin/DSpacePDFLinkPlugins.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.io.IOException;
4
import java.net.HttpURLConnection;
5
import java.net.URL;
6
import java.util.ArrayList;
7
import java.util.List;
8

  
9
import com.google.common.collect.Iterables;
10
import com.google.gson.Gson;
11
import eu.dnetlib.data.download.rmi.AbstractDownloadPlugin;
12
import eu.dnetlib.data.download.rmi.DownloadItem;
13
import eu.dnetlib.data.download.rmi.DownloadPlugin;
14
import eu.dnetlib.data.download.rmi.DownloadPluginException;
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17
import org.jsoup.Jsoup;
18
import org.jsoup.nodes.Document;
19
import org.jsoup.nodes.Element;
20
import org.jsoup.select.Elements;
21

  
22
public class DSpacePDFLinkPlugins extends AbstractDownloadPlugin implements DownloadPlugin {
23

  
24
    /**
25
     * The Constant log.
26
     */
27
    private static final Log log = LogFactory.getLog(DSpacePDFLinkPlugins.class);
28

  
29
    private final static int maxNumberJump = 10;
30

  
31
    private final static int maxNumberConnectRetries = 5;
32

  
33
    /**
34
     * Milliseconds used to backoff in case of connection errors.
35
     */
36
    private final static int BACKOFF_FACTOR = 100;
37

  
38
    private String getHTTPRedirectedURL(final String mainURL) throws Exception {
39
        URL startURL = new URL(mainURL);
40
        HttpURLConnection conn = (HttpURLConnection) startURL.openConnection();
41

  
42
        conn.setConnectTimeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT);
43

  
44
        conn.setInstanceFollowRedirects(true);  // you still need to handle redirect manually.
45
        HttpURLConnection.setFollowRedirects(true);
46
        String location = mainURL;
47

  
48
        int numJump = 1;
49

  
50
        int responseCode = conn.getResponseCode();
51

  
52
        while ((responseCode >= 300) && (responseCode < 400) && (numJump++ < maxNumberJump)) {
53
            location = conn.getHeaderFields().get("Location").get(0);
54
            conn.disconnect();
55
            startURL = new URL(location);
56
            conn = (HttpURLConnection) startURL.openConnection();
57
            conn.setConnectTimeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT);
58
            conn.setInstanceFollowRedirects(true);  // you still need to handle redirect manually.
59
            HttpURLConnection.setFollowRedirects(true);
60
            responseCode = conn.getResponseCode();
61
        }
62
        conn.disconnect();
63
        if (!((responseCode >= 200) && (responseCode < 300)))
64
            return null;
65
        return location;
66
    }
67

  
68
    /**
69
     * Extract url.
70
     *
71
     * @param url the url
72
     * @return the string
73
     */
74
    @Override
75
    public String extractURL(final String url) throws DownloadPluginException {
76
        try {
77
            final String location = getHTTPRedirectedURL(url);
78

  
79
            if (location == null) {
80
                return null;
81
            }
82

  
83
            Document doc = null;
84
            int retries = 0;
85
            boolean success = false;
86

  
87
            while(retries < maxNumberConnectRetries) {
88
                try {
89
                    doc = Jsoup.connect(location).timeout(AbstractDownloadPlugin.DEFAULT_TIMEOUT).get();
90
                    success = true;
91
                    break;
92
                } catch (IOException e) {
93
                    final int millis = BACKOFF_FACTOR * (retries + 1);
94
                    log.debug(String.format("backoff for %s ms before retrying on %s", millis, location));
95
                    Thread.sleep(millis);
96
                }
97
                retries++;
98
            }
99

  
100
            if (!success) {
101
                throw new DownloadPluginException("reached max number of connect retries for URL: " + location);
102
            }
103

  
104
            final Elements links = doc.select("meta[content][name=citation_pdf_url]");
105

  
106
            for (Element link : links) {
107
                String linkValue = link.attr("content");
108
                if (regularExpression != null) {
109
                    for (String regex : regularExpression) {
110
                        if (linkValue.matches(regex)) {
111
                            return linkValue;
112
                        }
113
                    }
114
                } else {
115
                    //if(linkValue.matches("^http.*pdf$")){
116
                        return linkValue;
117
                    //}
118
                }
119

  
120
            }
121
            return null;
122
        } catch (Throwable e) {
123
	        throw new DownloadPluginException("Error on extract URL", e);
124
        }
125
    }
126

  
127
    @Override
128
    public Iterable<DownloadItem> retrieveUrls(final Iterable<DownloadItem> urls) {
129
        return Iterables.transform(urls, input -> retrieveUrl(input));
130
    }
131

  
132
    /*
133
     * (non-Javadoc)
134
     *
135
     * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
136
     */
137
    @Override
138
    public String getPluginName() {
139
        return "DSpacePDFLinkPlugins";
140
    }
141

  
142
    /*
143
     * (non-Javadoc)
144
     *
145
     * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
146
     */
147
    @Override
148
    public DownloadItem retrieveUrl(final DownloadItem input) {
149
        if (checkOpenAccess(input) == null) return null;
150
        String url = input.getOriginalUrl();
151

  
152
        if ((url == null) || (url.trim().length() == 0)) return input;
153
        @SuppressWarnings("unchecked")
154
        List<String> urls = new Gson().fromJson(url, ArrayList.class);
155
        if ((urls == null) || (urls.size() == 0)) return input;
156
        if (checkUrlsNotNull(input, urls))
157
            return input;
158
        input.setOriginalUrl(null);
159
        input.setUrl(null);
160
        return input;
161
    }
162

  
163
    @Override
164
    public void setBasePath(final String basePath) {
165
        // TODO Auto-generated method stub
166

  
167
    }
168

  
169
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/main/java/eu/dnetlib/download/plugin/HindawiDownloadPlugin.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.nio.file.FileSystems;
4
import java.nio.file.Files;
5
import java.nio.file.Path;
6
import java.util.ArrayList;
7
import java.util.List;
8

  
9
import com.google.common.base.Function;
10
import com.google.common.collect.Iterables;
11
import com.google.gson.Gson;
12
import eu.dnetlib.data.download.rmi.AbstractDownloadPlugin;
13
import eu.dnetlib.data.download.rmi.DownloadItem;
14
import eu.dnetlib.data.download.rmi.DownloadPlugin;
15
import eu.dnetlib.data.download.rmi.DownloadPluginException;
16

  
17
/**
18
 * Created by sandro on 3/1/16.
19
 */
20
public class HindawiDownloadPlugin extends AbstractDownloadPlugin implements DownloadPlugin {
21

  
22
	private String basePath;
23

  
24
	private Gson gson;
25

  
26
	public HindawiDownloadPlugin() {
27
		this.gson = new Gson();
28
	}
29

  
30
	@Override
31
	public String extractURL(final String baseURL) throws DownloadPluginException {
32
		return null;
33
	}
34

  
35
	@Override
36
	public void setBasePath(final String basePath) {
37
		this.basePath = basePath;
38
	}
39

  
40
	/*
41
	 * (non-Javadoc)
42
     *
43
     * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getPluginName()
44
     */
45
	@Override
46
	public String getPluginName() {
47
		return "HindawiDownloadPlugin";
48
	}
49

  
50
	@Override
51
	public Iterable<DownloadItem> retrieveUrls(final Iterable<DownloadItem> urls) throws DownloadPluginException {
52
		return Iterables.transform(urls, new Function<DownloadItem, DownloadItem>() {
53

  
54
			@Override
55
			public DownloadItem apply(final DownloadItem input) {
56
				return retrieveUrl(input);
57
			}
58
		});
59
	}
60

  
61
	/*
62
	 * (non-Javadoc)
63
	 *
64
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#retrieveUrl(eu.dnetlib.data.download.rmi.DownloadItem)
65
	 */
66
	@Override
67
	public DownloadItem retrieveUrl(final DownloadItem input) throws DownloadPluginException {
68

  
69
		List<String> inputList = this.gson.fromJson(input.getUrl(), ArrayList.class);
70
		if (inputList == null || inputList.size() == 0) {
71
			input.setUrl(null);
72
			return input;
73
		}
74

  
75
		for (String fileUrl : inputList) {
76
			Path inputhPath = FileSystems.getDefault().getPath(basePath).resolve("." + fileUrl);
77
			if (Files.exists(inputhPath)) {
78
				input.setOriginalUrl("file://" + inputhPath.toString());
79
				input.setUrl("file://" + inputhPath.toString());
80
				return input;
81
			}
82
		}
83
		input.setUrl(null);
84
		return input;
85
	}
86
}
modules/dnet-download-plugins/tags/dnet-download-plugins-2.1.20/src/main/java/eu/dnetlib/download/plugin/FollowPDFLinkPlugins.java
1
package eu.dnetlib.download.plugin;
2

  
3
import java.net.HttpURLConnection;
4
import java.net.URL;
5
import java.util.ArrayList;
6
import java.util.List;
7

  
8
import com.google.common.base.Function;
9
import com.google.common.collect.Iterables;
10
import com.google.gson.Gson;
11
import eu.dnetlib.data.download.rmi.AbstractDownloadPlugin;
12
import eu.dnetlib.data.download.rmi.DownloadItem;
13
import eu.dnetlib.data.download.rmi.DownloadPlugin;
14
import eu.dnetlib.data.download.rmi.DownloadPluginException;
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17
import org.jsoup.Jsoup;
18
import org.jsoup.nodes.Document;
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff