Project

General

Profile

« Previous | Next » 

Revision 53020

Reimplemented business logic for PathRetreiver

View differences:

modules/dnet-download-plugins/trunk/src/test/java/eu/dnetlib/download/plugin/PathRetreiverTest.java
1
package eu.dnetlib.download.plugin;
2

  
3
import org.apache.commons.io.IOUtils;
4
import org.apache.commons.lang3.StringUtils;
5
import org.apache.commons.logging.Log;
6
import org.apache.commons.logging.LogFactory;
7
import org.junit.After;
8
import org.junit.Assert;
9
import org.junit.Before;
10
import org.junit.Test;
11
import org.junit.rules.TemporaryFolder;
12

  
13
import java.io.File;
14
import java.io.IOException;
15
import java.util.Arrays;
16

  
17
public class PathRetreiverTest {
18

  
19
    private static final Log log = LogFactory.getLog(PathRetreiverTest.class); // NOPMD by marko on 11/24/08 5:02 PM
20

  
21
    private TemporaryFolder t = new TemporaryFolder();
22

  
23
    private final PathRetreiver pt = new PathRetreiver();
24

  
25
    @Before
26
    public void setUp() throws IOException {
27
        t.create();
28

  
29
        IOUtils.readLines(getClass().getResourceAsStream("pmc_dirs.txt")).forEach(t::newFolder);
30
        for(final File dir : t.getRoot().listFiles(pathname -> pathname.isDirectory())) {
31
            int lower = Integer.parseInt(StringUtils.substringBefore(dir.getName(), "_").replaceAll("PMC", ""));
32
            int upper = Integer.parseInt(StringUtils.substringAfter(dir.getName(), "_").replaceAll("PMC", ""));
33

  
34
            for(int i=lower;i<lower+3 && i<upper;i++) {
35
               t.newFile(dir.getName() + "/" + i + ".xml");
36
            }
37
            for(int i=upper;i>upper-3 && i>lower;i--) {
38
                t.newFile(dir.getName() + "/" + i + ".xml");
39
            }
40
        }
41

  
42
        pt.setBase_path(t.getRoot().getPath());
43
    }
44

  
45

  
46
    @Test
47
    public void testPathRetriever() {
48

  
49
        String pathForPMCID = pt.getPathForPMCID(4676029);
50
        Assert.assertNotNull(pathForPMCID);
51
        log.info(pathForPMCID);
52

  
53
        pathForPMCID = pt.getPathForPMCID(4676028);
54
        Assert.assertNotNull(pathForPMCID);
55
        log.info(pathForPMCID);
56

  
57
        pathForPMCID = pt.getPathForPMCID(4676026);
58
        Assert.assertNull(pathForPMCID);
59
    }
60

  
61
    @After
62
    public void tearDown() {
63
        t.delete();
64
    }
65
}
modules/dnet-download-plugins/trunk/src/test/resources/eu/dnetlib/download/plugin/pmc_dirs.txt
1
PMC2114664_PMC2135116
2
PMC2135117_PMC2173206
3
PMC2173207_PMC2193606
4
PMC2193607_PMC2228428
5
PMC2228429_PMC2289497
6
PMC2289498_PMC2376858
7
PMC2376859_PMC2453635
8
PMC2453636_PMC2562811
9
PMC2562812_PMC2600168
10
PMC2600169_PMC2639698
11
PMC2639699_PMC2679038
12
PMC2679039_PMC2709009
13
PMC2709010_PMC2739494
14
PMC2739495_PMC2774688
15
PMC2774689_PMC2807107
16
PMC2807108_PMC2837157
17
PMC2837158_PMC2872237
18
PMC2872246_PMC2900260
19
PMC2900261_PMC2931455
20
PMC2931456_PMC2959298
21
PMC2959299_PMC2977153
22
PMC2977154_PMC3000089
23
PMC3000090_PMC3018434
24
PMC3018435_PMC3043758
25
PMC3043759_PMC3066749
26
PMC3066750_PMC3094005
27
PMC3094006_PMC3115464
28
PMC3115476_PMC3139354
29
PMC3139355_PMC3161964
30
PMC3161966_PMC3184122
31
PMC3184123_PMC3205810
32
PMC3205811_PMC3226963
33
PMC3226964_PMC3246723
34
PMC3246724_PMC3267738
35
PMC3267739_PMC3287439
36
PMC3287440_PMC3305035
37
PMC3305036_PMC3324463
38
PMC3324464_PMC3342054
39
PMC3342055_PMC3359980
40
PMC3359981_PMC3379654
41
PMC3379655_PMC3400869
42
PMC3400870_PMC3420321
43
PMC3420322_PMC3440431
44
PMC3440432_PMC3465972
45
PMC3465973_PMC3485114
46
PMC3485115_PMC3504117
47
PMC3504118_PMC3521368
48
PMC3521369_PMC3540995
49
PMC3541002_PMC3559779
50
PMC3559780_PMC3579017
51
PMC3579018_PMC3597585
52
PMC3597586_PMC3616740
53
PMC3616741_PMC3637505
54
PMC3637506_PMC3656847
55
PMC3656848_PMC3674696
56
PMC3674697_PMC3694027
57
PMC3694028_PMC3713561
58
PMC3713562_PMC3734723
59
PMC3734724_PMC3756282
60
PMC3756283_PMC3775526
61
PMC3775527_PMC3794997
62
PMC3794998_PMC3817535
63
PMC3817536_PMC3834982
64
PMC3834984_PMC3851930
65
PMC3851934_PMC3870407
66
PMC3870408_PMC3888268
67
PMC3888269_PMC3905545
68
PMC3905546_PMC3923943
69
PMC3923951_PMC3942209
70
PMC3942210_PMC3962119
71
PMC3962121_PMC3980435
72
PMC3980436_PMC3997492
73
PMC3997493_PMC4016582
74
PMC4016583_PMC4034045
75
PMC4034053_PMC4052667
76
PMC4052668_PMC4069525
77
PMC4069526_PMC4087078
78
PMC4087079_PMC4104043
79
PMC4104070_PMC4123881
80
PMC4123882_PMC4143513
81
PMC4143514_PMC4161060
82
PMC4161143_PMC4179239
83
PMC4179240_PMC4196090
84
PMC4196091_PMC4213380
85
PMC4213381_PMC4229908
86
PMC4229909_PMC4246363
87
PMC4246371_PMC4265290
88
PMC4265291_PMC4283529
89
PMC4283530_PMC4301549
90
PMC4301550_PMC4320116
91
PMC4320117_PMC4336280
92
PMC4336281_PMC4354467
93
PMC4354468_PMC4372225
94
PMC4372226_PMC4389034
95
PMC4389035_PMC4406610
96
PMC4406611_PMC4423035
97
PMC4423048_PMC4439555
98
PMC4439556_PMC4456817
99
PMC4456818_PMC4474724
100
PMC4474725_PMC4491649
101
PMC4491650_PMC4508683
102
PMC4508684_PMC4525193
103
PMC4525194_PMC4543216
104
PMC4543217_PMC4560130
105
PMC4560131_PMC4579196
106
PMC4579197_PMC4595007
107
PMC4595008_PMC4609474
108
PMC4609475_PMC4626773
109
PMC4626774_PMC4643236
110
PMC4643237_PMC4660323
111
PMC4660324_PMC4676029
112
PMC4676030_PMC4693769
113
PMC4693770_PMC4710023
114
PMC4710024_PMC4728071
115
PMC4728075_PMC4743341
116
PMC4743342_PMC4758928
117
PMC4758929_PMC4775014
118
PMC4775015_PMC4793100
119
PMC4793101_PMC4809441
120
PMC4809442_PMC4826653
121
PMC4826654_PMC4844069
122
PMC4844073_PMC4859985
123
PMC4859986_PMC4877359
124
PMC4877365_PMC4894427
125
PMC4894428_PMC4910313
126
PMC4910314_PMC4928029
127
PMC4928030_PMC4943888
128
PMC4943889_PMC4963471
129
PMC4963472_PMC4979068
130
PMC4979069_PMC4996129
131
PMC4996130_PMC5011754
132
PMC5011755_PMC5027112
133
PMC5027113_PMC5041839
134
PMC5041840_PMC5055367
135
PMC5055368_PMC5070294
136
PMC5070295_PMC5084992
137
PMC5084998_PMC5098989
138
PMC5098990_PMC5113742
139
PMC5113743_PMC5128874
140
PMC5128875_PMC5142017
141
PMC5142018_PMC5155182
142
PMC5155183_PMC5168587
143
PMC5168588_PMC5181444
144
PMC5181445_PMC5195159
145
PMC5195160_PMC5207976
146
PMC5207977_PMC5220330
147
PMC5220331_PMC5232993
148
PMC5232994_PMC5245953
149
PMC5245954_PMC5258212
150
PMC5258213_PMC5269906
151
PMC5269907_PMC5281504
152
PMC5281505_PMC5294573
153
PMC5294574_PMC5307639
154
PMC5307640_PMC5320633
155
PMC5320634_PMC5334419
156
PMC5334420_PMC5348122
157
PMC5348123_PMC5362478
158
PMC5362479_PMC5379575
159
PMC5379578_PMC5393849
160
PMC5393850_PMC5409456
161
PMC5409462_PMC5424203
162
PMC5424204_PMC5437630
163
PMC5437631_PMC5452760
164
PMC5452761_PMC5467055
165
PMC5467056_PMC5483227
166
PMC5483229_PMC5497534
167
PMC549050_PMC1240576
168
PMC5497535_PMC5513250
169
PMC5513251_PMC5528017
170
PMC5528018_PMC5544983
171
PMC5544984_PMC5561184
172
PMC5561185_PMC5576750
173
PMC5576751_PMC5591643
174
PMC5591783_PMC5608430
175
PMC5608431_PMC5623045
176
PMC5623046_PMC5637514
177
PMC5637515_PMC5653330
178
PMC5653331_PMC5669206
179
PMC5669215_PMC5686621
180
PMC5686622_PMC5703149
181
PMC5703150_PMC5718512
182
PMC5718513_PMC5732977
183
PMC5732978_PMC5748310
184
PMC5748311_PMC5763634
185
PMC5763635_PMC5779436
186
PMC5779437_PMC5794819
187
PMC5794820_PMC5810839
188
PMC5810840_PMC5827115
189
PMC5827136_PMC5842751
190
PMC5842752_PMC5857142
191
PMC5857143_PMC5872445
192
PMC5872446_PMC5887736
193
PMC5887737_PMC5903694
194
PMC5903695_PMC5920276
195
PMC5920278_PMC5938386
196
PMC5938387_PMC5956503
197
PMC5956504_PMC5975655
198
PMC5975656_PMC5991267
199
PMC5991268_PMC6006820
200
PMC6006821_PMC6022687
201
PMC6022688_PMC6036291
202
PMC6036292_PMC6053207
203
PMC6053208_PMC6069616
204
PMC6069617_PMC6090019
205
PMC6090020_PMC6105291
modules/dnet-download-plugins/trunk/src/main/java/eu/dnetlib/download/plugin/PathRetreiver.java
2 2

  
3 3
import java.io.File;
4 4
import java.io.FileFilter;
5
import java.util.ArrayList;
6
import java.util.Collections;
7
import java.util.Comparator;
8
import java.util.List;
5
import java.util.*;
9 6

  
10 7
import org.apache.commons.lang3.StringUtils;
11 8
import org.apache.commons.logging.Log;
12 9
import org.apache.commons.logging.LogFactory;
13 10

  
14
// TODO: Auto-generated Javadoc
15 11
/**
16 12
 * The Class PathRetreiver.
17 13
 */
......
41 37
	private String base_path;
42 38

  
43 39
	/** The values. */
44
	private List<InfoPath> values;
40
	private TreeMap<Integer, InfoPath> values;
45 41

  
46 42
	/**
47 43
	 * Bootstrap.
48 44
	 */
49 45
	private void bootstrap() {
50
		values = new ArrayList<InfoPath>();
46
		values = new TreeMap<>();
51 47
		File basePath = new File(this.base_path);
52
		File[] selectedFiles = basePath.listFiles(new FileFilter() {
48
		File[] selectedFiles = basePath.listFiles(pathname -> pathname.isDirectory());
53 49

  
54
			@Override
55
			public boolean accept(final File pathname) {
56
				return pathname.isDirectory();
57
			}
58
		});
59

  
60 50
		for (File f : selectedFiles) {
61 51
			String lower = StringUtils.substringAfter(StringUtils.substringBefore(f.getName(), "_"), "PMC");
62 52
			String upper = StringUtils.substringAfter(StringUtils.substringAfter(f.getName(), "_"), "PMC");
......
65 55
			i.setLower(Integer.parseInt(lower));
66 56
			i.setUpper(Integer.parseInt(upper));
67 57
			i.setPath(path);
68
			values.add(i);
58
			values.put(i.getLower(), i);
69 59
		}
70 60

  
71
		Collections.sort(values, new Comparator<InfoPath>() {
72 61

  
73
			@Override
74
			public int compare(final InfoPath o1, final InfoPath o2) {
75
				if (o1.getLower() < o2.getLower()) return -1;
76
				else if (o1.getLower() < o2.getLower()) return 0;
77
				else return 1;
78
			}
79
		});
80 62
		if (log.isDebugEnabled()) {
81
			for (InfoPath p : values) {
63
			for (InfoPath p : values.values()) {
82 64
				log.debug(String.format("%s -- %s : %s", p.getLower(), p.getUpper(), p.getPath()));
83 65
			}
84 66
		}
85

  
86 67
	}
87 68

  
88 69
	/**
......
96 77
		if (values == null) {
97 78
			bootstrap();
98 79
		}
99
		for (int i = 0; i < values.size(); i++) {
100
			if (pmcID < values.get(i).getLower()) {
101
				if (i == 0) return null;
102
				String currentPath = values.get(i - 1).getPath() + "/" + pmcID + ".xml";
103
				File f = new File(currentPath);
104
				log.debug(String.format("try to search in path %s", currentPath));
105
				String s = null;
106
				if (f.exists()) {
107
					s = f.getPath();
108
					log.debug(String.format("found in %s", s));
109
				} else {
110
					log.debug(String.format("not found in %s", s));
111
				}
112
				return s;
113 80

  
81
		Map.Entry<Integer, InfoPath> infoPath = values.floorEntry(pmcID);
82
		if (infoPath != null) {
83

  
84
			final String currentPath = infoPath.getValue().getPath() + "/" + pmcID + ".xml";
85
			final File f = new File(currentPath);
86
			log.debug(String.format("try to search in path %s", currentPath));
87
			String s = null;
88
			if (f.exists()) {
89
				s = f.getPath();
90
				log.debug(String.format("found in %s", s));
91
			} else {
92
				log.debug(String.format("not found in %s", s));
114 93
			}
94
			return s;
115 95
		}
116 96
		log.debug(String.format("PMC with ID: %s not found", pmcID));
117 97
		return null;

Also available in: Unified diff