Revision 53020
Added by Claudio Atzori over 5 years ago
modules/dnet-download-plugins/trunk/src/test/java/eu/dnetlib/download/plugin/PathRetreiverTest.java | ||
---|---|---|
1 |
package eu.dnetlib.download.plugin; |
|
2 |
|
|
3 |
import org.apache.commons.io.IOUtils; |
|
4 |
import org.apache.commons.lang3.StringUtils; |
|
5 |
import org.apache.commons.logging.Log; |
|
6 |
import org.apache.commons.logging.LogFactory; |
|
7 |
import org.junit.After; |
|
8 |
import org.junit.Assert; |
|
9 |
import org.junit.Before; |
|
10 |
import org.junit.Test; |
|
11 |
import org.junit.rules.TemporaryFolder; |
|
12 |
|
|
13 |
import java.io.File; |
|
14 |
import java.io.IOException; |
|
15 |
import java.util.Arrays; |
|
16 |
|
|
17 |
public class PathRetreiverTest { |
|
18 |
|
|
19 |
private static final Log log = LogFactory.getLog(PathRetreiverTest.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
20 |
|
|
21 |
private TemporaryFolder t = new TemporaryFolder(); |
|
22 |
|
|
23 |
private final PathRetreiver pt = new PathRetreiver(); |
|
24 |
|
|
25 |
@Before |
|
26 |
public void setUp() throws IOException { |
|
27 |
t.create(); |
|
28 |
|
|
29 |
IOUtils.readLines(getClass().getResourceAsStream("pmc_dirs.txt")).forEach(t::newFolder); |
|
30 |
for(final File dir : t.getRoot().listFiles(pathname -> pathname.isDirectory())) { |
|
31 |
int lower = Integer.parseInt(StringUtils.substringBefore(dir.getName(), "_").replaceAll("PMC", "")); |
|
32 |
int upper = Integer.parseInt(StringUtils.substringAfter(dir.getName(), "_").replaceAll("PMC", "")); |
|
33 |
|
|
34 |
for(int i=lower;i<lower+3 && i<upper;i++) { |
|
35 |
t.newFile(dir.getName() + "/" + i + ".xml"); |
|
36 |
} |
|
37 |
for(int i=upper;i>upper-3 && i>lower;i--) { |
|
38 |
t.newFile(dir.getName() + "/" + i + ".xml"); |
|
39 |
} |
|
40 |
} |
|
41 |
|
|
42 |
pt.setBase_path(t.getRoot().getPath()); |
|
43 |
} |
|
44 |
|
|
45 |
|
|
46 |
@Test |
|
47 |
public void testPathRetriever() { |
|
48 |
|
|
49 |
String pathForPMCID = pt.getPathForPMCID(4676029); |
|
50 |
Assert.assertNotNull(pathForPMCID); |
|
51 |
log.info(pathForPMCID); |
|
52 |
|
|
53 |
pathForPMCID = pt.getPathForPMCID(4676028); |
|
54 |
Assert.assertNotNull(pathForPMCID); |
|
55 |
log.info(pathForPMCID); |
|
56 |
|
|
57 |
pathForPMCID = pt.getPathForPMCID(4676026); |
|
58 |
Assert.assertNull(pathForPMCID); |
|
59 |
} |
|
60 |
|
|
61 |
@After |
|
62 |
public void tearDown() { |
|
63 |
t.delete(); |
|
64 |
} |
|
65 |
} |
modules/dnet-download-plugins/trunk/src/test/resources/eu/dnetlib/download/plugin/pmc_dirs.txt | ||
---|---|---|
1 |
PMC2114664_PMC2135116 |
|
2 |
PMC2135117_PMC2173206 |
|
3 |
PMC2173207_PMC2193606 |
|
4 |
PMC2193607_PMC2228428 |
|
5 |
PMC2228429_PMC2289497 |
|
6 |
PMC2289498_PMC2376858 |
|
7 |
PMC2376859_PMC2453635 |
|
8 |
PMC2453636_PMC2562811 |
|
9 |
PMC2562812_PMC2600168 |
|
10 |
PMC2600169_PMC2639698 |
|
11 |
PMC2639699_PMC2679038 |
|
12 |
PMC2679039_PMC2709009 |
|
13 |
PMC2709010_PMC2739494 |
|
14 |
PMC2739495_PMC2774688 |
|
15 |
PMC2774689_PMC2807107 |
|
16 |
PMC2807108_PMC2837157 |
|
17 |
PMC2837158_PMC2872237 |
|
18 |
PMC2872246_PMC2900260 |
|
19 |
PMC2900261_PMC2931455 |
|
20 |
PMC2931456_PMC2959298 |
|
21 |
PMC2959299_PMC2977153 |
|
22 |
PMC2977154_PMC3000089 |
|
23 |
PMC3000090_PMC3018434 |
|
24 |
PMC3018435_PMC3043758 |
|
25 |
PMC3043759_PMC3066749 |
|
26 |
PMC3066750_PMC3094005 |
|
27 |
PMC3094006_PMC3115464 |
|
28 |
PMC3115476_PMC3139354 |
|
29 |
PMC3139355_PMC3161964 |
|
30 |
PMC3161966_PMC3184122 |
|
31 |
PMC3184123_PMC3205810 |
|
32 |
PMC3205811_PMC3226963 |
|
33 |
PMC3226964_PMC3246723 |
|
34 |
PMC3246724_PMC3267738 |
|
35 |
PMC3267739_PMC3287439 |
|
36 |
PMC3287440_PMC3305035 |
|
37 |
PMC3305036_PMC3324463 |
|
38 |
PMC3324464_PMC3342054 |
|
39 |
PMC3342055_PMC3359980 |
|
40 |
PMC3359981_PMC3379654 |
|
41 |
PMC3379655_PMC3400869 |
|
42 |
PMC3400870_PMC3420321 |
|
43 |
PMC3420322_PMC3440431 |
|
44 |
PMC3440432_PMC3465972 |
|
45 |
PMC3465973_PMC3485114 |
|
46 |
PMC3485115_PMC3504117 |
|
47 |
PMC3504118_PMC3521368 |
|
48 |
PMC3521369_PMC3540995 |
|
49 |
PMC3541002_PMC3559779 |
|
50 |
PMC3559780_PMC3579017 |
|
51 |
PMC3579018_PMC3597585 |
|
52 |
PMC3597586_PMC3616740 |
|
53 |
PMC3616741_PMC3637505 |
|
54 |
PMC3637506_PMC3656847 |
|
55 |
PMC3656848_PMC3674696 |
|
56 |
PMC3674697_PMC3694027 |
|
57 |
PMC3694028_PMC3713561 |
|
58 |
PMC3713562_PMC3734723 |
|
59 |
PMC3734724_PMC3756282 |
|
60 |
PMC3756283_PMC3775526 |
|
61 |
PMC3775527_PMC3794997 |
|
62 |
PMC3794998_PMC3817535 |
|
63 |
PMC3817536_PMC3834982 |
|
64 |
PMC3834984_PMC3851930 |
|
65 |
PMC3851934_PMC3870407 |
|
66 |
PMC3870408_PMC3888268 |
|
67 |
PMC3888269_PMC3905545 |
|
68 |
PMC3905546_PMC3923943 |
|
69 |
PMC3923951_PMC3942209 |
|
70 |
PMC3942210_PMC3962119 |
|
71 |
PMC3962121_PMC3980435 |
|
72 |
PMC3980436_PMC3997492 |
|
73 |
PMC3997493_PMC4016582 |
|
74 |
PMC4016583_PMC4034045 |
|
75 |
PMC4034053_PMC4052667 |
|
76 |
PMC4052668_PMC4069525 |
|
77 |
PMC4069526_PMC4087078 |
|
78 |
PMC4087079_PMC4104043 |
|
79 |
PMC4104070_PMC4123881 |
|
80 |
PMC4123882_PMC4143513 |
|
81 |
PMC4143514_PMC4161060 |
|
82 |
PMC4161143_PMC4179239 |
|
83 |
PMC4179240_PMC4196090 |
|
84 |
PMC4196091_PMC4213380 |
|
85 |
PMC4213381_PMC4229908 |
|
86 |
PMC4229909_PMC4246363 |
|
87 |
PMC4246371_PMC4265290 |
|
88 |
PMC4265291_PMC4283529 |
|
89 |
PMC4283530_PMC4301549 |
|
90 |
PMC4301550_PMC4320116 |
|
91 |
PMC4320117_PMC4336280 |
|
92 |
PMC4336281_PMC4354467 |
|
93 |
PMC4354468_PMC4372225 |
|
94 |
PMC4372226_PMC4389034 |
|
95 |
PMC4389035_PMC4406610 |
|
96 |
PMC4406611_PMC4423035 |
|
97 |
PMC4423048_PMC4439555 |
|
98 |
PMC4439556_PMC4456817 |
|
99 |
PMC4456818_PMC4474724 |
|
100 |
PMC4474725_PMC4491649 |
|
101 |
PMC4491650_PMC4508683 |
|
102 |
PMC4508684_PMC4525193 |
|
103 |
PMC4525194_PMC4543216 |
|
104 |
PMC4543217_PMC4560130 |
|
105 |
PMC4560131_PMC4579196 |
|
106 |
PMC4579197_PMC4595007 |
|
107 |
PMC4595008_PMC4609474 |
|
108 |
PMC4609475_PMC4626773 |
|
109 |
PMC4626774_PMC4643236 |
|
110 |
PMC4643237_PMC4660323 |
|
111 |
PMC4660324_PMC4676029 |
|
112 |
PMC4676030_PMC4693769 |
|
113 |
PMC4693770_PMC4710023 |
|
114 |
PMC4710024_PMC4728071 |
|
115 |
PMC4728075_PMC4743341 |
|
116 |
PMC4743342_PMC4758928 |
|
117 |
PMC4758929_PMC4775014 |
|
118 |
PMC4775015_PMC4793100 |
|
119 |
PMC4793101_PMC4809441 |
|
120 |
PMC4809442_PMC4826653 |
|
121 |
PMC4826654_PMC4844069 |
|
122 |
PMC4844073_PMC4859985 |
|
123 |
PMC4859986_PMC4877359 |
|
124 |
PMC4877365_PMC4894427 |
|
125 |
PMC4894428_PMC4910313 |
|
126 |
PMC4910314_PMC4928029 |
|
127 |
PMC4928030_PMC4943888 |
|
128 |
PMC4943889_PMC4963471 |
|
129 |
PMC4963472_PMC4979068 |
|
130 |
PMC4979069_PMC4996129 |
|
131 |
PMC4996130_PMC5011754 |
|
132 |
PMC5011755_PMC5027112 |
|
133 |
PMC5027113_PMC5041839 |
|
134 |
PMC5041840_PMC5055367 |
|
135 |
PMC5055368_PMC5070294 |
|
136 |
PMC5070295_PMC5084992 |
|
137 |
PMC5084998_PMC5098989 |
|
138 |
PMC5098990_PMC5113742 |
|
139 |
PMC5113743_PMC5128874 |
|
140 |
PMC5128875_PMC5142017 |
|
141 |
PMC5142018_PMC5155182 |
|
142 |
PMC5155183_PMC5168587 |
|
143 |
PMC5168588_PMC5181444 |
|
144 |
PMC5181445_PMC5195159 |
|
145 |
PMC5195160_PMC5207976 |
|
146 |
PMC5207977_PMC5220330 |
|
147 |
PMC5220331_PMC5232993 |
|
148 |
PMC5232994_PMC5245953 |
|
149 |
PMC5245954_PMC5258212 |
|
150 |
PMC5258213_PMC5269906 |
|
151 |
PMC5269907_PMC5281504 |
|
152 |
PMC5281505_PMC5294573 |
|
153 |
PMC5294574_PMC5307639 |
|
154 |
PMC5307640_PMC5320633 |
|
155 |
PMC5320634_PMC5334419 |
|
156 |
PMC5334420_PMC5348122 |
|
157 |
PMC5348123_PMC5362478 |
|
158 |
PMC5362479_PMC5379575 |
|
159 |
PMC5379578_PMC5393849 |
|
160 |
PMC5393850_PMC5409456 |
|
161 |
PMC5409462_PMC5424203 |
|
162 |
PMC5424204_PMC5437630 |
|
163 |
PMC5437631_PMC5452760 |
|
164 |
PMC5452761_PMC5467055 |
|
165 |
PMC5467056_PMC5483227 |
|
166 |
PMC5483229_PMC5497534 |
|
167 |
PMC549050_PMC1240576 |
|
168 |
PMC5497535_PMC5513250 |
|
169 |
PMC5513251_PMC5528017 |
|
170 |
PMC5528018_PMC5544983 |
|
171 |
PMC5544984_PMC5561184 |
|
172 |
PMC5561185_PMC5576750 |
|
173 |
PMC5576751_PMC5591643 |
|
174 |
PMC5591783_PMC5608430 |
|
175 |
PMC5608431_PMC5623045 |
|
176 |
PMC5623046_PMC5637514 |
|
177 |
PMC5637515_PMC5653330 |
|
178 |
PMC5653331_PMC5669206 |
|
179 |
PMC5669215_PMC5686621 |
|
180 |
PMC5686622_PMC5703149 |
|
181 |
PMC5703150_PMC5718512 |
|
182 |
PMC5718513_PMC5732977 |
|
183 |
PMC5732978_PMC5748310 |
|
184 |
PMC5748311_PMC5763634 |
|
185 |
PMC5763635_PMC5779436 |
|
186 |
PMC5779437_PMC5794819 |
|
187 |
PMC5794820_PMC5810839 |
|
188 |
PMC5810840_PMC5827115 |
|
189 |
PMC5827136_PMC5842751 |
|
190 |
PMC5842752_PMC5857142 |
|
191 |
PMC5857143_PMC5872445 |
|
192 |
PMC5872446_PMC5887736 |
|
193 |
PMC5887737_PMC5903694 |
|
194 |
PMC5903695_PMC5920276 |
|
195 |
PMC5920278_PMC5938386 |
|
196 |
PMC5938387_PMC5956503 |
|
197 |
PMC5956504_PMC5975655 |
|
198 |
PMC5975656_PMC5991267 |
|
199 |
PMC5991268_PMC6006820 |
|
200 |
PMC6006821_PMC6022687 |
|
201 |
PMC6022688_PMC6036291 |
|
202 |
PMC6036292_PMC6053207 |
|
203 |
PMC6053208_PMC6069616 |
|
204 |
PMC6069617_PMC6090019 |
|
205 |
PMC6090020_PMC6105291 |
modules/dnet-download-plugins/trunk/src/main/java/eu/dnetlib/download/plugin/PathRetreiver.java | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import java.io.File; |
4 | 4 |
import java.io.FileFilter; |
5 |
import java.util.ArrayList; |
|
6 |
import java.util.Collections; |
|
7 |
import java.util.Comparator; |
|
8 |
import java.util.List; |
|
5 |
import java.util.*; |
|
9 | 6 |
|
10 | 7 |
import org.apache.commons.lang3.StringUtils; |
11 | 8 |
import org.apache.commons.logging.Log; |
12 | 9 |
import org.apache.commons.logging.LogFactory; |
13 | 10 |
|
14 |
// TODO: Auto-generated Javadoc |
|
15 | 11 |
/** |
16 | 12 |
* The Class PathRetreiver. |
17 | 13 |
*/ |
... | ... | |
41 | 37 |
private String base_path; |
42 | 38 |
|
43 | 39 |
/** The values. */ |
44 |
private List<InfoPath> values;
|
|
40 |
private TreeMap<Integer, InfoPath> values;
|
|
45 | 41 |
|
46 | 42 |
/** |
47 | 43 |
* Bootstrap. |
48 | 44 |
*/ |
49 | 45 |
private void bootstrap() { |
50 |
values = new ArrayList<InfoPath>();
|
|
46 |
values = new TreeMap<>();
|
|
51 | 47 |
File basePath = new File(this.base_path); |
52 |
File[] selectedFiles = basePath.listFiles(new FileFilter() {
|
|
48 |
File[] selectedFiles = basePath.listFiles(pathname -> pathname.isDirectory());
|
|
53 | 49 |
|
54 |
@Override |
|
55 |
public boolean accept(final File pathname) { |
|
56 |
return pathname.isDirectory(); |
|
57 |
} |
|
58 |
}); |
|
59 |
|
|
60 | 50 |
for (File f : selectedFiles) { |
61 | 51 |
String lower = StringUtils.substringAfter(StringUtils.substringBefore(f.getName(), "_"), "PMC"); |
62 | 52 |
String upper = StringUtils.substringAfter(StringUtils.substringAfter(f.getName(), "_"), "PMC"); |
... | ... | |
65 | 55 |
i.setLower(Integer.parseInt(lower)); |
66 | 56 |
i.setUpper(Integer.parseInt(upper)); |
67 | 57 |
i.setPath(path); |
68 |
values.add(i);
|
|
58 |
values.put(i.getLower(), i);
|
|
69 | 59 |
} |
70 | 60 |
|
71 |
Collections.sort(values, new Comparator<InfoPath>() { |
|
72 | 61 |
|
73 |
@Override |
|
74 |
public int compare(final InfoPath o1, final InfoPath o2) { |
|
75 |
if (o1.getLower() < o2.getLower()) return -1; |
|
76 |
else if (o1.getLower() < o2.getLower()) return 0; |
|
77 |
else return 1; |
|
78 |
} |
|
79 |
}); |
|
80 | 62 |
if (log.isDebugEnabled()) { |
81 |
for (InfoPath p : values) { |
|
63 |
for (InfoPath p : values.values()) {
|
|
82 | 64 |
log.debug(String.format("%s -- %s : %s", p.getLower(), p.getUpper(), p.getPath())); |
83 | 65 |
} |
84 | 66 |
} |
85 |
|
|
86 | 67 |
} |
87 | 68 |
|
88 | 69 |
/** |
... | ... | |
96 | 77 |
if (values == null) { |
97 | 78 |
bootstrap(); |
98 | 79 |
} |
99 |
for (int i = 0; i < values.size(); i++) { |
|
100 |
if (pmcID < values.get(i).getLower()) { |
|
101 |
if (i == 0) return null; |
|
102 |
String currentPath = values.get(i - 1).getPath() + "/" + pmcID + ".xml"; |
|
103 |
File f = new File(currentPath); |
|
104 |
log.debug(String.format("try to search in path %s", currentPath)); |
|
105 |
String s = null; |
|
106 |
if (f.exists()) { |
|
107 |
s = f.getPath(); |
|
108 |
log.debug(String.format("found in %s", s)); |
|
109 |
} else { |
|
110 |
log.debug(String.format("not found in %s", s)); |
|
111 |
} |
|
112 |
return s; |
|
113 | 80 |
|
81 |
Map.Entry<Integer, InfoPath> infoPath = values.floorEntry(pmcID); |
|
82 |
if (infoPath != null) { |
|
83 |
|
|
84 |
final String currentPath = infoPath.getValue().getPath() + "/" + pmcID + ".xml"; |
|
85 |
final File f = new File(currentPath); |
|
86 |
log.debug(String.format("try to search in path %s", currentPath)); |
|
87 |
String s = null; |
|
88 |
if (f.exists()) { |
|
89 |
s = f.getPath(); |
|
90 |
log.debug(String.format("found in %s", s)); |
|
91 |
} else { |
|
92 |
log.debug(String.format("not found in %s", s)); |
|
114 | 93 |
} |
94 |
return s; |
|
115 | 95 |
} |
116 | 96 |
log.debug(String.format("PMC with ID: %s not found", pmcID)); |
117 | 97 |
return null; |
Also available in: Unified diff
Reimplemented business logic for PathRetreiver