1
|
package eu.dnetlib.data.collector.plugins.filesystem;
|
2
|
|
3
|
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.HashMap;
import java.util.Iterator;

import org.apache.commons.io.IOUtils;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;

import eu.dnetlib.data.collector.plugins.filesystem.FilesystemCollectorPlugin;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
23
|
|
24
|
@Ignore
|
25
|
public class FileSystemCollectorPluginTest {
|
26
|
|
27
|
Path edh = Paths.get("/var/lib/eagle/content/EDH");
|
28
|
Path dai = Paths.get("/var/lib/eagle/content/DAI/arachne-eagle-images-v1-flat");
|
29
|
Path datacite = Paths.get("/media/andrea/xfs/datacite/output");
|
30
|
|
31
|
@Ignore
|
32
|
@Test
|
33
|
public void testCollection() throws CollectorServiceException {
|
34
|
InterfaceDescriptor descr = new InterfaceDescriptor();
|
35
|
HashMap<String, String> params = new HashMap<String, String>();
|
36
|
params.put("extensions", "xml");
|
37
|
descr.setBaseUrl("file:///var/lib/eagle/content/EDH");
|
38
|
descr.setParams(params);
|
39
|
|
40
|
FilesystemCollectorPlugin plugin = new FilesystemCollectorPlugin();
|
41
|
Iterable<String> result = plugin.collect(descr, null, null);
|
42
|
|
43
|
int counter = 0;
|
44
|
double totalTime = 0;
|
45
|
long lastTimestamp = System.currentTimeMillis();
|
46
|
|
47
|
for (String s : result) {
|
48
|
counter++;
|
49
|
if (counter % 10000 == 0) {
|
50
|
double deltaT = (System.currentTimeMillis() - lastTimestamp) / 1000.00;
|
51
|
totalTime += deltaT;
|
52
|
System.out.println("10K records collected in " + deltaT + " seconds");
|
53
|
lastTimestamp = System.currentTimeMillis();
|
54
|
}
|
55
|
Assert.assertNotNull(s);
|
56
|
}
|
57
|
System.out.println("Total " + counter + " in " + totalTime + " seconds");
|
58
|
|
59
|
}
|
60
|
|
61
|
@Ignore
|
62
|
@Test
|
63
|
public void testJavaNioDirectoryStream() throws IOException {
|
64
|
int counter = 0;
|
65
|
double totalTime = 0;
|
66
|
long lastTimestamp = System.currentTimeMillis();
|
67
|
|
68
|
Iterator<Path> pathIterator = Files.newDirectoryStream(edh).iterator();
|
69
|
while (pathIterator.hasNext()) {
|
70
|
Path next = pathIterator.next();
|
71
|
FileInputStream fileInputStream = new FileInputStream(next.toString());
|
72
|
String s = IOUtils.toString(fileInputStream);
|
73
|
counter++;
|
74
|
if (counter % 10000 == 0) {
|
75
|
double deltaT = (System.currentTimeMillis() - lastTimestamp) / 1000.00;
|
76
|
totalTime += deltaT;
|
77
|
System.out.println("10K records collected in " + deltaT + " seconds");
|
78
|
lastTimestamp = System.currentTimeMillis();
|
79
|
}
|
80
|
Assert.assertNotNull(s);
|
81
|
fileInputStream.close();
|
82
|
}
|
83
|
System.out.println("Total " + counter + " in " + totalTime + " seconds");
|
84
|
}
|
85
|
|
86
|
@Test
|
87
|
public void testJavaNioWalkTree() throws IOException {
|
88
|
|
89
|
FileVisitor<Path> fv = new SimpleFileVisitor<Path>() {
|
90
|
|
91
|
int counter = 0;
|
92
|
double totalTime = 0;
|
93
|
long lastTimestamp = System.currentTimeMillis();
|
94
|
|
95
|
@Override
|
96
|
public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
|
97
|
FileInputStream fileInputStream = new FileInputStream(file.toString());
|
98
|
String s = IOUtils.toString(fileInputStream);
|
99
|
Assert.assertNotNull(s);
|
100
|
counter++;
|
101
|
if (counter % 10000 == 0) {
|
102
|
double deltaT = (System.currentTimeMillis() - lastTimestamp) / 1000.00;
|
103
|
totalTime += deltaT;
|
104
|
System.out.println("10K records collected in " + deltaT + " seconds");
|
105
|
lastTimestamp = System.currentTimeMillis();
|
106
|
}
|
107
|
fileInputStream.close();
|
108
|
return FileVisitResult.CONTINUE;
|
109
|
}
|
110
|
};
|
111
|
|
112
|
try {
|
113
|
Files.walkFileTree(edh, fv);
|
114
|
} catch (IOException e) {
|
115
|
e.printStackTrace();
|
116
|
}
|
117
|
}
|
118
|
|
119
|
}
|