Project

General

Profile

1
package eu.dnetlib.data.collector.plugins.filesystem;
2

    
3
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;

import com.google.common.collect.Iterators;
import com.google.common.collect.Sets;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
15

    
16
/**
17
 * Class enabling lazy & recursive iteration of a filesystem tree. The iterator iterates over file paths.
18
 *
19
 * @author Andrea
20
 *
21
 */
22
public class FileSystemIterator implements Iterator<String> {
23

    
24
	/** The logger */
25
	private static final Log log = LogFactory.getLog(FileSystemIterator.class);
26

    
27
	private Set<String> extensions;
28
	private Iterator<Path> pathIterator;
29
	private String current;
30

    
31
	public FileSystemIterator(final String baseDir, final String extensions) {
32
		this.extensions = Sets.newHashSet(extensions.split(","));
33
		try {
34
			this.pathIterator = Files.newDirectoryStream(Paths.get(baseDir)).iterator();
35
			this.current = walkTillNext();
36
		} catch (IOException e) {
37
			log.error("Cannot initialize File System Iterator. Is this path correct? " + baseDir);
38
			throw new RuntimeException("Filesystem collection error.", e);
39
		}
40
	}
41

    
42
	@Override
43
	public boolean hasNext() {
44
		return current != null;
45
	}
46

    
47
	@Override
48
	public synchronized String next() {
49
		String pivot = new String(current);
50
		current = walkTillNext();
51
		log.debug("Returning: " + pivot);
52
		return pivot;
53
	}
54

    
55
	@Override
56
	public void remove() {}
57

    
58
	/**
59
	 * Walk the filesystem recursively until it finds a candidate. Strategies: a) For any directory found during the walk, an iterator is
60
	 * built and concat to the main one; b) Any file is checked against admitted extensions
61
	 *
62
	 * @return the next element to be returned by next call of this.next()
63
	 */
64
	private synchronized String walkTillNext() {
65
		while (pathIterator.hasNext()) {
66
			Path nextFilePath = pathIterator.next();
67
			if (Files.isDirectory(nextFilePath)) {
68
				// concat
69
				try {
70
					pathIterator = Iterators.concat(pathIterator, Files.newDirectoryStream(nextFilePath).iterator());
71
					log.debug("Adding folder iterator: " + nextFilePath.toString());
72
				} catch (IOException e) {
73
					log.error("Cannot create folder iterator! Is this path correct? " + nextFilePath.toString());
74
					return null;
75
				}
76
			} else {
77
				if (extensions.contains(FilenameUtils.getExtension(nextFilePath.toString()))) {
78
					log.debug("Returning: " + nextFilePath.toString());
79
					return nextFilePath.toString();
80
				}
81
			}
82
		}
83
		return null;
84
	}
85
}
(1-1/3)