Project

General

Profile

1
package eu.dnetlib.data.collector.plugins.filesystem;
2

    
3
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.google.common.collect.Iterators;
import com.google.common.collect.Sets;
17

    
18
/**
19
 * Class enabling lazy & recursive iteration of a filesystem tree. The iterator iterates over file paths.
20
 *
21
 * @author Andrea
22
 *
23
 */
24
public class FileSystemIterator implements Iterator<String> {
25

    
26
	/** The logger */
27
	private static final Log log = LogFactory.getLog(FileSystemIterator.class);
28

    
29
	private Set<String> extensions = Sets.newHashSet();
30
	private Iterator<Path> pathIterator;
31
	private String current;
32

    
33
	public FileSystemIterator(final String baseDir, final String extensions) {
34
		if(StringUtils.isNotBlank(extensions)) {
35
			this.extensions = Sets.newHashSet(extensions.split(","));
36
		}
37
		try {
38
			this.pathIterator = Files.newDirectoryStream(Paths.get(baseDir)).iterator();
39
			this.current = walkTillNext();
40
		} catch (IOException e) {
41
			log.error("Cannot initialize File System Iterator. Is this path correct? " + baseDir);
42
			throw new RuntimeException("Filesystem collection error.", e);
43
		}
44
	}
45

    
46
	@Override
47
	public boolean hasNext() {
48
		return current != null;
49
	}
50

    
51
	@Override
52
	public synchronized String next() {
53
		String pivot = new String(current);
54
		current = walkTillNext();
55
		log.debug("Returning: " + pivot);
56
		return pivot;
57
	}
58

    
59
	@Override
60
	public void remove() {}
61

    
62
	/**
63
	 * Walk the filesystem recursively until it finds a candidate. Strategies: a) For any directory found during the walk, an iterator is
64
	 * built and concat to the main one; b) Any file is checked against admitted extensions
65
	 *
66
	 * @return the next element to be returned by next call of this.next()
67
	 */
68
	private synchronized String walkTillNext() {
69
		while (pathIterator.hasNext()) {
70
			Path nextFilePath = pathIterator.next();
71
			if (Files.isDirectory(nextFilePath)) {
72
				// concat
73
				try {
74
					pathIterator = Iterators.concat(pathIterator, Files.newDirectoryStream(nextFilePath).iterator());
75
					log.debug("Adding folder iterator: " + nextFilePath.toString());
76
				} catch (IOException e) {
77
					log.error("Cannot create folder iterator! Is this path correct? " + nextFilePath.toString());
78
					return null;
79
				}
80
			} else {
81
				if (extensions.isEmpty() || extensions.contains(FilenameUtils.getExtension(nextFilePath.toString()))) {
82
					log.debug("Returning: " + nextFilePath.toString());
83
					return nextFilePath.toString();
84
				}
85
			}
86
		}
87
		return null;
88
	}
89
}
(1-1/3)