Project

General

Profile

1
package eu.dnetlib.msro.workers.aggregation.collect.plugins.filesystem;
2

    
3
import java.io.File;
4
import java.io.FileInputStream;
5
import java.net.MalformedURLException;
6
import java.net.URL;
7
import java.util.stream.Stream;
8

    
9
import org.apache.commons.io.IOUtils;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.springframework.stereotype.Component;
13

    
14
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
15
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
16
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
17
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
18
import eu.dnetlib.msro.workers.aggregation.collect.plugins.ProtocolParameterType;
19
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner;
20
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
21
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
22

    
23
/**
24
 * @author andrea
25
 */
26
@Component
27
@DnetCollectorPlugin(value = "filesystem", parameters = {
28
		@DnetCollectorParam(value = "extensions", type = ProtocolParameterType.LIST)
29
})
30
public class FilesystemCollectorPlugin implements CollectorPlugin {
31

    
32
	private static final Log log = LogFactory.getLog(FilesystemCollectorPlugin.class);
33

    
34
	@Override
35
	public Stream<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
36
			throws CollectException {
37

    
38
		final String baseUrl = interfaceDescriptor.getBaseUrl();
39
		if ((baseUrl == null) || baseUrl.isEmpty()) { throw new CollectException("Param 'baseurl' is null or empty"); }
40

    
41
		try {
42
			final URL basePath = new URL(baseUrl);
43
			final File baseDir = new File(basePath.getPath());
44
			if (!baseDir.exists()) { throw new CollectException(String.format("The base ULR %s, does not exist", basePath.getPath())); }
45
			final String extension = interfaceDescriptor.getParams().get("extensions");
46

    
47
			return DnetStreamSupport.stream(new FileSystemIterator(baseDir.getAbsolutePath(), extension))
48
					.map(inputFileName -> {
49
						try (FileInputStream fileInputStream = new FileInputStream(inputFileName)) {
50
							final String s = IOUtils.toString(fileInputStream);
51
							return XmlCleaner.cleanAllEntities(s.startsWith("\uFEFF") ? s.substring(1) : s);
52
						} catch (final Exception e) {
53
							log.error("Unable to read " + inputFileName);
54
							return "";
55
						}
56
					});
57
		} catch (final MalformedURLException e) {
58
			throw new CollectException("Filesystem collector failed! ", e);
59
		}
60

    
61
	}
62

    
63
}
(2-2/2)