1
|
package eu.dnetlib.msro.workers.aggregation.collect.plugins.filesystem;
|
2
|
|
3
|
import java.io.File;
|
4
|
import java.io.FileInputStream;
|
5
|
import java.net.MalformedURLException;
|
6
|
import java.net.URL;
|
7
|
import java.util.stream.Stream;
|
8
|
|
9
|
import org.apache.commons.io.IOUtils;
|
10
|
import org.apache.commons.logging.Log;
|
11
|
import org.apache.commons.logging.LogFactory;
|
12
|
import org.springframework.stereotype.Component;
|
13
|
|
14
|
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
|
15
|
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
|
16
|
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
|
17
|
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
|
18
|
import eu.dnetlib.msro.workers.aggregation.collect.plugins.ProtocolParameterType;
|
19
|
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner;
|
20
|
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
|
21
|
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
|
22
|
|
23
|
/**
|
24
|
* @author andrea
|
25
|
*/
|
26
|
@Component
|
27
|
@DnetCollectorPlugin(value = "filesystem", parameters = {
|
28
|
@DnetCollectorParam(value = "extensions", type = ProtocolParameterType.LIST)
|
29
|
})
|
30
|
public class FilesystemCollectorPlugin implements CollectorPlugin {
|
31
|
|
32
|
private static final Log log = LogFactory.getLog(FilesystemCollectorPlugin.class);
|
33
|
|
34
|
@Override
|
35
|
public Stream<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
36
|
throws CollectException {
|
37
|
|
38
|
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
39
|
if ((baseUrl == null) || baseUrl.isEmpty()) { throw new CollectException("Param 'baseurl' is null or empty"); }
|
40
|
|
41
|
try {
|
42
|
final URL basePath = new URL(baseUrl);
|
43
|
final File baseDir = new File(basePath.getPath());
|
44
|
if (!baseDir.exists()) { throw new CollectException(String.format("The base ULR %s, does not exist", basePath.getPath())); }
|
45
|
final String extension = interfaceDescriptor.getParams().get("extensions");
|
46
|
|
47
|
return DnetStreamSupport.stream(new FileSystemIterator(baseDir.getAbsolutePath(), extension))
|
48
|
.map(inputFileName -> {
|
49
|
try (FileInputStream fileInputStream = new FileInputStream(inputFileName)) {
|
50
|
final String s = IOUtils.toString(fileInputStream);
|
51
|
return XmlCleaner.cleanAllEntities(s.startsWith("\uFEFF") ? s.substring(1) : s);
|
52
|
} catch (final Exception e) {
|
53
|
log.error("Unable to read " + inputFileName);
|
54
|
return "";
|
55
|
}
|
56
|
});
|
57
|
} catch (final MalformedURLException e) {
|
58
|
throw new CollectException("Filesystem collector failed! ", e);
|
59
|
}
|
60
|
|
61
|
}
|
62
|
|
63
|
}
|