Project

General

Profile

1
package eu.dnetlib.msro.workers.aggregation.collect.plugins.archive.zip;
2

    
3
import java.io.File;
4
import java.net.MalformedURLException;
5
import java.net.URL;
6
import java.util.stream.Stream;
7

    
8
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
9
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
10
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
12
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner;
13
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
14
import org.springframework.stereotype.Component;
15

    
16
// import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
17

    
18
/**
19
 * Collector pluging for collecting a zipped folder of records
20
 *
21
 * @author Andrea
22
 */
23
@Component
24
@DnetCollectorPlugin("zip")
25
public class ZipCollectorPlugin implements CollectorPlugin {
26

    
27
	@Override
28
	public Stream<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
29
			throws CollectException {
30

    
31
		final String baseUrl = interfaceDescriptor.getBaseUrl();
32
		if ((baseUrl == null) || baseUrl.isEmpty()) { throw new CollectException("Param 'baseurl' is null or empty"); }
33

    
34
		try {
35
			final String zipPath = interfaceDescriptor.getBaseUrl();
36
			final URL zipUrl = new URL(zipPath);
37
			final File zipFile = new File(zipUrl.getPath());
38
			if (!zipFile.exists()) { throw new CollectException(String.format("The base ULR %s, does not exist", zipFile.getPath())); }
39

    
40
			return DnetStreamSupport.stream(new ZipIterator(zipFile.getAbsolutePath()))
41
					.map(s -> s.startsWith("\uFEFF") ? s.substring(1) : s)
42
					.map(XmlCleaner::cleanAllEntities);
43
		} catch (final MalformedURLException e) {
44
			throw new CollectException("Zip collector failed! ", e);
45
		}
46

    
47
	}
48

    
49
}
(1-1/2)