Project

General

Profile

« Previous | Next » 

Revision 47519

FS Plugin: for claims, we need to be able to collect JSON files

View differences:

FilesystemIterable.java
6 6
import java.net.MalformedURLException;
7 7
import java.net.URL;
8 8
import java.util.Iterator;
9
import java.util.List;
9 10

  
10
import org.apache.commons.io.IOUtils;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13

  
14
import com.google.common.base.Function;
15 11
import com.google.common.collect.Iterators;
16

  
12
import com.google.common.collect.Lists;
17 13
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
18 14
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
19 15
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
16
import org.apache.commons.io.IOUtils;
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.commons.logging.Log;
19
import org.apache.commons.logging.LogFactory;
20
import org.json.JSONObject;
21
import org.json.XML;
20 22

  
21 23
/**
22 24
 * The Class FilesystemIterable.
......
25 27
 */
26 28
public class FilesystemIterable implements Iterable<String> {
27 29

  
30

  
28 31
	/** The Constant log. */
29 32
	private static final Log log = LogFactory.getLog(FilesystemIterable.class);
30 33

  
......
32 35
	private File baseDir;
33 36

  
34 37
	/** The extensions. */
35
	private String extension;
38
	private String extensions;
36 39

  
40
	/** Format of the collected files: "xml" (the default) or "json". */
41
	private String fileFormat = "xml";
42

  
43
	private List<String> supportedFormats = Lists.newArrayList("xml", "json");
44

  
37 45
	/**
38 46
	 * Instantiates a new filesystem iterable.
39 47
	 *
......
48 56
			URL basePath = new URL(baseUrl);
49 57
			this.baseDir = new File(basePath.getPath());
50 58
			if (!baseDir.exists()) { throw new CollectorServiceException(String.format("The base ULR %s, does not exist", basePath.getPath())); }
51
			this.extension = descriptor.getParams().get("extensions");
59
			this.extensions = descriptor.getParams().get("extensions");
60
			if(descriptor.getParams().containsKey("fileFormat")) fileFormat = descriptor.getParams().get("fileFormat");
61
			if(!supportedFormats.contains(fileFormat)) throw new CollectorServiceException("File format "+fileFormat+" not supported. Supported formats are: "+ StringUtils
62
					.join(supportedFormats, ','));
52 63
		} catch (MalformedURLException e) {
53 64
			throw new CollectorServiceException("Filesystem collector failed! ", e);
54 65
		}
......
61 72
	 */
62 73
	@Override
63 74
	public Iterator<String> iterator() {
64
		final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), extension);
65
		return Iterators.transform(fsi, new Function<String, String>() {
66

  
67
			@Override
68
			public String apply(final String inputFileName) {
69
				FileInputStream fileInputStream = null;
70
				try {
71
					fileInputStream = new FileInputStream(inputFileName);
72
					final String s = IOUtils.toString(fileInputStream);
73
					return XmlCleaner.cleanAllEntities(s.startsWith("\uFEFF") ? s.substring(1) : s);
74
				} catch (Exception e) {
75
					log.error("Unable to read " + inputFileName);
76
					return "";
77
				} finally {
78
					if (fileInputStream != null) {
79
						try {
80
							fileInputStream.close();
81
						} catch (IOException e) {
82
							log.error("Unable to close inputstream for  " + inputFileName);
83
						}
75
		final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), extensions);
76
		return Iterators.transform(fsi, inputFileName -> {
77
			FileInputStream fileInputStream = null;
78
			try {
79
				fileInputStream = new FileInputStream(inputFileName);
80
				final String s = IOUtils.toString(fileInputStream);
81
				if(fileFormat.equalsIgnoreCase("json")){
82
					JSONObject json = new JSONObject(s);
83
					return XML.toString(json, "record");
84
				}
85
				return XmlCleaner.cleanAllEntities(s.startsWith("\uFEFF") ? s.substring(1) : s);
86
			} catch (Exception e) {
87
				log.error("Unable to read " + inputFileName);
88
				return "";
89
			} finally {
90
				if (fileInputStream != null) {
91
					try {
92
						fileInputStream.close();
93
					} catch (IOException e) {
94
						log.error("Unable to close inputstream for  " + inputFileName);
84 95
					}
85 96
				}
86 97
			}

Also available in: Unified diff