Revision 47519
Added by Alessia Bardi almost 7 years ago
FilesystemIterable.java | ||
---|---|---|
6 | 6 |
import java.net.MalformedURLException; |
7 | 7 |
import java.net.URL; |
8 | 8 |
import java.util.Iterator; |
9 |
import java.util.List; |
|
9 | 10 |
|
10 |
import org.apache.commons.io.IOUtils; |
|
11 |
import org.apache.commons.logging.Log; |
|
12 |
import org.apache.commons.logging.LogFactory; |
|
13 |
|
|
14 |
import com.google.common.base.Function; |
|
15 | 11 |
import com.google.common.collect.Iterators; |
16 |
|
|
12 |
import com.google.common.collect.Lists; |
|
17 | 13 |
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner; |
18 | 14 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
19 | 15 |
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor; |
16 |
import org.apache.commons.io.IOUtils; |
|
17 |
import org.apache.commons.lang.StringUtils; |
|
18 |
import org.apache.commons.logging.Log; |
|
19 |
import org.apache.commons.logging.LogFactory; |
|
20 |
import org.json.JSONObject; |
|
21 |
import org.json.XML; |
|
20 | 22 |
|
21 | 23 |
/** |
22 | 24 |
* The Class FilesystemIterable. |
... | ... | |
25 | 27 |
*/ |
26 | 28 |
public class FilesystemIterable implements Iterable<String> { |
27 | 29 |
|
30 |
|
|
28 | 31 |
/** The Constant log. */ |
29 | 32 |
private static final Log log = LogFactory.getLog(FilesystemIterable.class); |
30 | 33 |
|
... | ... | |
32 | 35 |
private File baseDir; |
33 | 36 |
|
34 | 37 |
/** The extensions. */ |
35 |
private String extension; |
|
38 |
private String extensions;
|
|
36 | 39 |
|
40 |
/** The format of the files to collect: "xml" (the default) or "json". */ |
|
41 |
private String fileFormat = "xml"; |
|
42 |
|
|
43 |
private List<String> supportedFormats = Lists.newArrayList("xml", "json"); |
|
44 |
|
|
37 | 45 |
/** |
38 | 46 |
* Instantiates a new filesystem iterable. |
39 | 47 |
* |
... | ... | |
48 | 56 |
URL basePath = new URL(baseUrl); |
49 | 57 |
this.baseDir = new File(basePath.getPath()); |
50 | 58 |
if (!baseDir.exists()) { throw new CollectorServiceException(String.format("The base ULR %s, does not exist", basePath.getPath())); } |
51 |
this.extension = descriptor.getParams().get("extensions"); |
|
59 |
this.extensions = descriptor.getParams().get("extensions"); |
|
60 |
if(descriptor.getParams().containsKey("fileFormat")) fileFormat = descriptor.getParams().get("fileFormat"); |
|
61 |
if(!supportedFormats.contains(fileFormat)) throw new CollectorServiceException("File format "+fileFormat+" not supported. Supported formats are: "+ StringUtils |
|
62 |
.join(supportedFormats, ',')); |
|
52 | 63 |
} catch (MalformedURLException e) { |
53 | 64 |
throw new CollectorServiceException("Filesystem collector failed! ", e); |
54 | 65 |
} |
... | ... | |
61 | 72 |
*/ |
62 | 73 |
@Override |
63 | 74 |
public Iterator<String> iterator() { |
64 |
final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), extension); |
|
65 |
return Iterators.transform(fsi, new Function<String, String>() {
|
|
66 |
|
|
67 |
@Override
|
|
68 |
public String apply(final String inputFileName) {
|
|
69 |
FileInputStream fileInputStream = null;
|
|
70 |
try {
|
|
71 |
fileInputStream = new FileInputStream(inputFileName);
|
|
72 |
final String s = IOUtils.toString(fileInputStream);
|
|
73 |
return XmlCleaner.cleanAllEntities(s.startsWith("\uFEFF") ? s.substring(1) : s);
|
|
74 |
} catch (Exception e) {
|
|
75 |
log.error("Unable to read " + inputFileName);
|
|
76 |
return "";
|
|
77 |
} finally {
|
|
78 |
if (fileInputStream != null) {
|
|
79 |
try {
|
|
80 |
fileInputStream.close();
|
|
81 |
} catch (IOException e) {
|
|
82 |
log.error("Unable to close inputstream for " + inputFileName);
|
|
83 |
}
|
|
75 |
final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), extensions);
|
|
76 |
return Iterators.transform(fsi, inputFileName -> {
|
|
77 |
FileInputStream fileInputStream = null; |
|
78 |
try {
|
|
79 |
fileInputStream = new FileInputStream(inputFileName);
|
|
80 |
final String s = IOUtils.toString(fileInputStream);
|
|
81 |
if(fileFormat.equalsIgnoreCase("json")){
|
|
82 |
JSONObject json = new JSONObject(s);
|
|
83 |
return XML.toString(json, "record");
|
|
84 |
}
|
|
85 |
return XmlCleaner.cleanAllEntities(s.startsWith("\uFEFF") ? s.substring(1) : s);
|
|
86 |
} catch (Exception e) {
|
|
87 |
log.error("Unable to read " + inputFileName);
|
|
88 |
return "";
|
|
89 |
} finally {
|
|
90 |
if (fileInputStream != null) {
|
|
91 |
try {
|
|
92 |
fileInputStream.close();
|
|
93 |
} catch (IOException e) {
|
|
94 |
log.error("Unable to close inputstream for " + inputFileName);
|
|
84 | 95 |
} |
85 | 96 |
} |
86 | 97 |
} |
Also available in: Unified diff
FS Plugin: to support claims, the filesystem collector must also be able to collect JSON files.