Project

General

Profile

1 26600 sandro.lab
package eu.dnetlib.data.collector.plugins;
2
3 28859 sandro.lab
import java.io.BufferedReader;
4 44437 michele.ar
import java.io.FileInputStream;
5 26600 sandro.lab
import java.io.IOException;
6 44437 michele.ar
import java.io.InputStreamReader;
7 26600 sandro.lab
import java.net.MalformedURLException;
8
import java.net.URL;
9
import java.util.Iterator;
10
11 44437 michele.ar
import org.apache.commons.io.input.BOMInputStream;
12 50665 jochen.sch
import org.apache.commons.lang3.StringEscapeUtils;
13
import org.apache.commons.lang3.StringUtils;
14 26600 sandro.lab
import org.apache.commons.logging.Log;
15
import org.apache.commons.logging.LogFactory;
16
import org.dom4j.Document;
17
import org.dom4j.DocumentHelper;
18
import org.dom4j.Element;
19
20 44437 michele.ar
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
21
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
22
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
23
24 48023 claudio.at
/**
25
 * Please use eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin instead
26
 */
27
@Deprecated
28 33202 michele.ar
public class FileCSVCollectorPlugin extends AbstractCollectorPlugin {
29 26600 sandro.lab
30
	private static final Log log = LogFactory.getLog(FileCSVCollectorPlugin.class);
31
32
	class FileCSVIterator implements Iterator<String> {
33
34 28859 sandro.lab
		private String next;
35 26600 sandro.lab
36 28859 sandro.lab
		private BufferedReader reader;
37
38
		private String separator;
39 44306 alessia.ba
		private String quote;
40 28859 sandro.lab
41 44437 michele.ar
		public FileCSVIterator(final BufferedReader reader, final String separator, final String quote) {
42 28859 sandro.lab
			this.reader = reader;
43
			this.separator = separator;
44 44306 alessia.ba
			this.quote = quote;
45 28859 sandro.lab
			next = calculateNext();
46
		}
47
48 26600 sandro.lab
		@Override
49
		public boolean hasNext() {
50
			return next != null;
51
		}
52
53
		@Override
54
		public String next() {
55 44437 michele.ar
			final String s = next;
56 28859 sandro.lab
			next = calculateNext();
57
			return s;
58 26600 sandro.lab
		}
59
60
		private String calculateNext() {
61
			try {
62 44437 michele.ar
				final Document document = DocumentHelper.createDocument();
63
				final Element root = document.addElement("csvRecord");
64 26600 sandro.lab
65 28859 sandro.lab
				String newLine = reader.readLine();
66 26600 sandro.lab
67 28859 sandro.lab
				// FOR SOME FILES IT RETURN NULL ALSO IF THE FILE IS NOT READY DONE
68
				if (newLine == null) {
69
					newLine = reader.readLine();
70
				}
71
				if (newLine == null) {
72
					log.info("there is no line, closing RESULT SET");
73
74
					reader.close();
75
					return null;
76
				}
77 44437 michele.ar
				final String[] currentRow = newLine.split(separator);
78 28859 sandro.lab
79 26600 sandro.lab
				if (currentRow != null) {
80
81
					for (int i = 0; i < currentRow.length; i++) {
82 44437 michele.ar
						final String hAttribute = (headers != null) && (i < headers.length) ? headers[i] : "column" + i;
83 26600 sandro.lab
84 44437 michele.ar
						final Element row = root.addElement("column");
85 26600 sandro.lab
						if (i == identifierNumber) {
86
							row.addAttribute("isID", "true");
87
						}
88 44437 michele.ar
						final String value = StringUtils.isBlank(quote) ? currentRow[i] : StringUtils.strip(currentRow[i], quote);
89 44306 alessia.ba
90
						row.addAttribute("name", hAttribute).addText(value);
91 26600 sandro.lab
					}
92
					return document.asXML();
93
				}
94 44437 michele.ar
			} catch (final IOException e) {
95 26600 sandro.lab
				log.error("Error calculating next csv element", e);
96
			}
97
			return null;
98
		}
99
100
		@Override
101
		public void remove() {
102 32953 alessia.ba
			throw new UnsupportedOperationException();
103 26600 sandro.lab
		}
104
105
	}
106
107
	private String[] headers = null;
108
	private int identifierNumber;
109
110
	@Override
111 28859 sandro.lab
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
112
			throws CollectorServiceException {
113 26600 sandro.lab
		final String header = interfaceDescriptor.getParams().get("header");
114
		final String separator = StringEscapeUtils.unescapeJava(interfaceDescriptor.getParams().get("separator"));
115 44306 alessia.ba
		final String quote = interfaceDescriptor.getParams().get("quote");
116 28859 sandro.lab
117 26600 sandro.lab
		identifierNumber = Integer.parseInt(interfaceDescriptor.getParams().get("identifier"));
118
		URL u = null;
119
		try {
120
			u = new URL(interfaceDescriptor.getBaseUrl());
121 44437 michele.ar
		} catch (final MalformedURLException e1) {
122 26600 sandro.lab
			throw new CollectorServiceException(e1);
123
		}
124
		final String baseUrl = u.getPath();
125
126
		log.info("base URL = " + baseUrl);
127 28859 sandro.lab
128 26600 sandro.lab
		try {
129 44437 michele.ar
130
			final BufferedReader br = new BufferedReader(new InputStreamReader(new BOMInputStream(new FileInputStream(baseUrl))));
131
132
			if ((header != null) && "true".equals(header.toLowerCase())) {
133
				final String[] tmpHeader = br.readLine().split(separator);
134
				if (StringUtils.isNotBlank(quote)) {
135
					int i = 0;
136 44306 alessia.ba
					headers = new String[tmpHeader.length];
137 44437 michele.ar
					for (final String h : tmpHeader) {
138 44306 alessia.ba
						headers[i] = StringUtils.strip(h, quote);
139
						i++;
140
					}
141 44437 michele.ar
				} else headers = tmpHeader;
142 26600 sandro.lab
			}
143 48023 claudio.at
			return () -> new FileCSVIterator(br, separator, quote);
144 44437 michele.ar
		} catch (final Exception e) {
145 32953 alessia.ba
			throw new CollectorServiceException(e);
146 26600 sandro.lab
		}
147
	}
148
149
}