Project

General

Profile

1
package eu.dnetlib.data.collector.plugins;
2

    
3
import java.io.BufferedReader;
4
import java.io.FileInputStream;
5
import java.io.IOException;
6
import java.io.InputStreamReader;
7
import java.net.MalformedURLException;
8
import java.net.URL;
9
import java.util.Iterator;
10

    
11
import org.apache.commons.io.input.BOMInputStream;
12
import org.apache.commons.lang.StringEscapeUtils;
13
import org.apache.commons.lang.StringUtils;
14
import org.apache.commons.logging.Log;
15
import org.apache.commons.logging.LogFactory;
16
import org.dom4j.Document;
17
import org.dom4j.DocumentHelper;
18
import org.dom4j.Element;
19

    
20
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
21
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
22
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
23

    
24
/**
25
 * Please use eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin instead
26
 */
27
@Deprecated
28
public class FileCSVCollectorPlugin extends AbstractCollectorPlugin {
29

    
30
	private static final Log log = LogFactory.getLog(FileCSVCollectorPlugin.class);
31

    
32
	class FileCSVIterator implements Iterator<String> {
33

    
34
		private String next;
35

    
36
		private BufferedReader reader;
37

    
38
		private String separator;
39
		private String quote;
40

    
41
		public FileCSVIterator(final BufferedReader reader, final String separator, final String quote) {
42
			this.reader = reader;
43
			this.separator = separator;
44
			this.quote = quote;
45
			next = calculateNext();
46
		}
47

    
48
		@Override
49
		public boolean hasNext() {
50
			return next != null;
51
		}
52

    
53
		@Override
54
		public String next() {
55
			final String s = next;
56
			next = calculateNext();
57
			return s;
58
		}
59

    
60
		private String calculateNext() {
61
			try {
62
				final Document document = DocumentHelper.createDocument();
63
				final Element root = document.addElement("csvRecord");
64

    
65
				String newLine = reader.readLine();
66

    
67
				// FOR SOME FILES IT RETURN NULL ALSO IF THE FILE IS NOT READY DONE
68
				if (newLine == null) {
69
					newLine = reader.readLine();
70
				}
71
				if (newLine == null) {
72
					log.info("there is no line, closing RESULT SET");
73

    
74
					reader.close();
75
					return null;
76
				}
77
				final String[] currentRow = newLine.split(separator);
78

    
79
				if (currentRow != null) {
80

    
81
					for (int i = 0; i < currentRow.length; i++) {
82
						final String hAttribute = (headers != null) && (i < headers.length) ? headers[i] : "column" + i;
83

    
84
						final Element row = root.addElement("column");
85
						if (i == identifierNumber) {
86
							row.addAttribute("isID", "true");
87
						}
88
						final String value = StringUtils.isBlank(quote) ? currentRow[i] : StringUtils.strip(currentRow[i], quote);
89

    
90
						row.addAttribute("name", hAttribute).addText(value);
91
					}
92
					return document.asXML();
93
				}
94
			} catch (final IOException e) {
95
				log.error("Error calculating next csv element", e);
96
			}
97
			return null;
98
		}
99

    
100
		@Override
101
		public void remove() {
102
			throw new UnsupportedOperationException();
103
		}
104

    
105
	}
106

    
107
	private String[] headers = null;
108
	private int identifierNumber;
109

    
110
	@Override
111
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
112
			throws CollectorServiceException {
113
		final String header = interfaceDescriptor.getParams().get("header");
114
		final String separator = StringEscapeUtils.unescapeJava(interfaceDescriptor.getParams().get("separator"));
115
		final String quote = interfaceDescriptor.getParams().get("quote");
116

    
117
		identifierNumber = Integer.parseInt(interfaceDescriptor.getParams().get("identifier"));
118
		URL u = null;
119
		try {
120
			u = new URL(interfaceDescriptor.getBaseUrl());
121
		} catch (final MalformedURLException e1) {
122
			throw new CollectorServiceException(e1);
123
		}
124
		final String baseUrl = u.getPath();
125

    
126
		log.info("base URL = " + baseUrl);
127

    
128
		try {
129

    
130
			final BufferedReader br = new BufferedReader(new InputStreamReader(new BOMInputStream(new FileInputStream(baseUrl))));
131

    
132
			if ((header != null) && "true".equals(header.toLowerCase())) {
133
				final String[] tmpHeader = br.readLine().split(separator);
134
				if (StringUtils.isNotBlank(quote)) {
135
					int i = 0;
136
					headers = new String[tmpHeader.length];
137
					for (final String h : tmpHeader) {
138
						headers[i] = StringUtils.strip(h, quote);
139
						i++;
140
					}
141
				} else headers = tmpHeader;
142
			}
143
			return () -> new FileCSVIterator(br, separator, quote);
144
		} catch (final Exception e) {
145
			throw new CollectorServiceException(e);
146
		}
147
	}
148

    
149
}
(3-3/7)