Project

General

Profile

1
package eu.dnetlib.data.collector.plugins;
2

    
3
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.NoSuchElementException;

import org.apache.commons.io.input.BOMInputStream;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;

import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
23

    
24
public class FileCSVCollectorPlugin extends AbstractCollectorPlugin {
25

    
26
	private static final Log log = LogFactory.getLog(FileCSVCollectorPlugin.class);
27

    
28
	class FileCSVIterator implements Iterator<String> {
29

    
30
		private String next;
31

    
32
		private BufferedReader reader;
33

    
34
		private String separator;
35
		private String quote;
36

    
37
		public FileCSVIterator(final BufferedReader reader, final String separator, final String quote) {
38
			this.reader = reader;
39
			this.separator = separator;
40
			this.quote = quote;
41
			next = calculateNext();
42
		}
43

    
44
		@Override
45
		public boolean hasNext() {
46
			return next != null;
47
		}
48

    
49
		@Override
50
		public String next() {
51
			final String s = next;
52
			next = calculateNext();
53
			return s;
54
		}
55

    
56
		private String calculateNext() {
57
			try {
58
				final Document document = DocumentHelper.createDocument();
59
				final Element root = document.addElement("csvRecord");
60

    
61
				String newLine = reader.readLine();
62

    
63
				// FOR SOME FILES IT RETURN NULL ALSO IF THE FILE IS NOT READY DONE
64
				if (newLine == null) {
65
					newLine = reader.readLine();
66
				}
67
				if (newLine == null) {
68
					log.info("there is no line, closing RESULT SET");
69

    
70
					reader.close();
71
					return null;
72
				}
73
				final String[] currentRow = newLine.split(separator);
74

    
75
				if (currentRow != null) {
76

    
77
					for (int i = 0; i < currentRow.length; i++) {
78
						final String hAttribute = (headers != null) && (i < headers.length) ? headers[i] : "column" + i;
79

    
80
						final Element row = root.addElement("column");
81
						if (i == identifierNumber) {
82
							row.addAttribute("isID", "true");
83
						}
84
						final String value = StringUtils.isBlank(quote) ? currentRow[i] : StringUtils.strip(currentRow[i], quote);
85

    
86
						row.addAttribute("name", hAttribute).addText(value);
87
					}
88
					return document.asXML();
89
				}
90
			} catch (final IOException e) {
91
				log.error("Error calculating next csv element", e);
92
			}
93
			return null;
94
		}
95

    
96
		@Override
97
		public void remove() {
98
			throw new UnsupportedOperationException();
99
		}
100

    
101
	}
102

    
103
	private String[] headers = null;
104
	private int identifierNumber;
105

    
106
	@Override
107
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
108
			throws CollectorServiceException {
109
		final String header = interfaceDescriptor.getParams().get("header");
110
		final String separator = StringEscapeUtils.unescapeJava(interfaceDescriptor.getParams().get("separator"));
111
		final String quote = interfaceDescriptor.getParams().get("quote");
112

    
113
		identifierNumber = Integer.parseInt(interfaceDescriptor.getParams().get("identifier"));
114
		URL u = null;
115
		try {
116
			u = new URL(interfaceDescriptor.getBaseUrl());
117
		} catch (final MalformedURLException e1) {
118
			throw new CollectorServiceException(e1);
119
		}
120
		final String baseUrl = u.getPath();
121

    
122
		log.info("base URL = " + baseUrl);
123

    
124
		try {
125

    
126
			final BufferedReader br = new BufferedReader(new InputStreamReader(new BOMInputStream(new FileInputStream(baseUrl))));
127

    
128
			if ((header != null) && "true".equals(header.toLowerCase())) {
129
				final String[] tmpHeader = br.readLine().split(separator);
130
				if (StringUtils.isNotBlank(quote)) {
131
					int i = 0;
132
					headers = new String[tmpHeader.length];
133
					for (final String h : tmpHeader) {
134
						headers[i] = StringUtils.strip(h, quote);
135
						i++;
136
					}
137
				} else headers = tmpHeader;
138
			}
139
			return new Iterable<String>() {
140

    
141
				@Override
142
				public Iterator<String> iterator() {
143
					return new FileCSVIterator(br, separator, quote);
144
				}
145
			};
146
		} catch (final Exception e) {
147
			throw new CollectorServiceException(e);
148
		}
149
	}
150

    
151
}
(3-3/7)