Project

General

Profile

1
package eu.dnetlib.data.collector.plugins;
2

    
3
import java.io.BufferedReader;
4
import java.io.File;
5
import java.io.FileReader;
6
import java.io.IOException;
7
import java.net.MalformedURLException;
8
import java.net.URL;
9
import java.util.Iterator;
10
import java.util.List;
11

    
12
import org.apache.commons.lang.StringEscapeUtils;
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15
import org.dom4j.Document;
16
import org.dom4j.DocumentHelper;
17
import org.dom4j.Element;
18

    
19
import com.google.common.collect.Lists;
20

    
21
import eu.dnetlib.data.collector.plugin.CollectorPlugin;
22
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
23
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
24

    
25
public class FileCSVCollectorPlugin implements CollectorPlugin {
26

    
27
	private static final Log log = LogFactory.getLog(FileCSVCollectorPlugin.class);
28

    
29
	class FileCSVIterator implements Iterator<String> {
30

    
31
		private String next;
32

    
33
		private BufferedReader reader;
34

    
35
		private String separator;
36

    
37
		public FileCSVIterator(final BufferedReader reader, final String separator) {
38
			this.reader = reader;
39
			this.separator = separator;
40
			next = calculateNext();
41
		}
42

    
43
		@Override
44
		public boolean hasNext() {
45
			return next != null;
46
		}
47

    
48
		@Override
49
		public String next() {
50
			String s = next;
51
			next = calculateNext();
52
			return s;
53
		}
54

    
55
		private String calculateNext() {
56
			try {
57
				Document document = DocumentHelper.createDocument();
58
				Element root = document.addElement("csvRecord");
59

    
60
				String newLine = reader.readLine();
61

    
62
				// FOR SOME FILES IT RETURN NULL ALSO IF THE FILE IS NOT READY DONE
63
				if (newLine == null) {
64
					newLine = reader.readLine();
65
				}
66
				if (newLine == null) {
67
					log.info("there is no line, closing RESULT SET");
68

    
69
					reader.close();
70
					return null;
71
				}
72
				String[] currentRow = newLine.split(separator);
73

    
74
				if (currentRow != null) {
75

    
76
					for (int i = 0; i < currentRow.length; i++) {
77
						String hAttribute = headers == null ? "column" + i : headers[i];
78

    
79
						Element row = root.addElement("column");
80
						if (i == identifierNumber) {
81
							row.addAttribute("isID", "true");
82
						}
83
						row.addAttribute("name", hAttribute).addText(currentRow[i]);
84
					}
85
					return document.asXML();
86
				}
87
			} catch (IOException e) {
88
				log.error("Error calculating next csv element", e);
89
			}
90
			return null;
91
		}
92

    
93
		@Override
94
		public void remove() {
95
			throw new UnsupportedOperationException();
96
		}
97

    
98
	}
99

    
100
	private String[] headers = null;
101
	private int identifierNumber;
102

    
103
	@Override
104
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
105
			throws CollectorServiceException {
106
		final String header = interfaceDescriptor.getParams().get("header");
107
		final String separator = StringEscapeUtils.unescapeJava(interfaceDescriptor.getParams().get("separator"));
108

    
109
		identifierNumber = Integer.parseInt(interfaceDescriptor.getParams().get("identifier"));
110
		URL u = null;
111
		try {
112
			u = new URL(interfaceDescriptor.getBaseUrl());
113
		} catch (MalformedURLException e1) {
114
			throw new CollectorServiceException(e1);
115
		}
116
		final String baseUrl = u.getPath();
117

    
118
		log.info("base URL = " + baseUrl);
119

    
120
		try {
121
			final BufferedReader br = new BufferedReader(new FileReader(new File(baseUrl)));
122
			if ((header != null) && ("true".equals(header.toLowerCase()))) {
123
				headers = br.readLine().split(separator);
124
			}
125
			return new Iterable<String>() {
126

    
127
				@Override
128
				public Iterator<String> iterator() {
129
					FileCSVIterator it = new FileCSVIterator(br, separator);
130

    
131
					return it;
132
				}
133
			};
134
		} catch (Exception e) {
135
			throw new CollectorServiceException(e);
136
		}
137
	}
138

    
139
	@Override
140
	public String getProtocol() {
141

    
142
		return "fileCSV";
143
	}
144

    
145
	@Override
146
	public List<String> listNameParameters() {
147

    
148
		return Lists.newArrayList("header", "separator", "identifier", "quote");
149
	}
150

    
151
}
(3-3/6)