Project

General

Profile

1
package eu.dnetlib.data.collector.plugins;
2

    
3
import java.io.BufferedReader;
4
import java.io.File;
5
import java.io.FileReader;
6
import java.io.IOException;
7
import java.net.MalformedURLException;
8
import java.net.URL;
9
import java.util.Iterator;
10

    
11
import eu.dnetlib.rmi.data.CollectorServiceException;
12
import eu.dnetlib.rmi.data.InterfaceDescriptor;
13
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin;
14
import org.apache.commons.lang3.StringEscapeUtils;
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17
import org.dom4j.Document;
18
import org.dom4j.DocumentHelper;
19
import org.dom4j.Element;
20

    
21
public class FileCSVCollectorPlugin extends AbstractCollectorPlugin {
22

    
23
	private static final Log log = LogFactory.getLog(FileCSVCollectorPlugin.class);
24
	private String[] headers = null;
25
	private int identifierNumber;
26

    
27
	@Override
28
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
29
			throws CollectorServiceException {
30
		final String header = interfaceDescriptor.getParams().get("header");
31
		final String separator = StringEscapeUtils.unescapeJava(interfaceDescriptor.getParams().get("separator"));
32

    
33
		identifierNumber = Integer.parseInt(interfaceDescriptor.getParams().get("identifier"));
34
		URL u = null;
35
		try {
36
			u = new URL(interfaceDescriptor.getBaseUrl());
37
		} catch (MalformedURLException e1) {
38
			throw new CollectorServiceException(e1);
39
		}
40
		final String baseUrl = u.getPath();
41

    
42
		log.info("base URL = " + baseUrl);
43

    
44
		try {
45
			final BufferedReader br = new BufferedReader(new FileReader(new File(baseUrl)));
46
			if (header != null && "true".equals(header.toLowerCase())) {
47
				headers = br.readLine().split(separator);
48
			}
49
			return new Iterable<String>() {
50

    
51
				@Override
52
				public Iterator<String> iterator() {
53
					return new FileCSVIterator(br, separator);
54
				}
55
			};
56
		} catch (Exception e) {
57
			throw new CollectorServiceException(e);
58
		}
59
	}
60

    
61
	class FileCSVIterator implements Iterator<String> {
62

    
63
		private String next;
64

    
65
		private BufferedReader reader;
66

    
67
		private String separator;
68

    
69
		public FileCSVIterator(final BufferedReader reader, final String separator) {
70
			this.reader = reader;
71
			this.separator = separator;
72
			next = calculateNext();
73
		}
74

    
75
		@Override
76
		public boolean hasNext() {
77
			return next != null;
78
		}
79

    
80
		@Override
81
		public String next() {
82
			String s = next;
83
			next = calculateNext();
84
			return s;
85
		}
86

    
87
		private String calculateNext() {
88
			try {
89
				Document document = DocumentHelper.createDocument();
90
				Element root = document.addElement("csvRecord");
91

    
92
				String newLine = reader.readLine();
93

    
94
				// FOR SOME FILES IT RETURN NULL ALSO IF THE FILE IS NOT READY DONE
95
				if (newLine == null) {
96
					newLine = reader.readLine();
97
				}
98
				if (newLine == null) {
99
					log.info("there is no line, closing RESULT SET");
100

    
101
					reader.close();
102
					return null;
103
				}
104
				String[] currentRow = newLine.split(separator);
105

    
106
				if (currentRow != null) {
107

    
108
					for (int i = 0; i < currentRow.length; i++) {
109
						String hAttribute = headers != null && i < headers.length ? headers[i] : "column" + i;
110

    
111
						Element row = root.addElement("column");
112
						if (i == identifierNumber) {
113
							row.addAttribute("isID", "true");
114
						}
115
						row.addAttribute("name", hAttribute).addText(currentRow[i]);
116
					}
117
					return document.asXML();
118
				}
119
			} catch (IOException e) {
120
				log.error("Error calculating next csv element", e);
121
			}
122
			return null;
123
		}
124

    
125
		@Override
126
		public void remove() {
127
			throw new UnsupportedOperationException();
128
		}
129

    
130
	}
131

    
132
}
(3-3/7)