Project

General

Profile

1
package eu.dnetlib.data.collector.plugins;
2

    
3
import java.io.InputStreamReader;
4
import java.io.Reader;
5
import java.net.URL;
6
import java.util.Iterator;
7
import java.util.Set;
8

    
9
import com.google.common.base.Function;
10
import com.google.common.collect.Iterators;
11
import eu.dnetlib.rmi.data.CollectorServiceException;
12
import eu.dnetlib.rmi.data.InterfaceDescriptor;
13
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin;
14
import org.apache.commons.csv.CSVFormat;
15
import org.apache.commons.csv.CSVParser;
16
import org.apache.commons.csv.CSVRecord;
17
import org.apache.commons.lang3.StringUtils;
18
import org.apache.commons.logging.Log;
19
import org.apache.commons.logging.LogFactory;
20
import org.dom4j.Document;
21
import org.dom4j.DocumentHelper;
22
import org.dom4j.Element;
23

    
24
/**
25
 * The Class HttpCSVCollectorPlugin.
26
 */
27
public class HttpCSVCollectorPlugin extends AbstractCollectorPlugin {
28

    
29
	private static final Log log = LogFactory.getLog(HttpCSVCollectorPlugin.class);
30

    
31
	/*
32
	 * (non-Javadoc)
33
	 *
34
	 * @see eu.dnetlib.data.collector.plugin.CollectorPlugin#collect(eu.dnetlib.data.collector.rmi.InterfaceDescriptor, java.lang.String,
35
	 * java.lang.String)
36
	 */
37
	@Override
38
	public Iterable<String> collect(final InterfaceDescriptor descriptor, final String fromDate, final String untilDate) throws CollectorServiceException {
39

    
40
		return new HTTPCSVIterator(descriptor);
41
	}
42

    
43
	/**
44
	 * The Class HTTPCSVIterator.
45
	 */
46
	class HTTPCSVIterator implements Iterable<String> {
47

    
48
		/**
49
		 * The descriptor.
50
		 */
51
		private InterfaceDescriptor descriptor;
52

    
53
		/**
54
		 * Instantiates a new HTTPCSV iterator.
55
		 *
56
		 * @param descriptor the descriptor
57
		 */
58
		public HTTPCSVIterator(final InterfaceDescriptor descriptor) {
59
			this.descriptor = descriptor;
60
		}
61

    
62
		/**
63
		 * Iterator.
64
		 *
65
		 * @return the iterator
66
		 */
67
		@SuppressWarnings("resource")
68
		@Override
69
		public Iterator<String> iterator() {
70

    
71
			try {
72
				final String separatorString = descriptor.getParams().get("separator");
73
				final String identifier = descriptor.getParams().get("identifier");
74
				final String quote = descriptor.getParams().get("quote");
75
				final URL url = new URL(descriptor.getBaseUrl());
76
				url.openConnection();
77

    
78
				final Reader reader = new InputStreamReader(url.openStream());
79
				final char separator = separatorString.equals("\\t") || StringUtils.isBlank(separatorString) ? '\t' : separatorString.charAt(0);
80

    
81
				final CSVFormat format = StringUtils.isBlank(quote) ?
82
						CSVFormat.EXCEL.withHeader().withDelimiter(separator) :
83
						CSVFormat.EXCEL.withHeader().withDelimiter(separator).withQuote(quote.charAt(0));
84

    
85
				final CSVParser parser = new CSVParser(reader, format);
86
				final Set<String> headers = parser.getHeaderMap().keySet();
87

    
88
				return Iterators.transform(parser.iterator(), new Function<CSVRecord, String>() {
89

    
90
					@Override
91
					public String apply(final CSVRecord input) {
92
						final Document document = DocumentHelper.createDocument();
93
						final Element root = document.addElement("csvRecord");
94
						for (String key : headers) {
95
							final Element row = root.addElement("column");
96
							row.addAttribute("name", key).addText(input.get(key));
97
							if (key.equals(identifier)) {
98
								row.addAttribute("isID", "true");
99
							}
100
						}
101

    
102
						return document.asXML();
103
					}
104
				});
105
			} catch (Exception e) {
106
				log.error("Error iterating csv lines", e);
107
				return null;
108
			}
109
		}
110
	}
111

    
112
}
(6-6/8)