Project

General

Profile

« Previous | Next » 

Revision 48025

[maven-release-plugin] copy for tag dnet-modular-collector-service-3.3.11

View differences:

modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/main/java/eu/dnetlib/data/collector/plugins/sftp/SftpIterator.java
1
package eu.dnetlib.data.collector.plugins.sftp;
2

  
3
import java.io.OutputStream;
4
import java.net.URI;
5
import java.net.URISyntaxException;
6
import java.util.*;
7

  
8
import com.jcraft.jsch.*;
9
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
10
import org.apache.commons.io.output.ByteArrayOutputStream;
11
import org.apache.commons.lang.StringUtils;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14
import org.joda.time.DateTime;
15
import org.joda.time.format.DateTimeFormat;
16
import org.joda.time.format.DateTimeFormatter;
17

  
18
/**
19
 * Created by andrea on 11/01/16.
20
 */
21
public class SftpIterator implements Iterator<String> {
22
    private static final Log log = LogFactory.getLog(SftpIterator.class);
23

  
24
    private static final int MAX_RETRIES = 5;
25
    private static final int DEFAULT_TIMEOUT = 30000;
26
    private static final long BACKOFF_MILLIS = 10000;
27

  
28
    private String baseUrl;
29
    private String sftpURIScheme;
30
    private String sftpServerAddress;
31
    private String remoteSftpBasePath;
32
    private String username;
33
    private String password;
34
    private boolean isRecursive;
35
    private Set<String> extensionsSet;
36
	private boolean incremental;
37

  
38
    private Session sftpSession;
39
    private ChannelSftp sftpChannel;
40

  
41
    private Queue<String> queue;
42

  
43
	private DateTime fromDate = null;
44
	private DateTimeFormatter simpleDateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd");
45

  
46
	public SftpIterator(String baseUrl, String username, String password, boolean isRecursive, Set<String> extensionsSet, String fromDate) {
47
		this.baseUrl = baseUrl;
48
        this.username = username;
49
        this.password = password;
50
        this.isRecursive = isRecursive;
51
        this.extensionsSet = extensionsSet;
52
		this.incremental = StringUtils.isNotBlank(fromDate);
53
		if (incremental) {
54
			//I expect fromDate in the format 'yyyy-MM-dd'. See class eu.dnetlib.msro.workflows.nodes.collect.FindDateRangeForIncrementalHarvestingJobNode .
55
			this.fromDate = DateTime.parse(fromDate, simpleDateTimeFormatter);
56
			log.debug("fromDate string: " + fromDate + " -- parsed: " + this.fromDate.toString());
57
		}
58
		try {
59
            URI sftpServer = new URI(baseUrl);
60
            this.sftpURIScheme = sftpServer.getScheme();
61
            this.sftpServerAddress = sftpServer.getHost();
62
            this.remoteSftpBasePath = sftpServer.getPath();
63
        } catch (URISyntaxException e) {
64
            throw new CollectorServiceRuntimeException("Bad syntax in the URL " + baseUrl);
65
        }
66

  
67
        connectToSftpServer();
68
        initializeQueue();
69
    }
70

  
71
    private void connectToSftpServer() {
72
        JSch jsch = new JSch();
73

  
74
        try {
75
            JSch.setConfig("StrictHostKeyChecking", "no");
76
            sftpSession = jsch.getSession(username, sftpServerAddress);
77
            sftpSession.setPassword(password);
78
            sftpSession.connect();
79

  
80
            Channel channel = sftpSession.openChannel(sftpURIScheme);
81
            channel.connect();
82
            sftpChannel = (ChannelSftp) channel;
83
	        String pwd = sftpChannel.pwd();
84
	        log.debug("PWD from server: " + pwd);
85
	        String fullPath = pwd + remoteSftpBasePath;
86
	        sftpChannel.cd(fullPath);
87
	        log.debug("PWD from server 2 after 'cd " + fullPath + "' : " + sftpChannel.pwd());
88
	        log.info("Connected to SFTP server " + sftpServerAddress);
89
        } catch (JSchException e) {
90
            throw new CollectorServiceRuntimeException("Unable to connect to remote SFTP server.", e);
91
        } catch (SftpException e) {
92
            throw new CollectorServiceRuntimeException("Unable to access the base remote path on the SFTP server.", e);
93
        }
94
    }
95

  
96
    private void disconnectFromSftpServer() {
97
        sftpChannel.exit();
98
        sftpSession.disconnect();
99
    }
100

  
101
    private void initializeQueue() {
102
        queue = new LinkedList<String>();
103
	    log.info(String.format("SFTP collector plugin collecting from %s with recursion = %s, incremental = %s with fromDate=%s", remoteSftpBasePath,
104
			    isRecursive,
105
			    incremental, fromDate));
106
	    listDirectoryRecursive(".", "");
107
    }
108

  
109
    private void listDirectoryRecursive(final String parentDir, final String currentDir) {
110
        String dirToList = parentDir;
111
	    if (StringUtils.isNotBlank(currentDir)) {
112
		    dirToList += "/" + currentDir;
113
        }
114
	    log.debug("PARENT DIR: " + parentDir);
115
	    log.debug("DIR TO LIST: " + dirToList);
116
	    try {
117
            Vector<ChannelSftp.LsEntry> ls = sftpChannel.ls(dirToList);
118
            for (ChannelSftp.LsEntry entry : ls) {
119
                String currentFileName = entry.getFilename();
120
                if (currentFileName.equals(".") || currentFileName.equals("..")) {
121
                    // skip parent directory and directory itself
122
                    continue;
123
                }
124

  
125
                SftpATTRS attrs = entry.getAttrs();
126
                if (attrs.isDir()) {
127
                    if (isRecursive) {
128
                        listDirectoryRecursive(dirToList, currentFileName);
129
                    }
130
                } else {
131
                    // test the file for extensions compliance and, just in case, add it to the list.
132
                    for (String ext : extensionsSet) {
133
                        if (currentFileName.endsWith(ext)) {
134
	                        //test if the file has been changed after the last collection date:
135
	                        if (incremental) {
136
		                        int mTime = attrs.getMTime();
137
		                        //int times are values reduced by the milliseconds, hence we multiply per 1000L
138
		                        DateTime dt = new DateTime(mTime * 1000L);
139
		                        if (dt.isAfter(fromDate)) {
140
			                        queue.add(currentFileName);
141
			                        log.debug(currentFileName + " has changed and must be re-collected");
142
		                        } else {
143
			                        if (log.isDebugEnabled()) {
144
				                        log.debug(currentFileName + " has not changed since last collection");
145
			                        }
146
		                        }
147
	                        } else {
148
		                        //if it is not incremental, just add it to the queue
149
		                        queue.add(currentFileName);
150
	                        }
151

  
152
                        }
153
                    }
154
                }
155
            }
156
        } catch (SftpException e) {
157
            throw new CollectorServiceRuntimeException("Cannot list the sftp remote directory", e);
158

  
159
        }
160
    }
161

  
162
    @Override
163
    public boolean hasNext() {
164
        if (queue.isEmpty()) {
165
            disconnectFromSftpServer();
166
            return false;
167
        } else {
168
            return true;
169
        }
170
    }
171

  
172
    @Override
173
    public String next() {
174
        String nextRemotePath = queue.remove();
175
        int nRepeat = 0;
176
	    String fullPathFile = nextRemotePath;
177
	    while (nRepeat < MAX_RETRIES) {
178
            try {
179
                OutputStream baos = new ByteArrayOutputStream();
180
                sftpChannel.get(nextRemotePath, baos);
181
	            if (log.isDebugEnabled()) {
182
		            fullPathFile = sftpChannel.pwd() + "/" + nextRemotePath;
183
		            log.debug(String.format("Collected file from SFTP: %s%s", sftpServerAddress, fullPathFile));
184
	            }
185
	            return baos.toString();
186
            } catch (SftpException e) {
187
                nRepeat++;
188
	            log.warn(String.format("An error occurred [%s] for %s%s, retrying.. [retried %s time(s)]", e.getMessage(), sftpServerAddress, fullPathFile,
189
			            nRepeat));
190
	            // disconnectFromSftpServer();
191
                try {
192
                    Thread.sleep(BACKOFF_MILLIS);
193
                } catch (InterruptedException e1) {
194
                    log.error(e1);
195
                }
196
            }
197
        }
198
	    throw new CollectorServiceRuntimeException(
199
			    String.format("Impossible to retrieve FTP file %s after %s retries. Aborting FTP collection.", fullPathFile, nRepeat));
200
    }
201

  
202
    @Override
203
    public void remove() {
204
        throw new UnsupportedOperationException();
205
    }
206
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/main/java/eu/dnetlib/data/collector/plugins/ftp/FtpCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.ftp;
2

  
3
import com.google.common.base.Splitter;
4
import com.google.common.collect.Sets;
5
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
6
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
7
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
8
import org.springframework.beans.factory.annotation.Required;
9

  
10
import java.util.Iterator;
11
import java.util.Set;
12

  
13
/**
14
 *
15
 * @author Author: Andrea Mannocci
16
 *
17
 */
18
public class FtpCollectorPlugin extends AbstractCollectorPlugin {
19

  
20
	private FtpIteratorFactory ftpIteratorFactory;
21

  
22
	@Override
23
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
24
			throws CollectorServiceException {
25

  
26
		final String baseUrl = interfaceDescriptor.getBaseUrl();
27
		final String username = interfaceDescriptor.getParams().get("username");
28
		final String password = interfaceDescriptor.getParams().get("password");
29
		final String recursive = interfaceDescriptor.getParams().get("recursive");
30
		final String extensions = interfaceDescriptor.getParams().get("extensions");
31

  
32
		if ((baseUrl == null) || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
33
		if ((username == null) || username.isEmpty()) { throw new CollectorServiceException("Param 'username' is null or empty"); }
34
		if ((password == null) || password.isEmpty()) { throw new CollectorServiceException("Param 'password' is null or empty"); }
35
		if ((recursive == null) || recursive.isEmpty()) { throw new CollectorServiceException("Param 'recursive' is null or empty"); }
36
		if ((extensions == null) || extensions.isEmpty()) { throw new CollectorServiceException("Param 'extensions' is null or empty"); }
37

  
38
		return new Iterable<String>() {
39

  
40
			boolean isRecursive = "true".equals(recursive);
41

  
42
			Set<String> extensionsSet = parseSet(extensions);
43

  
44
			@Override
45
			public Iterator<String> iterator() {
46
				return getFtpIteratorFactory().newIterator(baseUrl, username, password, isRecursive, extensionsSet);
47
			}
48

  
49
			private Set<String> parseSet(final String extensions) {
50
				return Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(extensions));
51
			}
52
		};
53
	}
54

  
55
	public FtpIteratorFactory getFtpIteratorFactory() {
56
		return ftpIteratorFactory;
57
	}
58

  
59
	@Required
60
	public void setFtpIteratorFactory(final FtpIteratorFactory ftpIteratorFactory) {
61
		this.ftpIteratorFactory = ftpIteratorFactory;
62
	}
63

  
64
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/main/java/eu/dnetlib/data/collector/plugins/filesfrommetadata/PopulateFileDownloadBasePath.java
1
package eu.dnetlib.data.collector.plugins.filesfrommetadata;
2

  
3
import java.util.List;
4
import java.util.Map;
5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Lists;
8
import eu.dnetlib.data.collector.functions.ParamValuesFunction;
9
import eu.dnetlib.data.collector.rmi.ProtocolParameterValue;
10
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
11
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
12
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15
import org.springframework.beans.factory.annotation.Autowired;
16
import org.springframework.beans.factory.annotation.Value;
17

  
18
/**
19
 * Created by alessia on 17/12/15.
20
 */
21
public class PopulateFileDownloadBasePath implements ParamValuesFunction {
22

  
23
	private static final Log log = LogFactory.getLog(PopulateFileDownloadBasePath.class);
24
	@Autowired
25
	private UniqueServiceLocator serviceLocator;
26

  
27
	@Value("${services.objectstore.basePathList.xquery}")
28
	private String xQueryForObjectStoreBasePath;
29

  
30
	@Override
31
	public List<ProtocolParameterValue> findValues(final String s, final Map<String, String> map) {
32
		try {
33
			return Lists.transform(serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xQueryForObjectStoreBasePath),
34
					new Function<String, ProtocolParameterValue>() {
35
						@Override
36
						public ProtocolParameterValue apply(final String s) {
37
							return new ProtocolParameterValue(s, s);
38
						}
39
					});
40
		} catch (ISLookUpException e) {
41
			log.error("Cannot read Object store service properties", e);
42
		}
43
		return Lists.newArrayList();
44
	}
45

  
46
	public UniqueServiceLocator getServiceLocator() {
47
		return serviceLocator;
48
	}
49

  
50
	public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
51
		this.serviceLocator = serviceLocator;
52
	}
53

  
54
	public String getxQueryForObjectStoreBasePath() {
55
		return xQueryForObjectStoreBasePath;
56
	}
57

  
58
	public void setxQueryForObjectStoreBasePath(final String xQueryForObjectStoreBasePath) {
59
		this.xQueryForObjectStoreBasePath = xQueryForObjectStoreBasePath;
60
	}
61
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/main/java/eu/dnetlib/data/collector/plugins/oaisets/OaiSetsCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.oaisets;
2

  
3
import java.util.Iterator;
4

  
5
import org.springframework.beans.factory.annotation.Required;
6

  
7
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
8
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
9
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
10

  
11
public class OaiSetsCollectorPlugin extends AbstractCollectorPlugin {
12

  
13
	private OaiSetsIteratorFactory oaiSetsIteratorFactory;
14

  
15
	@Override
16
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
17
			throws CollectorServiceException {
18
		final String baseUrl = interfaceDescriptor.getBaseUrl();
19

  
20
		if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
21

  
22
		return new Iterable<String>() {
23

  
24
			@Override
25
			public Iterator<String> iterator() {
26
				return oaiSetsIteratorFactory.newIterator(baseUrl);
27
			}
28
		};
29
	}
30

  
31
	public OaiSetsIteratorFactory getOaiSetsIteratorFactory() {
32
		return oaiSetsIteratorFactory;
33
	}
34

  
35
	@Required
36
	public void setOaiSetsIteratorFactory(final OaiSetsIteratorFactory oaiSetsIteratorFactory) {
37
		this.oaiSetsIteratorFactory = oaiSetsIteratorFactory;
38
	}
39

  
40
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/main/java/eu/dnetlib/data/collector/plugins/filesystem/FileSystemIterator.java
1
package eu.dnetlib.data.collector.plugins.filesystem;
2

  
3
import java.io.IOException;
4
import java.nio.file.Files;
5
import java.nio.file.Path;
6
import java.nio.file.Paths;
7
import java.util.Iterator;
8
import java.util.Set;
9

  
10
import org.apache.commons.io.FilenameUtils;
11
import org.apache.commons.lang.StringUtils;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14

  
15
import com.google.common.collect.Iterators;
16
import com.google.common.collect.Sets;
17

  
18
/**
19
 * Class enabling lazy & recursive iteration of a filesystem tree. The iterator iterates over file paths.
20
 *
21
 * @author Andrea
22
 *
23
 */
24
public class FileSystemIterator implements Iterator<String> {
25

  
26
	/** The logger */
27
	private static final Log log = LogFactory.getLog(FileSystemIterator.class);
28

  
29
	private Set<String> extensions = Sets.newHashSet();
30
	private Iterator<Path> pathIterator;
31
	private String current;
32

  
33
	public FileSystemIterator(final String baseDir, final String extensions) {
34
		if(StringUtils.isNotBlank(extensions)) {
35
			this.extensions = Sets.newHashSet(extensions.split(","));
36
		}
37
		try {
38
			this.pathIterator = Files.newDirectoryStream(Paths.get(baseDir)).iterator();
39
			this.current = walkTillNext();
40
		} catch (IOException e) {
41
			log.error("Cannot initialize File System Iterator. Is this path correct? " + baseDir);
42
			throw new RuntimeException("Filesystem collection error.", e);
43
		}
44
	}
45

  
46
	@Override
47
	public boolean hasNext() {
48
		return current != null;
49
	}
50

  
51
	@Override
52
	public synchronized String next() {
53
		String pivot = new String(current);
54
		current = walkTillNext();
55
		log.debug("Returning: " + pivot);
56
		return pivot;
57
	}
58

  
59
	@Override
60
	public void remove() {}
61

  
62
	/**
63
	 * Walk the filesystem recursively until it finds a candidate. Strategies: a) For any directory found during the walk, an iterator is
64
	 * built and concat to the main one; b) Any file is checked against admitted extensions
65
	 *
66
	 * @return the next element to be returned by next call of this.next()
67
	 */
68
	private synchronized String walkTillNext() {
69
		while (pathIterator.hasNext()) {
70
			Path nextFilePath = pathIterator.next();
71
			if (Files.isDirectory(nextFilePath)) {
72
				// concat
73
				try {
74
					pathIterator = Iterators.concat(pathIterator, Files.newDirectoryStream(nextFilePath).iterator());
75
					log.debug("Adding folder iterator: " + nextFilePath.toString());
76
				} catch (IOException e) {
77
					log.error("Cannot create folder iterator! Is this path correct? " + nextFilePath.toString());
78
					return null;
79
				}
80
			} else {
81
				if (extensions.isEmpty() || extensions.contains(FilenameUtils.getExtension(nextFilePath.toString()))) {
82
					log.debug("Returning: " + nextFilePath.toString());
83
					return nextFilePath.toString();
84
				}
85
			}
86
		}
87
		return null;
88
	}
89
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet45-parent</artifactId>
6
		<version>1.0.0</version>
7
		<relativePath />
8
	</parent>
9
	<modelVersion>4.0.0</modelVersion>
10
	<groupId>eu.dnetlib</groupId>
11
	<artifactId>dnet-modular-collector-service</artifactId>
12
	<packaging>jar</packaging>
13
	<version>3.3.11</version>
14
	<scm>
15
		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11</developerConnection>
16
	</scm>
17
	<dependencies>
18
		<dependency>
19
			<groupId>eu.dnetlib</groupId>
20
			<artifactId>dnet-modular-collector-service-rmi</artifactId>
21
			<version>[1.3.0,2.0.0)</version>
22
		</dependency>
23
		<dependency>
24
			<groupId>eu.dnetlib</groupId>
25
			<artifactId>cnr-resultset-service</artifactId>
26
			<version>[2.0.0,3.0.0)</version>
27
		</dependency>
28
		<dependency>
29
			<groupId>eu.dnetlib</groupId>
30
			<artifactId>cnr-blackboard-common</artifactId>
31
			<version>[2.0.0,3.0.0)</version>
32
		</dependency>
33
		<dependency>
34
			<groupId>javax.servlet</groupId>
35
			<artifactId>javax.servlet-api</artifactId>
36
			<version>${javax.servlet.version}</version>
37
			<scope>provided</scope>
38
		</dependency>
39
		<dependency>
40
			<groupId>net.sf.opencsv</groupId>
41
			<artifactId>opencsv</artifactId>
42
			<version>2.0</version>
43
		</dependency>
44
		<dependency>
45
			<groupId>junit</groupId>
46
			<artifactId>junit</artifactId>
47
			<version>${junit.version}</version>
48
			<scope>test</scope>
49
		</dependency>
50
		<dependency>
51
			<groupId>commons-net</groupId>
52
			<artifactId>commons-net</artifactId>
53
			<version>3.3</version>
54
		</dependency>
55
		<dependency>
56
			<groupId>org.apache.commons</groupId>
57
			<artifactId>commons-compress</artifactId>
58
			<version>1.6</version>
59
		</dependency>
60
		<dependency>
61
			<groupId>org.mockito</groupId>
62
			<artifactId>mockito-core</artifactId>
63
			<version>${mockito.version}</version>
64
			<scope>test</scope>
65
		</dependency>
66
		<dependency>
67
			<groupId>commons-httpclient</groupId>
68
			<artifactId>commons-httpclient</artifactId>
69
			<version>3.1</version>
70
		</dependency>
71
		<dependency>
72
			<groupId>com.google.code.gson</groupId>
73
			<artifactId>gson</artifactId>
74
			<version>${google.gson.version}</version>
75
		</dependency>
76
		<dependency>
77
			<groupId>org.apache.commons</groupId>
78
			<artifactId>commons-csv</artifactId>
79
			<version>1.4</version>
80
		</dependency>
81
		<dependency>
82
			<groupId>com.jcraft</groupId>
83
			<artifactId>jsch</artifactId>
84
			<version>0.1.53</version>
85
		</dependency>
86
		<dependency>
87
			<groupId>joda-time</groupId>
88
			<artifactId>joda-time</artifactId>
89
			<version>2.9.2</version>
90
		</dependency>
91
		<dependency>
92
			<groupId>org.json</groupId>
93
			<artifactId>json</artifactId>
94
			<version>20160810</version>
95
		</dependency>
96

  
97
		<dependency>
98
			<groupId>com.ximpleware</groupId>
99
			<artifactId>vtd-xml</artifactId>
100
			<version>2.13.2</version>
101
		</dependency>
102

  
103

  
104
	</dependencies>
105
</project>
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/main/java/eu/dnetlib/data/collector/plugins/HttpCSVCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins;
2

  
3
import java.io.*;
4
import java.net.URL;
5
import java.util.Iterator;
6
import java.util.Set;
7

  
8
import com.google.common.collect.Iterators;
9
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
10
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
11
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
12
import org.apache.commons.csv.CSVFormat;
13
import org.apache.commons.csv.CSVParser;
14
import org.apache.commons.io.input.BOMInputStream;
15
import org.apache.commons.lang.StringUtils;
16
import org.apache.commons.logging.Log;
17
import org.apache.commons.logging.LogFactory;
18
import org.dom4j.Document;
19
import org.dom4j.DocumentHelper;
20
import org.dom4j.Element;
21

  
22
/**
23
 * The Class HttpCSVCollectorPlugin.
24
 */
25
public class HttpCSVCollectorPlugin extends AbstractCollectorPlugin {
26

  
27
	private static final Log log = LogFactory.getLog(HttpCSVCollectorPlugin.class);
28

  
29
	public static final String UTF8_BOM = "\uFEFF";
30

  
31
	/**
32
	 * The Class HTTPCSVIterator.
33
	 */
34
	class HTTPCSVIterator implements Iterable<String> {
35

  
36
		/** The descriptor. */
37
		private InterfaceDescriptor descriptor;
38

  
39
		/**
40
		 * Instantiates a new HTTPCSV iterator.
41
		 *
42
		 * @param descriptor
43
		 *            the descriptor
44
		 */
45
		public HTTPCSVIterator(final InterfaceDescriptor descriptor) {
46
			this.descriptor = descriptor;
47
		}
48

  
49
		/**
50
		 * Iterator.
51
		 *
52
		 * @return the iterator
53
		 */
54
		@SuppressWarnings("resource")
55
		@Override
56
		public Iterator<String> iterator() {
57

  
58
			try {
59
				final String separator = descriptor.getParams().get("separator");
60
				final String identifier = descriptor.getParams().get("identifier");
61
				final String quote = descriptor.getParams().get("quote");
62
				final URL url = new URL(descriptor.getBaseUrl());
63
				long nLines = 0;
64

  
65
				// FIX
66
				// This code should skip the lines with invalid quotes
67
				final File tempFile = File.createTempFile("csv-", ".tmp");
68
				try (InputStream is = url.openConnection().getInputStream();
69
						BOMInputStream bomIs = new BOMInputStream(is);
70
						BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs));
71
						FileWriter fw = new FileWriter(tempFile)) {
72

  
73
					String line;
74
					while ((line = reader.readLine()) != null) {
75
						if (StringUtils.isBlank(quote) || (quote.charAt(0) != '"') || verifyQuotes(line, separator.charAt(0))) {
76
							fw.write(line);
77
							fw.write("\n");
78
							nLines++;
79
						}
80
					}
81
				}
82
				// END FIX
83

  
84
				final CSVFormat format = CSVFormat.EXCEL
85
						.withHeader()
86
						.withDelimiter(separator.equals("\\t") || StringUtils.isBlank(separator) ? '\t' : separator.charAt(0))
87
						.withQuote(StringUtils.isBlank(quote) ? null : quote.charAt(0))
88
						.withTrim();
89

  
90
				final CSVParser parser = new CSVParser(new FileReader(tempFile), format);
91
				final Set<String> headers = parser.getHeaderMap().keySet();
92

  
93
				final long nRecords = nLines - 1;
94

  
95
				return Iterators.transform(parser.iterator(), input -> {
96
					try {
97
						final Document document = DocumentHelper.createDocument();
98
						final Element root = document.addElement("csvRecord");
99
						for (final String key : headers) {
100
							final Element row = root.addElement("column");
101
							row.addAttribute("name", key).addText(input.get(key));
102
							if (key.equals(identifier)) {
103
								row.addAttribute("isID", "true");
104
							}
105
						}
106

  
107
						return document.asXML();
108
					} finally {
109
						System.out.println(tempFile.getAbsolutePath());
110
						if (parser.getRecordNumber() == nRecords) {
111
							System.out.println("DELETING " + tempFile.getAbsolutePath());
112
							tempFile.delete();
113
						}
114
					}
115
				});
116
			} catch (final Exception e) {
117
				log.error("Error iterating csv lines", e);
118
				return null;
119
			}
120
		}
121

  
122
	}
123

  
124
	/*
125
	 * (non-Javadoc)
126
	 *
127
	 * @see eu.dnetlib.data.collector.plugin.CollectorPlugin#collect(eu.dnetlib.data.collector.rmi.InterfaceDescriptor, java.lang.String,
128
	 * java.lang.String)
129
	 */
130
	@Override
131
	public Iterable<String> collect(final InterfaceDescriptor descriptor, final String fromDate, final String untilDate) throws CollectorServiceException {
132

  
133
		return new HTTPCSVIterator(descriptor);
134
	}
135

  
136
	public boolean verifyQuotes(final String line, final char separator) {
137
		final char[] cs = line.trim().toCharArray();
138
		boolean inField = false;
139
		boolean skipNext = false;
140
		for (int i = 0; i < cs.length; i++) {
141
			if (skipNext) {
142
				skipNext = false;
143
			} else if (inField) {
144
				if ((cs[i] == '\"') && ((i == (cs.length - 1)) || (cs[i + 1] == separator))) {
145
					inField = false;
146
				} else if ((cs[i] == '\"') && (i < (cs.length - 1))) {
147
					if ((cs[i + 1] == '\"')) {
148
						skipNext = true;
149
					} else {
150
						log.warn("Skipped invalid line: " + line);
151
						return false;
152
					}
153
				}
154
			} else {
155
				if ((cs[i] == '\"') && ((i == 0) || (cs[i - 1] == separator))) {
156
					inField = true;
157
				}
158
			}
159
		}
160

  
161
		if (inField) {
162
			log.warn("Skipped invalid line: " + line);
163
			return false;
164
		}
165

  
166
		return true;
167
	}
168

  
169
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/test/java/eu/dnetlib/data/collector/plugins/csv/HTTPCSVCollectorPluginTest.java
1
package eu.dnetlib.data.collector.plugins.csv;
2

  
3
import java.net.URISyntaxException;
4
import java.util.HashMap;
5

  
6
import eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin;
7
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
8
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
9
import org.junit.Test;
10

  
11
import static org.junit.Assert.assertFalse;
12
import static org.junit.Assert.assertTrue;
13

  
14
public class HTTPCSVCollectorPluginTest {
15

  
16
	private String FILE_URL = HTTPCSVCollectorPluginTest.class.getResource("testCSVwithBOM.csv").toString();
17
	final HttpCSVCollectorPlugin plugin = new HttpCSVCollectorPlugin();
18

  
19
	@Test
20
	public void testCSVHeader() throws URISyntaxException, CollectorServiceException {
21

  
22
		final InterfaceDescriptor descr = new InterfaceDescriptor();
23
		final HashMap<String, String> params = new HashMap<String, String>();
24

  
25
		params.put("separator", ",");
26
		params.put("quote", "\"");
27
		params.put("identifier", "ID");
28
		descr.setBaseUrl(FILE_URL);
29
		descr.setParams(params);
30

  
31
		int i = 0;
32
		for (final String s : plugin.collect(descr, null, null)) {
33
			assertTrue(s.length() > 0);
34
			System.out.println(s);
35
			i++;
36
		}
37
		System.out.println(i);
38
		assertTrue(i > 0);
39
	}
40

  
41
	@Test
42
	public void testVerifyQuotesOk(){
43
		String correct = "\"5\",\"Il Padrino\",\"EEEEEEEE \"\"ZZZZZ\"\" EEEEEEEEEE\",1970";
44
		assertTrue(plugin.verifyQuotes(correct, ','));
45
	}
46

  
47
	@Test
48
	public void testVerifyQuotesWRONG(){
49
		String correct = "5\",\"Il Padrino\",\"EEEEEEEE \"ZZZZZ\" EEEEEEEEEE\",1970";
50
		assertFalse(plugin.verifyQuotes(correct, ','));
51
	}
52

  
53
	@Test
54
	public void testSNSF(){
55
		String s = "\"8773\";\"3101-008773\";\"EMBO workshop on structure, function and regulation of membrane transport proteins\";\"\";\"Rossier Bernard C.\";\"Scientific Conferences\";\"Science communication\";\"Département de Pharmacologie & Toxicologie Faculté de Biologie et de Médecine Université de Lausanne\";\"Université de Lausanne - LA\";\"30103\";\"Cellular Biology, Cytology\";\"Biology and Medicine;Basic Biological Research\";\"01.04.1987\";\"30.09.1987\";\"10000.00\";\"\";\"30103\"" ;
56
		assertTrue(plugin.verifyQuotes(s, ';'));
57
	}
58

  
59
	@Test
60
	public void testSNSF2(){
61
		String s = "\"11000\";\"4021-011000\";\"Literarische und nationale Erziehung : Schweizerisches Selbstverständnis in der Literatur für Kinder und Jugend- liche\";\"\";\"Tschirky Rosmarie\";\"NRP 21 Cultural Diversity and National Identity\";\"Programmes;National Research Programmes (NRPs)\";\"Schweiz. Inst. für Kinder- und Jugendmedien\";\"Universität Zürich - ZH\";\"10501\";\"German and English languages and literature\";\"Human and Social Sciences;Linguistics and literature, philosophy\";\"10501\";\"01.10.1986\";\"31.03.1990\";\"308807.00\";\"\"";
62
		assertTrue(plugin.verifyQuotes(s, ';'));
63
	}
64

  
65
	@Test
66
	public void testSNSFInvalid(){
67
		String s = "\"35918\";\"1113-035918\";\"Entwicklung eines dreisprachigen Thesaurus des schweizerischen Rechts zur Unterstützung der Suche in Volltextdatenbanken.\";\"\";\"Verein \"Schweizerische Juristische Datenbank\"\";\"Project funding (Div. I-III)\";\"Project funding\";\"Verein \"\"Schweizerische Juristische Datenbank\"\"\";\"NPO (Biblioth., Museen, Verwalt.) - NPO\";\"10205\";\"Legal sciences\";\"Human and Social Sciences;Economics, law\";\"10205\";\"01.12.1992\";\"31.03.1995\";\"500366.00\";\"\"";
68
		assertFalse(plugin.verifyQuotes(s, ';'));
69
	}
70

  
71
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/test/java/eu/dnetlib/data/collector/plugins/csv/CSVCollectorPluginTest.java
1
package eu.dnetlib.data.collector.plugins.csv;
2

  
3
import java.net.URISyntaxException;
4
import java.net.URL;
5
import java.util.HashMap;
6

  
7
import org.junit.Assert;
8
import org.junit.Test;
9

  
10
import eu.dnetlib.data.collector.plugins.FileCSVCollectorPlugin;
11
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
12
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
13

  
14
public class CSVCollectorPluginTest {
15

  
16
	@Test
17
	public void testCSVHeader() throws URISyntaxException, CollectorServiceException {
18
		URL resource = CSVCollectorPluginTest.class.getResource("/eu/dnetlib/data/collector/filesystem/csv/input.tsv");
19
		InterfaceDescriptor descr = new InterfaceDescriptor();
20
		HashMap<String, String> params = new HashMap<String, String>();
21
		params.put("header", "TrUe");
22
		params.put("separator", "\t");
23
		params.put("identifier", "56");
24
		descr.setBaseUrl(resource.toString());
25
		descr.setParams(params);
26
		FileCSVCollectorPlugin plugin = new FileCSVCollectorPlugin();
27
		int i = 0;
28
		for (String s : plugin.collect(descr, null, null)) {
29
			Assert.assertTrue(s.length() > 0);
30
			i++;
31
			System.out.println(s);
32
			break;
33
		}
34
		Assert.assertTrue(i > 0);
35

  
36
	}
37

  
38

  
39
	@Test
40
	public void testTSVQuote() throws URISyntaxException, CollectorServiceException {
41
		URL resource = CSVCollectorPluginTest.class.getResource("/eu/dnetlib/data/collector/filesystem/csv/input-quoted.tsv");
42
		InterfaceDescriptor descr = new InterfaceDescriptor();
43
		HashMap<String, String> params = new HashMap<String, String>();
44
		params.put("header", "true");
45
		params.put("separator", ";");
46
		params.put("identifier", "0");
47
		params.put("quote", "\\\"");
48
		descr.setBaseUrl(resource.toString());
49
		descr.setParams(params);
50
		FileCSVCollectorPlugin plugin = new FileCSVCollectorPlugin();
51
		int i = 0;
52
		for (String s : plugin.collect(descr, null, null)) {
53
			Assert.assertTrue(s.length() > 0);
54
			i++;
55
			System.out.println(s);
56
			break;
57
		}
58
		Assert.assertTrue(i > 0);
59

  
60
	}
61
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/test/java/eu/dnetlib/data/collector/plugins/oai/OaiCollectorPluginTest.java
1
package eu.dnetlib.data.collector.plugins.oai;
2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertNotNull;
5
import static org.mockito.Mockito.verify;
6
import static org.mockito.Mockito.when;
7

  
8
import java.util.ArrayList;
9
import java.util.HashMap;
10
import java.util.Iterator;
11
import java.util.List;
12

  
13
import org.junit.Before;
14
import org.junit.Test;
15
import org.junit.runner.RunWith;
16
import org.mockito.Mock;
17
import org.mockito.internal.verification.Times;
18
import org.mockito.junit.MockitoJUnitRunner;
19

  
20
import com.google.common.base.Joiner;
21
import com.google.common.collect.Lists;
22

  
23
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
24
import eu.dnetlib.data.collector.rmi.ProtocolDescriptor;
25
import eu.dnetlib.data.collector.rmi.ProtocolParameter;
26

  
27
@RunWith(MockitoJUnitRunner.class)
28
public class OaiCollectorPluginTest {
29

  
30
	private OaiCollectorPlugin oai;
31

  
32
	@Mock
33
	private OaiIteratorFactory oaiIteratorFactory;
34

  
35
	private List<String> elements = Lists.newArrayList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12");
36

  
37
	private Iterator<String> oaiIterator1 = elements.subList(0, 3).iterator();
38
	private Iterator<String> oaiIterator2 = elements.subList(3, 7).iterator();
39
	private Iterator<String> oaiIterator3 = elements.subList(7, elements.size()).iterator();
40

  
41
	private static final String BASE_URL = "http://oai.test.it/oai";
42
	private static final String FORMAT = "oai_dc";
43
	private static final String PROTOCOL = "OAI";
44
	private static final String SET_1 = "set01";
45
	private static final String SET_2 = "set02";
46
	private static final String SET_3 = "set03";
47

  
48
	@Before
49
	public void setUp() {
50
		oai = new OaiCollectorPlugin();
51
		oai.setOaiIteratorFactory(oaiIteratorFactory);
52
		oai.setProtocolDescriptor(new ProtocolDescriptor(PROTOCOL, new ArrayList<ProtocolParameter>()));
53
		when(oaiIteratorFactory.newIterator(BASE_URL, FORMAT, SET_1, null, null)).thenReturn(oaiIterator1);
54
		when(oaiIteratorFactory.newIterator(BASE_URL, FORMAT, SET_2, null, null)).thenReturn(oaiIterator2);
55
		when(oaiIteratorFactory.newIterator(BASE_URL, FORMAT, SET_3, null, null)).thenReturn(oaiIterator3);
56
	}
57

  
58
	public void test() {
59
		oai = new OaiCollectorPlugin();
60
	}
61

  
62
	@Test
63
	public void testGetProtocol() {
64
		assertEquals(PROTOCOL, oai.getProtocol());
65
	}
66

  
67
	@Test
68
	public void testCollect() throws Exception {
69
		final InterfaceDescriptor iface = new InterfaceDescriptor();
70
		iface.setId("123");
71
		iface.setProtocol(PROTOCOL);
72
		iface.setBaseUrl(BASE_URL);
73
		iface.setParams(new HashMap<String, String>());
74
		iface.getParams().put("format", FORMAT);
75
		iface.getParams().put("set", Joiner.on(", ").join(SET_1, SET_2, SET_3));
76

  
77
		final Iterable<String> records = oai.collect(iface, null, null);
78

  
79
		assertNotNull(records);
80
		verify(oaiIteratorFactory, new Times(0)).newIterator(BASE_URL, FORMAT, SET_1, null, null);
81
		verify(oaiIteratorFactory, new Times(0)).newIterator(BASE_URL, FORMAT, SET_2, null, null);
82
		verify(oaiIteratorFactory, new Times(0)).newIterator(BASE_URL, FORMAT, SET_3, null, null);
83

  
84
		int count = 0;
85
		for (String s : records) {
86
			System.out.println("RECORD: " + s);
87
			assertEquals("" + count, s);
88
			count++;
89
		}
90
		assertEquals(elements.size(), count);
91
		verify(oaiIteratorFactory).newIterator(BASE_URL, FORMAT, SET_1, null, null);
92
		verify(oaiIteratorFactory).newIterator(BASE_URL, FORMAT, SET_2, null, null);
93
		verify(oaiIteratorFactory).newIterator(BASE_URL, FORMAT, SET_3, null, null);
94
	}
95
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/main/java/eu/dnetlib/data/collector/plugins/oai/engine/XmlCleaner.java
1
package eu.dnetlib.data.collector.plugins.oai.engine;
2

  
3
import java.util.HashMap;
4
import java.util.HashSet;
5
import java.util.Map;
6
import java.util.Set;
7
import java.util.regex.Pattern;
8

  
9
/**
 * Repairs text so that it only contains entities and characters an XML parser
 * accepts: named HTML entities are replaced by their characters, unknown
 * entities are dropped, bare ampersands are escaped, and doubled angle
 * brackets are escaped.
 *
 * @author jochen
 */
public class XmlCleaner {

	/** Replacement written for an ampersand that does not start an entity. */
	private static final String ESCAPED_AMP = "&amp;"; //$NON-NLS-1$

	/**
	 * Pattern for numeric entities that are passed through untouched.
	 * NOTE(review): the digits after an optional 'x' are decimal only, so hex
	 * references containing letters (for example &amp;#xA0;) are not recognised
	 * here and end up with their ampersand escaped - confirm this is intended.
	 */
	private static final Pattern VALID_CHARACTER_ENTITY = Pattern.compile("^&#x?\\d{2,4};"); //$NON-NLS-1$

	/** The vertical-tab reference, which is removed before any other processing. */
	private static final Pattern INVALID_CONTROL_CHAR = Pattern.compile("&#11;");

	/**
	 * Pattern that negates the allowable XML characters. Valid per the XML
	 * specification are: #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
	 * [#x10000-#x10FFFF].
	 * NOTE(review): the class below lists no range above #xFFFD, so surrogate
	 * code units (and with them supplementary characters) are stripped as well.
	 */
	private static final Pattern INVALID_CHARACTER = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); //$NON-NLS-1$

	/** Names of the Latin-1 entities for code points 0xA0 to 0xFF, in code point order. */
	private static final String[] LATIN1_ENTITY_NAMES = {
		"nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect", // A0-A7
		"uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr", // A8-AF
		"deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot", // B0-B7
		"cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest", // B8-BF
		"Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil", // C0-C7
		"Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml", // C8-CF
		"ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times", // D0-D7
		"Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig", // D8-DF
		"agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil", // E0-E7
		"egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml", // E8-EF
		"eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide", // F0-F7
		"oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml" // F8-FF
	};

	/** Entities that are kept exactly as they are. */
	private static final Set<String> GOOD_ENTITIES = new HashSet<String>();

	/** Entities that are replaced, mapped to their replacement text. */
	private static final Map<String, String> BAD_ENTITIES = new HashMap<String, String>();

	static {
		// pre-defined XML entities
		GOOD_ENTITIES.add("&quot;"); //$NON-NLS-1$ // quotation mark
		GOOD_ENTITIES.add("&amp;"); //$NON-NLS-1$ // ampersand
		GOOD_ENTITIES.add("&lt;"); //$NON-NLS-1$ // less-than sign
		GOOD_ENTITIES.add("&gt;"); //$NON-NLS-1$ // greater-than sign

		// control entities (illegal HTML characters) map to a blank.
		// NOTE(review): cleanAllEntities skips anything matching
		// VALID_CHARACTER_ENTITY before consulting this map, and these
		// three-digit references match it, so these entries are never used.
		for (int code = 127; code <= 159; code++) {
			BAD_ENTITIES.put("&#" + code + ";", " "); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
		}

		// misc entities
		BAD_ENTITIES.put("&euro;", "\u20AC"); //$NON-NLS-1$ //$NON-NLS-2$ // euro
		BAD_ENTITIES.put("&lsquo;", "\u2018"); //$NON-NLS-1$ //$NON-NLS-2$ // left single quotation mark
		BAD_ENTITIES.put("&rsquo;", "\u2019"); //$NON-NLS-1$ //$NON-NLS-2$ // right single quotation mark

		// Latin 1 entities: the n-th name stands for code point 0xA0 + n
		for (int offset = 0; offset < LATIN1_ENTITY_NAMES.length; offset++) {
			BAD_ENTITIES.put("&" + LATIN1_ENTITY_NAMES[offset] + ";", String.valueOf((char) (0xA0 + offset))); //$NON-NLS-1$ //$NON-NLS-2$
		}
	}

	/**
	 * For each entity in the input that is not allowed in XML, replace the
	 * entity with its unicode equivalent or remove it. For each instance of a
	 * bare &amp;, replace it with &amp;amp;. Finally every "&lt;&lt;" and
	 * "&gt;&gt;" pair is escaped.<br/>
	 * XML only allows 4 named entities: &amp;amp;, &amp;quot;, &amp;lt; and &amp;gt;.
	 *
	 * @param broken
	 *            the string to handle entities, may be null
	 * @return the string with entities appropriately fixed up, or null when the
	 *         input is null
	 */
	static public String cleanAllEntities(final String broken) {
		if (broken == null) {
			return null;
		}

		String working = INVALID_CONTROL_CHAR.matcher(broken).replaceAll("");
		working = INVALID_CHARACTER.matcher(working).replaceAll("");

		int cleanFrom = 0;
		while (true) {
			final int amp = working.indexOf('&', cleanFrom);
			// If there are no more amps then we are done
			if (amp == -1) {
				break;
			}
			// Skip references of the kind &#ddd;
			if (VALID_CHARACTER_ENTITY.matcher(working.substring(amp)).find()) {
				cleanFrom = working.indexOf(';', amp) + 1;
				continue;
			}

			// Walk over the candidate entity name (letters and digits only).
			int end = amp + 1;
			while (end < working.length() && Character.isLetterOrDigit(working.charAt(end))) {
				end++;
			}

			final String replacement;
			final String tail;
			if (end < working.length() && working.charAt(end) == ';') {
				// A complete entity: keep, translate or drop it.
				replacement = handleEntity(working.substring(amp, end + 1));
				tail = working.substring(end + 1);
			} else {
				// The name ended without a closing ';' (or the string ended):
				// treat the '&' as literal text and escape it.
				replacement = ESCAPED_AMP;
				tail = working.substring(amp + 1);
			}
			working = working.substring(0, amp) + replacement + tail;

			// Resume directly behind what was just written. Resuming any later
			// would skip a character, which may itself be an '&' (for example
			// in "a&&b", or after an entity that was removed entirely).
			cleanFrom = amp + replacement.length();
		}

		// Plain-text replacement; applied on every path, including inputs that
		// end in a bare '&'.
		working = working.replace("<<", "&lt;&lt;");
		working = working.replace(">>", "&gt;&gt;");

		return working;
	}

	/**
	 * Decide what an entity is replaced with. The four entities XML allows
	 * (&amp;amp;, &amp;quot;, &amp;lt; and &amp;gt;) are kept as they are, known
	 * HTML entities are replaced by their unicode equivalent, and anything else
	 * is stripped out.
	 *
	 * @param entity
	 *            the entity to be replaced, including the leading '&amp;' and
	 *            the trailing ';'
	 * @return the substitution for the entity, either itself, the unicode
	 *         equivalent or an empty string.
	 */
	private static String handleEntity(final String entity) {
		if (GOOD_ENTITIES.contains(entity)) {
			return entity;
		}
		final String replace = BAD_ENTITIES.get(entity);
		return replace != null ? replace : "";
	}
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/test/java/eu/dnetlib/data/collector/plugins/oai/HttpConnectorTest.java
1
package eu.dnetlib.data.collector.plugins.oai;
2

  
3
import org.junit.Before;
4
import org.junit.Ignore;
5
import org.junit.Test;
6

  
7
import eu.dnetlib.data.collector.plugins.oai.engine.HttpConnector;
8
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
9

  
10
public class HttpConnectorTest {
11

  
12
	private HttpConnector connector;
13

  
14
	private static final String URL = "https://researchdata.ands.org.au/registry/services/oai?verb=Identify";
15

  
16
	@Before
17
	public void setUp() {
18
		connector = new HttpConnector();
19
		connector.initTrustManager();
20
		connector.setMaxNumberOfRetry(1);
21
	}
22

  
23
	@Test
24
	@Ignore
25
	public void testGetInputSource() throws CollectorServiceException {
26
		System.out.println(connector.getInputSource(URL));
27
	}
28
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/test/java/eu/dnetlib/data/collector/plugins/oai/OaiIteratorTest.java
1
package eu.dnetlib.data.collector.plugins.oai;
2

  
3
import org.junit.Before;
4
import org.junit.Ignore;
5
import org.junit.Test;
6

  
7
import eu.dnetlib.data.collector.plugins.oai.engine.HttpConnector;
8

  
9
public class OaiIteratorTest {
10
	
11
	private static final String BASE_URL = "http://oai.d.efg.research-infrastructures.eu/oai.do";
12
	private static final String FORMAT = "oai_dc";
13
	private static final String SET = "d937bab1-d44c-44aa-bf7d-df5312a3b623";
14
	
15
	private OaiIterator oai;
16
	
17
	@Before
18
	public void setUp() {
19
		HttpConnector httpConnector = new HttpConnector();
20
		httpConnector.initTrustManager();
21
		oai = new OaiIterator(BASE_URL, FORMAT, SET, null, null, httpConnector);
22
	}
23
	
24
	@Test
25
	@Ignore
26
	public void test() {
27
		int count = 0;
28
		while (oai.hasNext()) {
29
			oai.next();
30
			count++;
31
		}
32
		System.out.println("TOTAL: " + count);
33
	}
34
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/main/java/eu/dnetlib/data/collector/plugins/oai/OaiCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.oai;
2

  
3
import java.util.Iterator;
4
import java.util.List;
5

  
6
import org.springframework.beans.factory.annotation.Required;
7

  
8
import com.google.common.base.Function;
9
import com.google.common.base.Splitter;
10
import com.google.common.collect.Iterables;
11
import com.google.common.collect.Iterators;
12
import com.google.common.collect.Lists;
13

  
14
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
15
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
16
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
17

  
18
public class OaiCollectorPlugin extends AbstractCollectorPlugin {
19

  
20
	private static final String FORMAT_PARAM = "format";
21
	private static final String OAI_SET_PARAM = "set";
22

  
23
	private OaiIteratorFactory oaiIteratorFactory;
24

  
25
	@Override
26
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
27
			throws CollectorServiceException {
28
		final String baseUrl = interfaceDescriptor.getBaseUrl();
29
		final String mdFormat = interfaceDescriptor.getParams().get(FORMAT_PARAM);
30
		final String setParam = interfaceDescriptor.getParams().get(OAI_SET_PARAM);
31
		final List<String> sets = Lists.newArrayList();
32
		if (setParam != null) {
33
			sets.addAll(Lists.newArrayList(Splitter.on(",").omitEmptyStrings().trimResults().split(setParam)));
34
		}
35
		if (sets.isEmpty()) {
36
			// If no set is defined, ALL the sets must be harvested
37
			sets.add("");
38
		}
39

  
40
		if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
41

  
42
		if (mdFormat == null || mdFormat.isEmpty()) { throw new CollectorServiceException("Param 'mdFormat' is null or empty"); }
43

  
44
		if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); }
45

  
46
		if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + untilDate); }
47

  
48
		return new Iterable<String>() {
49

  
50
			@SuppressWarnings("unchecked")
51
			@Override
52
			public Iterator<String> iterator() {
53
				final Iterable<Iterator<String>> iter = Iterables.transform(sets, new Function<String, Iterator<String>>() {
54

  
55
					@Override
56
					public Iterator<String> apply(final String set) {
57
						return oaiIteratorFactory.newIterator(baseUrl, mdFormat, set, fromDate, untilDate);
58
					}
59
				});
60
				return Iterators.concat(Iterables.toArray(iter, Iterator.class));
61
			}
62
		};
63
	}
64

  
65
	public OaiIteratorFactory getOaiIteratorFactory() {
66
		return oaiIteratorFactory;
67
	}
68

  
69
	@Required
70
	public void setOaiIteratorFactory(final OaiIteratorFactory oaiIteratorFactory) {
71
		this.oaiIteratorFactory = oaiIteratorFactory;
72
	}
73

  
74
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/main/java/eu/dnetlib/data/collector/plugins/httplist/HttpListCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.httplist;
2

  
3
import java.util.Iterator;
4

  
5
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
6
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
7
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
8

  
9
public class HttpListCollectorPlugin extends AbstractCollectorPlugin {
10

  
11
	@Override
12
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
13
			throws CollectorServiceException {
14
		final String baseUrl = interfaceDescriptor.getBaseUrl();
15
		final String listAddress = interfaceDescriptor.getParams().get("listUrl");
16

  
17
		return new Iterable<String>() {
18

  
19
			@Override
20
			public Iterator<String> iterator() {
21
				return new HttpListIterator(baseUrl, listAddress);
22
			}
23
		};
24
	}
25
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.3.11/src/main/java/eu/dnetlib/data/collector/plugins/oai/OaiIterator.java
1
package eu.dnetlib.data.collector.plugins.oai;
2

  
3
import java.io.StringReader;
4
import java.io.UnsupportedEncodingException;
5
import java.net.URLEncoder;
6
import java.util.Iterator;
7
import java.util.Queue;
8
import java.util.concurrent.PriorityBlockingQueue;
9

  
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.dom4j.Document;
13
import org.dom4j.DocumentException;
14
import org.dom4j.Node;
15
import org.dom4j.io.SAXReader;
16

  
17
import eu.dnetlib.data.collector.plugins.oai.engine.HttpConnector;
18
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
19
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
20

  
21
public class OaiIterator implements Iterator<String> {
22

  
23
	private static final Log log = LogFactory.getLog(OaiIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
24

  
25
	private Queue<String> queue = new PriorityBlockingQueue<String>();
26
	private SAXReader reader = new SAXReader();
27

  
28
	private String baseUrl;
29
	private String set;
30
	private String mdFormat;
31
	private String fromDate;
32
	private String untilDate;
33
	private String token;
34
	private boolean started;
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff