Project

General

Profile

« Previous | Next » 

Revision 38180

[maven-release-plugin] copy for tag dnet-modular-collector-service-3.1.12

View differences:

modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12/src/main/java/eu/dnetlib/data/collector/plugins/ftp/FtpFileWalker.java
1
package eu.dnetlib.data.collector.plugins.ftp;
2

  
3
import java.io.IOException;
4
import java.util.Collection;
5
import java.util.concurrent.BlockingQueue;
6

  
7
import org.apache.commons.io.filefilter.FileFilterUtils;
8
import org.apache.commons.io.filefilter.HiddenFileFilter;
9
import org.apache.commons.io.filefilter.IOFileFilter;
10
import org.apache.commons.io.filefilter.SuffixFileFilter;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13
import org.apache.commons.net.ftp.FTPFile;
14
import org.apache.commons.net.ftp.FTPFileFilter;
15
import org.xml.sax.SAXException;
16

  
17
/**
18
 * FTPFileWalker runs recursively under a FTP directory structure starting from a given path and for each file reads it's content and puts
19
 * it in a shared queue.
20
 * 
21
 * Acts as a producer.
22
 * 
23
 * @author sandro
24
 * 
25
 * @param <T>
26
 *            Type of expected content to extract from files NB. Generally strings.
27
 */
28
public class FtpFileWalker<T> extends FtpDirectoryWalker<T> {
29

  
30
	/**
31
	 * Logger.
32
	 */
33
	private static final Log log = LogFactory.getLog(FtpFileWalker.class);
34

  
35
	/**
36
	 * Shared queue.
37
	 */
38
	private final BlockingQueue<T> queue;
39

  
40
	public static String IGNORE_PREFIX = ".";
41

  
42
	public static String IGNORE_SUFFIX = "~";
43

  
44
	static IOFileFilter fileFilter = FileFilterUtils.notFileFilter(FileFilterUtils.or(new SuffixFileFilter("~"), HiddenFileFilter.HIDDEN));
45

  
46
	/**
47
	 * Instantiates a new fTP file walker.
48
	 * 
49
	 * @param queue
50
	 *            the queue
51
	 * @param filter
52
	 *            the filter
53
	 * @param depthLimit
54
	 *            the depth limit
55
	 * @param itemParam
56
	 *            the item param
57
	 * @param protocol
58
	 *            the protocol
59
	 */
60
	public FtpFileWalker(final BlockingQueue<T> queue, final FTPFileFilter filter, final int depthLimit, final ItemUtility itemParam, final String protocol) {
61
		super(filter, depthLimit, itemParam, protocol);
62
		this.queue = queue;
63
	}
64

  
65
	@SuppressWarnings("unchecked")
66
	public void doWalk(final String source) throws IOException {
67
		try {
68
			initialize();
69
		} catch (Exception e) {
70
			enqueue(queue, (T) FtpIterable.done);
71
			throw new IllegalStateException(e);
72
		}
73

  
74
		FTPFile[] files = clientProvider.listFiles(source);
75
		if (files.length == 1) {
76
			if (source.contains(files[0].getName())) {
77
				enqueue(queue, (T) getFileFrom(source));
78
			} else {
79
				walk(source, queue);
80
			}
81
		} else if (files.length > 0) {
82
			walk(source, queue);
83
		}
84
		enqueue(queue, (T) FtpIterable.done);
85
	}
86

  
87
	@Override
88
	@SuppressWarnings({ "unchecked", "rawtypes" })
89
	protected void handleFile(final FTPFile file, final int depth, final Collection results) throws IOException {
90
		if (file.getName().endsWith(IGNORE_SUFFIX)) return;
91
		T myFile = (T) readFile(file);
92
		if (myFile == null) return;
93
		enqueue((BlockingQueue<T>) results, myFile);
94
	}
95

  
96
	@Override
97
	@SuppressWarnings({ "unchecked", "rawtypes" })
98
	protected boolean handleDirectory(final String directory, final int depth, final Collection results) throws IOException {
99
		String[] tmp = directory.split("/");
100
		if ((tmp.length > 0) && (tmp[tmp.length - 1].startsWith(IGNORE_PREFIX))) return false;
101
		return super.handleDirectory(directory, depth, results);
102
	}
103

  
104
	// ///Utility
105

  
106
	/**
107
	 * Adds the element to the queue
108
	 */
109
	private void enqueue(final BlockingQueue<T> queue, final T element) {
110
		try {
111
			queue.put(element);
112
			// In order to avoid orphan FTP walkers, a better version would be..
113
			// if (!queue.offer(element, 5, TimeUnit.MINUTES)) {
114
			// log.info("Nobody is consuming my records! I stop gracefully!");
115
			// }
116
		} catch (InterruptedException e) {
117
			log.warn("Hopssss... ", e);
118
		}
119
	}
120

  
121
	/**
122
	 * given a file, return its content as a string
123
	 * 
124
	 * @param file
125
	 *            the source
126
	 * @return the file content as a single string
127
	 * @throws IOException
128
	 * @throws TikaException
129
	 * @throws SAXException
130
	 */
131
	private String readFile(final FTPFile file) throws IOException {
132

  
133
		if (getCurrentDirectory() == null) {
134
			clientProvider.completePendingCommand();
135
			return getCurrentDirectory();
136
		}
137
		String fileContent = getFileFrom(getCurrentDirectory() + "/" + file.getName());
138
		// remove BOM if present
139
		if (fileContent.startsWith("\uFEFF")) {
140
			fileContent = fileContent.substring(1);
141
		}
142
		return fileContent;
143
	}
144

  
145
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12/src/main/java/eu/dnetlib/data/collector/plugins/ftp/ClientSftpDataProvider.java
1
package eu.dnetlib.data.collector.plugins.ftp;
2

  
3
import java.io.IOException;
4
import java.io.InputStream;
5
import java.io.StringWriter;
6
import java.util.ArrayList;
7
import java.util.Vector;
8

  
9
import org.apache.commons.io.IOUtils;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.apache.commons.net.ftp.FTPFile;
13

  
14
import com.jcraft.jsch.Channel;
15
import com.jcraft.jsch.ChannelSftp;
16
import com.jcraft.jsch.ChannelSftp.LsEntry;
17
import com.jcraft.jsch.JSch;
18
import com.jcraft.jsch.JSchException;
19
import com.jcraft.jsch.Session;
20
import com.jcraft.jsch.SftpATTRS;
21
import com.jcraft.jsch.SftpException;
22
import com.jcraft.jsch.UserInfo;
23

  
24
/**
25
 * The Class ClientSFTPDataProvider.
26
 */
27
public class ClientSftpDataProvider implements FtpClientProvider {
28

  
29
	/** The Constant log. */
30
	private static final Log log = LogFactory.getLog(ClientSftpDataProvider.class);
31

  
32
	/** The session. */
33
	private Session session;
34

  
35
	/** The sftp channel. */
36
	private ChannelSftp sftpChannel;
37

  
38
	/** The port. */
39
	private int port;
40

  
41
	/** The item param. */
42
	private ItemUtility itemParam;
43

  
44
	/*
45
	 * (non-Javadoc)
46
	 * 
47
	 * @see eu.dnetlib.data.collective.harvest.provider.IClientProvider#connect()
48
	 */
49
	@Override
50
	public void connect() {
51
		try {
52
			JSch jsch = new JSch();
53
			port = 22;
54
			session = jsch.getSession(itemParam.getUsername(), itemParam.getHost(), port);
55
			UserSFTP muser = new UserSFTP();
56
			muser.setPassword(itemParam.getPassword());
57
			session.setUserInfo(muser);
58
			session.connect();
59
			Channel channel;
60
			channel = session.openChannel("sftp");
61
			channel.connect();
62
			sftpChannel = (ChannelSftp) channel;
63
		} catch (JSchException e) {
64
			e.printStackTrace();
65
		}
66
	}
67

  
68
	/*
69
	 * (non-Javadoc)
70
	 * 
71
	 * @see eu.dnetlib.data.collective.harvest.provider.IClientProvider#disconnect()
72
	 */
73
	@Override
74
	public void disconnect() {
75
		session.disconnect();
76
	}
77

  
78
	/*
79
	 * (non-Javadoc)
80
	 * 
81
	 * @see eu.dnetlib.data.collective.harvest.provider.ftp.FTPClientProvider#changeWorkingDirectory(java.lang.String)
82
	 */
83
	@Override
84
	public boolean changeWorkingDirectory(final String path) {
85
		boolean b = false;
86
		try {
87
			sftpChannel.cd(path);
88
			b = true;
89
		} catch (SftpException e) {
90
			b = false;
91
		}
92
		return b;
93
	}
94

  
95
	/*
96
	 * (non-Javadoc)
97
	 * 
98
	 * @see eu.dnetlib.data.collective.harvest.provider.ftp.FTPClientProvider#listFiles()
99
	 */
100
	@SuppressWarnings("unchecked")
101
	@Override
102
	public FTPFile[] listFiles() {
103
		ArrayList<FTPFile> tmp = new ArrayList<FTPFile>();
104
		try {
105
			Vector<LsEntry> v = sftpChannel.ls(sftpChannel.pwd());
106
			for (LsEntry obj : v) {
107

  
108
				if (obj.getFilename().startsWith(".") == false) {
109

  
110
					FTPFile afile = new FTPFile();
111
					afile.setName(obj.getFilename());
112
					SftpATTRS attributes = obj.getAttrs();
113
					if (attributes.isDir()) {
114
						afile.setType(1);
115
					} else {
116
						afile.setType(0);
117
					}
118
					tmp.add(afile);
119
				}
120

  
121
			}
122
		} catch (SftpException e) {
123
			e.printStackTrace();
124
		}
125
		FTPFile[] files = new FTPFile[tmp.size()];
126
		files = tmp.toArray(files);
127
		return files;
128
	}
129

  
130
	/*
131
	 * (non-Javadoc)
132
	 * 
133
	 * @see eu.dnetlib.data.collective.harvest.provider.ftp.FTPClientProvider#listFiles(java.lang.String)
134
	 */
135
	@SuppressWarnings("unchecked")
136
	@Override
137
	public FTPFile[] listFiles(final String source) {
138
		ArrayList<FTPFile> tmp = new ArrayList<FTPFile>();
139
		try {
140
			Vector<LsEntry> v = sftpChannel.ls(source);
141
			for (LsEntry obj : v) {
142

  
143
				if (obj.getFilename().startsWith(".") == false) {
144

  
145
					FTPFile afile = new FTPFile();
146
					afile.setName(obj.getFilename());
147
					SftpATTRS attributes = obj.getAttrs();
148
					if (attributes.isDir()) {
149
						afile.setType(1);
150
					} else {
151
						afile.setType(0);
152
					}
153
					tmp.add(afile);
154
				}
155

  
156
			}
157
		} catch (SftpException e) {
158
			e.printStackTrace();
159
		}
160
		FTPFile[] files = new FTPFile[tmp.size()];
161
		files = tmp.toArray(files);
162
		return files;
163
	}
164

  
165
	/*
166
	 * (non-Javadoc)
167
	 * 
168
	 * @see eu.dnetlib.data.collective.harvest.provider.ftp.FTPClientProvider#printWorkingDirectory()
169
	 */
170
	@Override
171
	public String printWorkingDirectory() {
172
		String path = null;
173
		try {
174
			path = sftpChannel.pwd();
175
		} catch (SftpException e) {
176
			e.printStackTrace();
177
		}
178
		return path;
179

  
180
	}
181

  
182
	/*
183
	 * (non-Javadoc)
184
	 * 
185
	 * @see eu.dnetlib.data.collective.harvest.provider.IClientProvider#retrieveFileStream(java.lang.String)
186
	 */
187
	@Override
188
	public String retrieveFileStream(final String remote) {
189
		try {
190
			InputStream stream = sftpChannel.get(remote);
191

  
192
			StringWriter writer = new StringWriter();
193
			IOUtils.copy(stream, writer);
194

  
195
			return writer.toString();
196

  
197
		} catch (IOException e) {
198
			log.error("cannot read file: " + remote, e);
199
			throw new RuntimeException(e);
200
		} catch (SftpException e) {
201
			log.error("cannot read file: " + remote, e);
202
			throw new RuntimeException(e);
203
		}
204
	}
205

  
206
	/*
207
	 * (non-Javadoc)
208
	 * 
209
	 * @see eu.dnetlib.data.collective.harvest.provider.ftp.FTPClientProvider#completePendingCommand()
210
	 */
211
	@Override
212
	public void completePendingCommand() throws IOException {
213
		// TODO Auto-generated method stub
214

  
215
	}
216

  
217
	/*
218
	 * (non-Javadoc)
219
	 * 
220
	 * @see
221
	 * eu.dnetlib.data.collective.harvest.provider.IClientProvider#setItemParam(eu.dnetlib.data.collective.harvest.provider.ItemUtility)
222
	 */
223
	@Override
224
	public void setItemParam(final ItemUtility itemUtility) {
225
		this.itemParam = itemUtility;
226
	}
227

  
228
	/*
229
	 * (non-Javadoc)
230
	 * 
231
	 * @see eu.dnetlib.data.collective.harvest.provider.IClientProvider#isConnected()
232
	 */
233
	@Override
234
	public boolean isConnected() {
235
		return sftpChannel.isConnected();
236
	}
237
}
238

  
239
class UserSFTP implements UserInfo {
240

  
241
	String password;
242

  
243
	@Override
244
	public String getPassphrase() {
245
		return null;
246
	}
247

  
248
	public void setPassword(final String password) {
249
		this.password = password;
250
	}
251

  
252
	@Override
253
	public String getPassword() {
254
		// TODO Auto-generated method stub
255
		return password;
256
	}
257

  
258
	@Override
259
	public boolean promptPassphrase(final String arg0) {
260
		// TODO Auto-generated method stub
261
		return false;
262
	}
263

  
264
	@Override
265
	public boolean promptPassword(final String arg0) {
266
		password = "puppamelo";
267
		return true;
268
	}
269

  
270
	@Override
271
	public boolean promptYesNo(final String arg0) {
272
		// TODO Auto-generated method stub
273
		return true;
274
	}
275

  
276
	@Override
277
	public void showMessage(final String arg0) {
278
		// TODO Auto-generated method stub
279

  
280
	}
281

  
282
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12/src/main/java/eu/dnetlib/data/collector/plugins/archive/targz/TarGzIterable.java
1
package eu.dnetlib.data.collector.plugins.archive.targz;
2

  
3
import java.io.File;
4
import java.net.MalformedURLException;
5
import java.net.URL;
6
import java.util.Iterator;
7

  
8
import com.google.common.base.Function;
9
import com.google.common.collect.Iterators;
10

  
11
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
12
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
13
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
14

  
15
/**
16
 * The Class TarGzIterable.
17
 *
18
 * @author Andrea
19
 */
20
public class TarGzIterable implements Iterable<String> {
21

  
22
	/** The path to tar.gz archive. */
23
	private File tarGzFile;
24

  
25
	public TarGzIterable(final InterfaceDescriptor interfaceDescriptor) throws CollectorServiceException {
26
		try {
27
			final String tarGzPath = interfaceDescriptor.getBaseUrl();
28
			URL tarGzUrl = new URL(tarGzPath);
29
			this.tarGzFile = new File(tarGzUrl.getPath());
30
			if (!tarGzFile.exists()) { throw new CollectorServiceException(String.format("The base ULR %s, does not exist", tarGzFile.getPath())); }
31
		} catch (MalformedURLException e) {
32
			throw new CollectorServiceException("TarGz collector failed! ", e);
33
		}
34
	}
35

  
36
	@Override
37
	public Iterator<String> iterator() {
38
		final TarGzIterator tgzIterator = new TarGzIterator(tarGzFile.getAbsolutePath());
39
		return Iterators.transform(tgzIterator, new Function<String, String>() {
40

  
41
			@Override
42
			public String apply(final String inputRecord) {
43
				return XmlCleaner.cleanAllEntities(inputRecord.startsWith("\uFEFF") ? inputRecord.substring(1) : inputRecord);
44
			}
45
		});
46
	}
47

  
48
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12/src/main/java/eu/dnetlib/data/collector/plugins/FileGZipCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins;
2

  
3
import java.io.BufferedInputStream;
4
import java.io.FileInputStream;
5
import java.net.URL;
6
import java.util.zip.GZIPInputStream;
7

  
8
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
9

  
10
public class FileGZipCollectorPlugin extends AbstractSplittedRecordPlugin {
11

  
12
	@Override
13
	protected BufferedInputStream getBufferedInputStream(final String baseUrl) throws CollectorServiceException {
14

  
15
		try {
16
			GZIPInputStream stream = new GZIPInputStream(new FileInputStream(new URL(baseUrl).getPath()));
17
			return new BufferedInputStream(stream);
18
		} catch (Exception e) {
19
			throw new CollectorServiceException(e);
20
		}
21
	}
22

  
23
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12/src/main/java/eu/dnetlib/data/collector/plugins/ftp/FtpDirectoryWalker.java
1
package eu.dnetlib.data.collector.plugins.ftp;
2

  
3
import java.io.IOException;
4
import java.util.Collection;
5

  
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8
import org.apache.commons.net.ftp.FTPFile;
9
import org.apache.commons.net.ftp.FTPFileFilter;
10

  
11
/**
12
 * FTPDirectoryWalker runs recursively under a FTP directory structure starting from a given path.
13
 * 
14
 * Acts as a producer.
15
 * 
16
 * @author Sandro
17
 * 
18
 * @param <T>
19
 *            Type of expected content to extract from files NB. Generally strings.
20
 */
21

  
22
public abstract class FtpDirectoryWalker<T> {
23

  
24
	/**
25
	 * logger
26
	 */
27
	private static final Log log = LogFactory.getLog(FtpDirectoryWalker.class); // NOPMD by marko on 11/24/08 5:02 PM
28

  
29
	/**
30
	 * The file filter to use to filter files and directories.
31
	 */
32
	// private final FTPFileFilter filter;
33

  
34
	/**
35
	 * The limit on the directory depth to walk.
36
	 */
37
	private final int depthLimit;
38

  
39
	private ItemUtility itemParam;
40

  
41
	protected FtpClientProvider clientProvider;
42

  
43
	private FTPFileFilter filter;
44

  
45
	/**
46
	 * Construct an FTP instance with no filtering and unlimited <i>depth</i>.
47
	 * 
48
	 * @param server
49
	 *            the server fTP to connect
50
	 * @param username
51
	 *            the username parameter for the fTP connection, if null the connection is anonymous
52
	 * @param password
53
	 *            the password parameter for the FTP connection
54
	 */
55
	protected FtpDirectoryWalker(final ItemUtility itemParam) {
56
		this(null, 0, itemParam, "ftp");
57
	}
58

  
59
	/**
60
	 * Construct an instance with a filter and limit the <i>depth</i> navigated to.
61
	 * <p>
62
	 * The filter controls which files and directories will be navigated to as part of the walk.
63
	 * 
64
	 * @param filter
65
	 *            the filter to apply, null means visit all files
66
	 * @param depthLimit
67
	 *            controls how <i>deep</i> the hierarchy is navigated to (less than 0 means unlimited)
68
	 * @param server
69
	 *            the server fTP to connect
70
	 * @param username
71
	 *            the username parameter for the fTP connection, if null the connection is anonymous
72
	 * @param password
73
	 *            the password parameter for the FTP connection
74
	 * 
75
	 */
76
	protected FtpDirectoryWalker(final FTPFileFilter filter, final int depthLimit, final ItemUtility itemParam, final String protocol) {
77
		this.filter = filter;
78
		this.depthLimit = depthLimit;
79
		this.itemParam = itemParam;
80

  
81
		clientProvider = (FtpClientProvider) ClientProviderFactory.getProvider("ftp");
82

  
83
	}
84

  
85
	/**
86
	 * Overridable callback method invoked at the start of processing.
87
	 * <p>
88
	 * This implementation does nothing.
89
	 * 
90
	 * @param startDirectory
91
	 *            the directory to start from
92
	 * @param results
93
	 *            the collection of result objects, may be updated
94
	 * @throws IOException
95
	 *             if an I/O Error occurs
96
	 */
97
	protected void handleStart(final String startDirectory, final Collection<T> results) throws IOException {
98
		// do nothing - overridable by subclass
99
	}
100

  
101
	/**
102
	 * Overridable callback method invoked to determine if a directory should be processed.
103
	 * <p>
104
	 * This method returns a boolean to indicate if the directory should be examined or not. If you return false, the entire directory and
105
	 * any subdirectories will be skipped. Note that this functionality is in addition to the filtering by file filter.
106
	 * <p>
107
	 * This implementation does nothing and returns true.
108
	 * 
109
	 * @param directory
110
	 *            the current directory being processed
111
	 * @param depth
112
	 *            the current directory level (starting directory = 0)
113
	 * @param results
114
	 *            the collection of result objects, may be updated
115
	 * @return true to process this directory, false to skip this directory
116
	 * @throws IOException
117
	 *             if an I/O Error occurs
118
	 */
119
	protected boolean handleDirectory(final String directory, final int depth, final Collection<T> results) throws IOException {
120
		// do nothing - overridable by subclass
121
		return true; // process directory
122
	}
123

  
124
	/**
125
	 * Overridable callback method invoked at the start of processing each directory.
126
	 * <p>
127
	 * This implementation does nothing.
128
	 * 
129
	 * @param directory
130
	 *            the current directory being processed
131
	 * @param depth
132
	 *            the current directory level (starting directory = 0)
133
	 * @param results
134
	 *            the collection of result objects, may be updated
135
	 * @throws IOException
136
	 *             if an I/O Error occurs
137
	 */
138
	protected void handleDirectoryStart(final String directory, final int depth, final Collection<T> results) throws IOException {
139
		// do nothing - overridable by subclass
140
	}
141

  
142
	/**
143
	 * Overridable callback method invoked for each (non-directory) file.
144
	 * <p>
145
	 * This implementation does nothing.
146
	 * 
147
	 * @param file
148
	 *            the current file being processed
149
	 * @param depth
150
	 *            the current directory level (starting directory = 0)
151
	 * @param results
152
	 *            the collection of result objects, may be updated
153
	 * @throws IOException
154
	 *             if an I/O Error occurs
155
	 */
156
	protected void handleFile(final FTPFile file, final int depth, final Collection<T> results) throws IOException {
157
		// do nothing - overridable by subclass
158
	}
159

  
160
	/**
161
	 * Overridable callback method invoked at the end of processing.
162
	 * <p>
163
	 * This implementation does nothing.
164
	 * 
165
	 * @param results
166
	 *            the collection of result objects, may be updated
167
	 * @throws IOException
168
	 *             if an I/O Error occurs
169
	 */
170
	protected void handleEnd(final Collection<T> results) throws IOException {
171
		// do nothing - overridable by subclass
172
		clientProvider.disconnect();
173
	}
174

  
175
	public void initialize() {
176
		clientProvider.setItemParam(itemParam);
177
		clientProvider.connect();
178
		clientProvider.changeWorkingDirectory(itemParam.getBasePath());
179

  
180
	}
181

  
182
	/**
183
	 * Internal method that walks the directory hierarchy in a depth-first manner.
184
	 * <p>
185
	 * Users of this class do not need to call this method. This method will be called automatically by another (public) method on the
186
	 * specific subclass.
187
	 * <p>
188
	 * Writers of subclasses should call this method to start the directory walk. Once called, this method will emit events as it walks the
189
	 * hierarchy. The event methods have the prefix <code>handle</code>.
190
	 * 
191
	 * @param startDirectory
192
	 *            the directory to start from, not null
193
	 * @param results
194
	 *            the collection of result objects, may be updated
195
	 * @throws NullPointerException
196
	 *             if the start directory is null
197
	 * @throws IOException
198
	 *             if an I/O Error occurs
199
	 */
200
	protected final void walk(final String startDirectory, final Collection<T> results) throws IOException {
201
		if (startDirectory == null) { throw new NullPointerException("Start Directory is null"); }
202
		handleStart(startDirectory, results);
203

  
204
		walk(startDirectory, -1, results);
205
		handleEnd(results);
206

  
207
	}
208

  
209
	/**
210
	 * Main recursive method to examine the directory hierarchy.
211
	 * 
212
	 * @param directory
213
	 *            the directory to examine, not null
214
	 * @param depth
215
	 *            the directory level (starting directory = 0)
216
	 * @param results
217
	 *            the collection of result objects, may be updated
218
	 * @throws IOException
219
	 *             if an I/O Error occurs
220
	 */
221
	private void walk(final String directory, final int depth, final Collection<T> results) throws IOException {
222
		if (handleDirectory(directory, depth, results)) {
223
			String currentDirectory = clientProvider.printWorkingDirectory();
224
			clientProvider.changeWorkingDirectory(directory);
225
			int childDepth = depth + 1;
226
			if ((depthLimit < 0) || (childDepth <= depthLimit)) {
227
				// TODO: must be implemented the filter of file
228
				FTPFile[] childFiles = clientProvider.listFiles();
229
				if (childFiles != null) {
230
					for (FTPFile childFile : childFiles) {
231
						if (childFile.isDirectory()) {
232
							handleDirectoryStart(clientProvider.printWorkingDirectory() + "/" + childFile.getName(), childDepth, results);
233
							walk(clientProvider.printWorkingDirectory() + "/" + childFile.getName(), childDepth, results);
234
							clientProvider.changeWorkingDirectory(currentDirectory);
235
						} else {
236
							if ((filter == null) || filter.accept(childFile)) {
237
								log.info("Take the File " + childFile.getName());
238
								handleFile(childFile, childDepth, results);
239
							}
240

  
241
						}
242
					}
243
				}
244
			}
245
			handleDirectoryEnd(directory, depth, results);
246
		}
247

  
248
	}
249

  
250
	protected String getCurrentDirectory() throws IOException {
251
		return clientProvider.printWorkingDirectory();
252

  
253
	}
254

  
255
	/**
256
	 * Overridable callback method invoked at the end of processing each directory.
257
	 * <p>
258
	 * This implementation does nothing.
259
	 * 
260
	 * @param directory
261
	 *            the directory being processed
262
	 * @param depth
263
	 *            the current directory level (starting directory = 0)
264
	 * @param results
265
	 *            the collection of result objects, may be updated
266
	 * @throws IOException
267
	 *             if an I/O Error occurs
268
	 */
269
	protected void handleDirectoryEnd(final String directory, final int depth, final Collection<T> results) throws IOException {
270
		// do nothing - overridable by subclass
271
	}
272

  
273
	protected String getFileFrom(final String remote) throws IOException {
274
		return clientProvider.retrieveFileStream(remote);
275
	}
276

  
277
	protected void completePendingCommand() throws IOException {
278
		clientProvider.completePendingCommand();
279
	}
280

  
281
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12/src/main/java/eu/dnetlib/data/collector/plugins/ftp/ItemUtility.java
1
package eu.dnetlib.data.collector.plugins.ftp;
2

  
3
import java.io.UnsupportedEncodingException;
4
import java.net.URI;
5
import java.net.URISyntaxException;
6
import java.net.URLEncoder;
7

  
8
import org.apache.commons.codec.binary.Base64;
9

  
10
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
11

  
12
/**
13
 * Utility class for the generation of the Linked resultset
14
 */
15
public class ItemUtility {
16

  
17
	/** The username. */
18
	private String username;
19

  
20
	/** The password. */
21
	private String password;
22

  
23
	/** The host. */
24
	private String host;
25

  
26
	/** The base path. */
27
	private String basePath;
28

  
29
	/** The controller url. */
30
	private String controllerURL;
31

  
32
	private String protocol;
33

  
34
	public ItemUtility() {}
35

  
36
	public ItemUtility(final InterfaceDescriptor descriptor) {
37

  
38
		URI source = null;
39
		try {
40
			source = new URI(descriptor.getBaseUrl());
41
		} catch (URISyntaxException e) {
42
			return;
43
		}
44
		this.setHost(source.getHost());
45
		this.setBasePath(source.getPath());
46
		this.protocol = source.getScheme();
47
		this.username = descriptor.getParams().get("username");
48
		this.password = descriptor.getParams().get("password");
49

  
50
	}
51

  
52
	/**
53
	 * Gets the username.
54
	 * 
55
	 * @return the username
56
	 */
57
	public String getUsername() {
58
		return username;
59
	}
60

  
61
	/**
62
	 * Sets the username.
63
	 * 
64
	 * @param username
65
	 *            the new username
66
	 */
67
	public ItemUtility setUsername(final String username) {
68
		this.username = username;
69
		return this;
70
	}
71

  
72
	/**
73
	 * Gets the password.
74
	 * 
75
	 * @return the password
76
	 */
77
	public String getPassword() {
78
		return password;
79
	}
80

  
81
	/**
82
	 * Sets the password.
83
	 * 
84
	 * @param password
85
	 *            the new password
86
	 */
87
	public ItemUtility setPassword(final String password) {
88
		this.password = password;
89
		return this;
90
	}
91

  
92
	/**
93
	 * Gets the host.
94
	 * 
95
	 * @return the host
96
	 */
97
	public String getHost() {
98
		return host;
99
	}
100

  
101
	/**
102
	 * Sets the host.
103
	 * 
104
	 * @param host
105
	 *            the new host
106
	 */
107
	public ItemUtility setHost(final String host) {
108
		this.host = host;
109
		return this;
110
	}
111

  
112
	/**
113
	 * Gets the base path.
114
	 * 
115
	 * @return the base path
116
	 */
117
	public String getBasePath() {
118
		return basePath;
119
	}
120

  
121
	/**
122
	 * Sets the base path.
123
	 * 
124
	 * @param basePath
125
	 *            the new base path
126
	 */
127
	public ItemUtility setBasePath(final String basePath) {
128
		this.basePath = basePath;
129
		return this;
130
	}
131

  
132
	/**
133
	 * Gets the controller url.
134
	 * 
135
	 * @return the controller url
136
	 */
137
	public String getControllerURL() {
138
		return controllerURL;
139
	}
140

  
141
	/**
142
	 * Sets the controller url.
143
	 * 
144
	 * @param controllerURL
145
	 *            the new controller url
146
	 */
147
	public ItemUtility setControllerURL(final String controllerURL) {
148
		this.controllerURL = controllerURL;
149
		return this;
150
	}
151

  
152
	public String getProtocol() {
153
		return protocol;
154
	}
155

  
156
	public ItemUtility setProtocol(final String protocol) {
157
		this.protocol = protocol;
158
		return this;
159
	}
160

  
161
	/**
162
	 * Generate url encodec.
163
	 * 
164
	 * @param absolutePath
165
	 *            the relative path
166
	 * @return the string
167
	 * @throws UnsupportedEncodingException
168
	 *             the unsupported encoding exception
169
	 */
170
	public String generateURLEncoded(final String absolutePath) throws UnsupportedEncodingException {
171
		URI uri = URI.create(absolutePath);
172

  
173
		StringBuilder sb = serialize(uri);
174

  
175
		byte[] miaEnc = Base64.encodeBase64(sb.toString().getBytes());
176
		return controllerURL + "?var=" + URLEncoder.encode(new String(miaEnc), "UTF-8");
177
	}
178

  
179
	@Override
180
	public String toString() {
181
		return serialize(URI.create(getProtocol() + "://" + getHost() + getBasePath())).toString();
182
	}
183

  
184
	private StringBuilder serialize(final URI uri) {
185
		StringBuilder sb = new StringBuilder();
186
		sb.append(uri.getScheme() + "://");
187
		if ((username != null) && (password != null)) {
188
			sb.append(username);
189
			sb.append(":");
190
			sb.append(password);
191
			sb.append("@");
192
		}
193
		sb.append(uri.getHost());
194
		sb.append(uri.getPath());
195

  
196
		return sb;
197
	}
198

  
199
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet-parent</artifactId>
6
		<version>1.0.0</version>
7
		<relativePath />
8
	</parent>
9
	<modelVersion>4.0.0</modelVersion>
10
	<groupId>eu.dnetlib</groupId>
11
	<artifactId>dnet-modular-collector-service</artifactId>
12
	<packaging>jar</packaging>
13
	<version>3.1.12</version>
14
	<scm>
15
		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12</developerConnection>
16
	</scm>
17
	<dependencies>
18
		<dependency>
19
			<groupId>eu.dnetlib</groupId>
20
			<artifactId>dnet-modular-collector-service-rmi</artifactId>
21
			<version>[1.3.0,2.0.0)</version>
22
		</dependency>
23
		<dependency>
24
			<groupId>eu.dnetlib</groupId>
25
			<artifactId>cnr-resultset-service</artifactId>
26
			<version>[2.0.0, 3.0.0)</version>
27
		</dependency>
28
		<dependency>
29
			<groupId>eu.dnetlib</groupId>
30
			<artifactId>cnr-blackboard-common</artifactId>
31
			<version>[2.0.0,3.0.0)</version>
32
		</dependency>
33
		<dependency>
34
			<groupId>javax.servlet</groupId>
35
			<artifactId>javax.servlet-api</artifactId>
36
			<version>${javax.servlet.version}</version>
37
			<scope>provided</scope>
38
		</dependency>
39
		<dependency>
40
			<groupId>net.sf.opencsv</groupId>
41
			<artifactId>opencsv</artifactId>
42
			<version>2.0</version>
43
		</dependency>
44
		<dependency>
45
			<groupId>junit</groupId>
46
			<artifactId>junit</artifactId>
47
			<version>4.8.2</version>
48
			<scope>test</scope>
49
		</dependency>
50
		<dependency>
51
			<groupId>commons-net</groupId>
52
			<artifactId>commons-net</artifactId>
53
			<version>3.3</version>
54
		</dependency>
55
		<dependency>
56
			<groupId>jcraft</groupId>
57
			<artifactId>jsch</artifactId>
58
			<version>0.1.44</version>
59
		</dependency>
60
		<dependency>
61
			<groupId>org.apache.commons</groupId>
62
			<artifactId>commons-compress</artifactId>
63
			<version>1.6</version>
64
		</dependency>
65
		<dependency>
66
			<groupId>org.mockito</groupId>
67
			<artifactId>mockito-core</artifactId>
68
			<version>1.6</version>
69
			<scope>test</scope>
70
		</dependency>
71
		<dependency>
72
			<groupId>commons-httpclient</groupId>
73
			<artifactId>commons-httpclient</artifactId>
74
			<version>3.1</version>
75
		</dependency>
76
		<dependency>
77
			<groupId>com.google.code.gson</groupId>
78
			<artifactId>gson</artifactId>
79
			<version>${google.gson.version}</version>
80
		</dependency>
81
		<dependency>
82
			<groupId>org.apache.commons</groupId>
83
			<artifactId>commons-csv</artifactId>
84
			<version>1.0</version>
85
		</dependency>
86
	</dependencies>
87
</project>
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12/src/main/java/eu/dnetlib/data/collector/plugins/oai/engine/HttpConnector.java
1
package eu.dnetlib.data.collector.plugins.oai.engine;
2

  
3
import java.io.IOException;
4
import java.io.InputStream;
5
import java.net.HttpURLConnection;
6
import java.net.URL;
7
import java.security.GeneralSecurityException;
8
import java.security.KeyManagementException;
9
import java.security.NoSuchAlgorithmException;
10
import java.security.cert.CertificateException;
11
import java.security.cert.X509Certificate;
12
import java.util.List;
13
import java.util.Map;
14

  
15
import javax.net.ssl.HttpsURLConnection;
16
import javax.net.ssl.SSLContext;
17
import javax.net.ssl.TrustManager;
18
import javax.net.ssl.X509TrustManager;
19

  
20
import org.apache.commons.io.IOUtils;
21
import org.apache.commons.lang.math.NumberUtils;
22
import org.apache.commons.logging.Log;
23
import org.apache.commons.logging.LogFactory;
24

  
25
import eu.dnetlib.data.collector.plugin.CollectorPluginErrorLogList;
26
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
27

  
28
/**
29
 * @author jochen, michele, andrea
30
 *
31
 */
32
public class HttpConnector {
33

  
34
	private static final Log log = LogFactory.getLog(HttpConnector.class);
35

  
36
	private int maxNumberOfRetry = 6;
37
	private int defaultDelay = 120; // seconds
38
	private int readTimeOut = 120; // seconds
39

  
40
	/**
41
	 * @param requestUrl
42
	 * @return the content of the downloaded resource
43
	 * @throws CollectorServiceException
44
	 */
45
	public String getInputSource(final String requestUrl) throws CollectorServiceException {
46
		return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
47
	}
48

  
49
	private String attemptDownload(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
50
			throws CollectorServiceException {
51

  
52
		if (retryNumber > maxNumberOfRetry) { throw new CollectorServiceException("Max number of retries exceeded. Cause: \n " + errorList); }
53

  
54
		log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
55
		try {
56
			InputStream input = null;
57

  
58
			try {
59
				final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
60
				urlConn.setInstanceFollowRedirects(false);
61
				urlConn.setReadTimeout(readTimeOut * 1000);
62

  
63
				if (log.isDebugEnabled()) {
64
					logHeaderFields(urlConn);
65
				}
66

  
67
				int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
68
				if (retryAfter > 0) {
69
					log.warn("waiting and repeating request after " + retryAfter + " sec.");
70
					Thread.sleep(retryAfter * 1000);
71
					errorList.add("503 Service Unavailable");
72
					return attemptDownload(requestUrl, retryNumber + 1, errorList);
73
				} else if ((urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM) || (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP)) {
74
					final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
75
					log.info("The requested url has been moved to " + newUrl);
76
					errorList.add(String.format("%s %s. Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl));
77
					return attemptDownload(newUrl, retryNumber + 1, errorList);
78
				} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
79
					log.error(String.format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
80
					Thread.sleep(defaultDelay * 1000);
81
					errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
82
					return attemptDownload(requestUrl, retryNumber + 1, errorList);
83
				} else {
84
					input = urlConn.getInputStream();
85
					return IOUtils.toString(input);
86
				}
87
			} catch (IOException e) {
88
				log.error("error while retrieving from http-connection occured: " + e, e);
89
				Thread.sleep(defaultDelay * 1000);
90
				errorList.add(e.getMessage());
91
				return attemptDownload(requestUrl, retryNumber + 1, errorList);
92
			} finally {
93
				IOUtils.closeQuietly(input);
94
			}
95
		} catch (InterruptedException e) {
96
			throw new CollectorServiceException(e);
97
		}
98
	}
99

  
100
	private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
101
		log.debug("StatusCode: " + urlConn.getResponseMessage());
102

  
103
		for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
104
			if (e.getKey() != null) {
105
				for (String v : e.getValue()) {
106
					log.debug("  key: " + e.getKey() + " - value: " + v);
107
				}
108
			}
109
		}
110
	}
111

  
112
	private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
113
		for (String key : headerMap.keySet()) {
114
			if ((key != null) && key.toLowerCase().equals("retry-after") && (headerMap.get(key).size() > 0) && NumberUtils.isNumber(headerMap.get(key).get(0))) { return Integer
115
					.parseInt(headerMap.get(key).get(0)) + 10; }
116
		}
117
		return -1;
118
	}
119

  
120
	private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorServiceException {
121
		for (String key : headerMap.keySet()) {
122
			if ((key != null) && key.toLowerCase().equals("location") && (headerMap.get(key).size() > 0)) { return headerMap.get(key).get(0); }
123
		}
124
		throw new CollectorServiceException("The requested url has been MOVED, but 'location' param is MISSING");
125
	}
126

  
127
	/**
128
	 * register for https scheme; this is a workaround and not intended for the use in trusted environments
129
	 *
130
	 * @throws NoSuchAlgorithmException
131
	 * @throws KeyManagementException
132
	 */
133
	public void initTrustManager() {
134
		final X509TrustManager tm = new X509TrustManager() {
135

  
136
			@Override
137
			public void checkClientTrusted(final X509Certificate[] xcs, final String string) throws CertificateException {}
138

  
139
			@Override
140
			public void checkServerTrusted(final X509Certificate[] xcs, final String string) throws CertificateException {}
141

  
142
			@Override
143
			public X509Certificate[] getAcceptedIssuers() {
144
				return null;
145
			}
146
		};
147
		try {
148
			final SSLContext ctx = SSLContext.getInstance("TLS");
149
			ctx.init(null, new TrustManager[] { tm }, null);
150
			HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
151
		} catch (GeneralSecurityException e) {
152
			log.fatal(e);
153
			throw new IllegalStateException(e);
154
		}
155
	}
156

  
157
	public int getMaxNumberOfRetry() {
158
		return maxNumberOfRetry;
159
	}
160

  
161
	public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
162
		this.maxNumberOfRetry = maxNumberOfRetry;
163
	}
164

  
165
	public int getDefaultDelay() {
166
		return defaultDelay;
167
	}
168

  
169
	public void setDefaultDelay(final int defaultDelay) {
170
		this.defaultDelay = defaultDelay;
171
	}
172

  
173
	public int getReadTimeOut() {
174
		return readTimeOut;
175
	}
176

  
177
	public void setReadTimeOut(final int readTimeOut) {
178
		this.readTimeOut = readTimeOut;
179
	}
180

  
181
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12/src/main/java/eu/dnetlib/data/collector/functions/ListOaiSetsFunction.java
1
package eu.dnetlib.data.collector.functions;
2

  
3
import java.io.StringReader;
4
import java.util.Iterator;
5
import java.util.List;
6
import java.util.Map;
7

  
8
import org.dom4j.Document;
9
import org.dom4j.DocumentException;
10
import org.dom4j.io.SAXReader;
11
import org.springframework.beans.factory.annotation.Required;
12

  
13
import com.google.common.base.Function;
14
import com.google.common.collect.Iterators;
15
import com.google.common.collect.Lists;
16

  
17
import eu.dnetlib.data.collector.plugins.oaisets.OaiSetsIteratorFactory;
18
import eu.dnetlib.data.collector.rmi.ProtocolParameterValue;
19

  
20
public class ListOaiSetsFunction implements ParamValuesFunction {
21

  
22
	private OaiSetsIteratorFactory oaiSetsIteratorFactory;
23

  
24
	@Override
25
	public List<ProtocolParameterValue> findValues(final String baseUrl, final Map<String, String> params) {
26
		final SAXReader reader = new SAXReader();
27

  
28
		final Iterator<ProtocolParameterValue> iter = Iterators.transform(oaiSetsIteratorFactory.newIterator(baseUrl),
29
				new Function<String, ProtocolParameterValue>() {
30

  
31
					@Override
32
					public ProtocolParameterValue apply(final String s) {
33
						try {
34
							final Document doc = reader.read(new StringReader(s));
35
							final String id = doc.valueOf("//*[local-name()='setSpec']");
36
							final String name = doc.valueOf("//*[local-name()='setName']");
37
							return new ProtocolParameterValue(id, name);
38
						} catch (DocumentException e) {
39
							throw new RuntimeException("Error in ListSets", e);
40
						}
41
					}
42
				});
43
		return Lists.newArrayList(iter);
44
	}
45

  
46
	public OaiSetsIteratorFactory getOaiSetsIteratorFactory() {
47
		return oaiSetsIteratorFactory;
48
	}
49

  
50
	@Required
51
	public void setOaiSetsIteratorFactory(final OaiSetsIteratorFactory oaiSetsIteratorFactory) {
52
		this.oaiSetsIteratorFactory = oaiSetsIteratorFactory;
53
	}
54
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12/src/main/java/eu/dnetlib/data/collector/CollectorServiceImpl.java
1
package eu.dnetlib.data.collector;
2

  
3
import java.util.List;
4
import java.util.Map;
5
import java.util.Set;
6

  
7
import javax.annotation.Resource;
8
import javax.xml.ws.wsaddressing.W3CEndpointReference;
9

  
10
import com.google.common.collect.Lists;
11
import com.google.common.collect.Sets;
12

  
13
import eu.dnetlib.data.collector.plugin.CollectorPlugin;
14
import eu.dnetlib.data.collector.rmi.CollectorService;
15
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
16
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
17
import eu.dnetlib.data.collector.rmi.ProtocolDescriptor;
18
import eu.dnetlib.data.collector.rmi.ProtocolParameter;
19
import eu.dnetlib.data.collector.rmi.ProtocolParameterValue;
20
import eu.dnetlib.enabling.resultset.IterableResultSetFactory;
21
import eu.dnetlib.enabling.tools.AbstractBaseService;
22

  
23
public class CollectorServiceImpl extends AbstractBaseService implements CollectorService {
24

  
25
	@Resource
26
	private CollectorPluginEnumerator collectorPluginEnumerator;
27

  
28
	@Resource
29
	private IterableResultSetFactory iterableResultSetFactory;
30

  
31
	@Override
32
	public W3CEndpointReference collect(final InterfaceDescriptor ifDescriptor) throws CollectorServiceException {
33
		return dateRangeCollect(ifDescriptor, null, null);
34
	}
35

  
36
	@Override
37
	public W3CEndpointReference dateRangeCollect(
38
			final InterfaceDescriptor ifDescriptor, final String from, final String until)
39
			throws CollectorServiceException {
40
		final CollectorPlugin plugin = collectorPluginEnumerator.get(ifDescriptor.getProtocol());
41

  
42
		if (!verifyParams(ifDescriptor.getParams().keySet(), Sets.newHashSet(plugin.listNameParameters()))) { throw new CollectorServiceException(
43
				"Invalid parameters, valid: " + plugin.listNameParameters() + ", current: " + ifDescriptor.getParams().keySet()); }
44

  
45
		final Iterable<String> iter = plugin.collect(ifDescriptor, from, until);
46

  
47
		return iterableResultSetFactory.createIterableResultSet(iter);
48
	}
49

  
50
	@Override
51
	public List<ProtocolDescriptor> listProtocols() {
52
		final List<ProtocolDescriptor> list = Lists.newArrayList();
53
		for (CollectorPlugin plugin : collectorPluginEnumerator.getAll()) {
54
			list.add(plugin.getProtocolDescriptor());
55
		}
56
		return list;
57
	}
58

  
59
	@Override
60
	public List<ProtocolParameterValue> listValidValuesForParam(final String protocol,
61
			final String baseUrl,
62
			final String param,
63
			final Map<String, String> otherParams) throws CollectorServiceException {
64
		final CollectorPlugin plugin = collectorPluginEnumerator.get(protocol);
65

  
66
		for (ProtocolParameter pp : plugin.getProtocolDescriptor().getParams()) {
67
			if (pp.getName().equals(param) && pp.isFunctionPopulated()) { return pp.getPopulateFunction().findValues(baseUrl, otherParams); }
68
		}
69

  
70
		return Lists.newArrayList();
71
	}
72

  
73
	private boolean verifyParams(final Set<String> curr, final Set<String> valid) {
74
		return valid.containsAll(curr);
75
	}
76

  
77
}
modules/dnet-modular-collector-service/tags/dnet-modular-collector-service-3.1.12/src/main/java/eu/dnetlib/data/collector/plugins/oai/engine/XmlCleaner.java
1
package eu.dnetlib.data.collector.plugins.oai.engine;
2

  
3
import java.util.HashMap;
4
import java.util.HashSet;
5
import java.util.Map;
6
import java.util.Set;
7
import java.util.regex.Pattern;
8

  
9
/**
10
 * @author jochen
11
 *
12
 */
13
public class XmlCleaner {
14
	/**
15
	 * Pattern for numeric entities.
16
	 */
17
	private static Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); //$NON-NLS-1$
18
	//	    private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};"); //$NON-NLS-1$
19
	private static Pattern invalidControlCharPattern = Pattern.compile("&#11;");
20
	/**
21
	 * Pattern that negates the allowable XML 4 byte unicode characters. Valid
22
	 * are: #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
23
	 * [#x10000-#x10FFFF]
24
	 */
25
	private static Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); //$NON-NLS-1$
26

  
27
	// Map entities to their unicode equivalent
28
	private static Set<String> goodEntities = new HashSet<String>();
29
	private static Map<String, String> badEntities = new HashMap<String, String>();
30
	
31
	static {
32
		// pre-defined XML entities
33
		goodEntities.add("&quot;"); //$NON-NLS-1$ // quotation mark
34
		goodEntities.add("&amp;"); //$NON-NLS-1$ // ampersand
35
		goodEntities.add("&lt;"); //$NON-NLS-1$ // less-than sign
36
		goodEntities.add("&gt;"); //$NON-NLS-1$ // greater-than sign
37
		// control entities
38
		//badEntities.put("&#11;", "");
39
		// misc entities
40
		badEntities.put("&euro;", "\u20AC"); //$NON-NLS-1$ //$NON-NLS-2$ // euro
41
		badEntities.put("&lsquo;", "\u2018"); //$NON-NLS-1$ //$NON-NLS-2$ // left single quotation mark
42
		badEntities.put("&rsquo;", "\u2019"); //$NON-NLS-1$ //$NON-NLS-2$ // right single quotation mark
43
		// Latin 1 entities
44
		badEntities.put("&nbsp;", "\u00A0"); //$NON-NLS-1$ //$NON-NLS-2$ // no-break space
45
		badEntities.put("&iexcl;", "\u00A1"); //$NON-NLS-1$ //$NON-NLS-2$ // inverted exclamation mark
46
		badEntities.put("&cent;", "\u00A2"); //$NON-NLS-1$ //$NON-NLS-2$ // cent sign
47
		badEntities.put("&pound;", "\u00A3"); //$NON-NLS-1$ //$NON-NLS-2$ // pound sign
48
		badEntities.put("&curren;", "\u00A4"); //$NON-NLS-1$ //$NON-NLS-2$ // currency sign
49
		badEntities.put("&yen;", "\u00A5"); //$NON-NLS-1$ //$NON-NLS-2$ // yen sign
50
		badEntities.put("&brvbar;", "\u00A6"); //$NON-NLS-1$ //$NON-NLS-2$ // broken vertical bar
51
		badEntities.put("&sect;", "\u00A7"); //$NON-NLS-1$ //$NON-NLS-2$ // section sign
52
		badEntities.put("&uml;", "\u00A8"); //$NON-NLS-1$ //$NON-NLS-2$ // diaeresis
53
		badEntities.put("&copy;", "\u00A9"); //$NON-NLS-1$ //$NON-NLS-2$ // copyright sign
54
		badEntities.put("&ordf;", "\u00AA"); //$NON-NLS-1$ //$NON-NLS-2$ // feminine ordinal indicator
55
		badEntities.put("&laquo;", "\u00AB"); //$NON-NLS-1$ //$NON-NLS-2$ // left-pointing double angle quotation mark
56
		badEntities.put("&not;", "\u00AC"); //$NON-NLS-1$ //$NON-NLS-2$ // not sign
57
		badEntities.put("&shy;", "\u00AD"); //$NON-NLS-1$ //$NON-NLS-2$ // soft hyphen
58
		badEntities.put("&reg;", "\u00AE"); //$NON-NLS-1$ //$NON-NLS-2$ // registered sign
59
		badEntities.put("&macr;", "\u00AF"); //$NON-NLS-1$ //$NON-NLS-2$ // macron
60
		badEntities.put("&deg;", "\u00B0"); //$NON-NLS-1$ //$NON-NLS-2$ // degree sign
61
		badEntities.put("&plusmn;", "\u00B1"); //$NON-NLS-1$ //$NON-NLS-2$ // plus-minus sign
62
		badEntities.put("&sup2;", "\u00B2"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript two
63
		badEntities.put("&sup3;", "\u00B3"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript three
64
		badEntities.put("&acute;", "\u00B4"); //$NON-NLS-1$ //$NON-NLS-2$ // acute accent
65
		badEntities.put("&micro;", "\u00B5"); //$NON-NLS-1$ //$NON-NLS-2$ // micro sign
66
		badEntities.put("&para;", "\u00B6"); //$NON-NLS-1$ //$NON-NLS-2$ // pilcrow sign
67
		badEntities.put("&middot;", "\u00B7"); //$NON-NLS-1$ //$NON-NLS-2$ // middle dot
68
		badEntities.put("&cedil;", "\u00B8"); //$NON-NLS-1$ //$NON-NLS-2$ // cedilla
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff