Project

General

Profile

1
package eu.dnetlib.msro.workflows.nodes.objectstore;
2

    
3
import java.io.File;
4
import java.util.regex.Matcher;
5
import java.util.regex.Pattern;
6

    
7
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
8
import eu.dnetlib.rmi.data.ObjectStoreFile;
9
import eu.dnetlib.rmi.data.Protocols;
10

    
11
public class ArxivIteratorRepository extends IteratorRepository {
12

    
13
	private final Pattern pattern;
14

    
15
	public ArxivIteratorRepository(final Iterable<File> input, final String repositoryPrefix, final String oaiPrefix) {
16
		super(input, repositoryPrefix, oaiPrefix);
17
		this.pattern = Pattern.compile("[0-9]");
18

    
19
	}
20

    
21
	@Override
22
	String generateNextElement() {
23
		try {
24
			final String inputname = this.input.next().getCanonicalPath();
25
			final String[] values = inputname.split("/");
26
			if (values.length > 1) {
27
				final ObjectStoreFile info = new ObjectStoreFile();
28
				String fileName = values[values.length - 1].replace(".pdf", "");
29
				final Matcher matcher = this.pattern.matcher(fileName);
30
				if (matcher.find()) {
31
					if (matcher.start() != 0) {
32
						fileName = fileName.substring(0, matcher.start()) + "/" + fileName.substring(matcher.start());
33
					}
34
				}
35
				info.setDownloadedURL("http://arxiv.org/abs/" + fileName);
36
				info.setAccessProtocol(Protocols.None);
37
				// String value = "oai:arXiv.org:" + fileName;
38
				final String value = this.oaiPrefix + fileName;
39
				final String resultID = this.repositoryPrefix + "::" + DnetXsltFunctions.md5(value);
40
				info.setObjectID(resultID + "::" + DnetXsltFunctions.md5(info.getDownloadedURL()));
41
				info.setMimeType("pdf");
42
				info.setURI(inputname);
43
				return info.toJSON();
44
			}
45
		} catch (final Exception e) {
46
			return null;
47
		}
48
		return null;
49
	}
50

    
51
}
(1-1/6)