Project

General

Profile

1
package eu.dnetlib.msro.openaireplus.workflows.nodes.objectStore;
2

    
3
import java.io.File;
4
import java.util.regex.Matcher;
5
import java.util.regex.Pattern;
6

    
7
import eu.dnetlib.data.objectstore.rmi.ObjectStoreFile;
8
import eu.dnetlib.data.objectstore.rmi.Protocols;
9
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
10

    
11
public class ArxivIteratorRepository extends IteratorRepository {
12

    
13
	private Pattern pattern;
14

    
15
	public ArxivIteratorRepository(final Iterable<File> input, final String repositoryPrefix, final String oaiPrefix) {
16
		super(input, repositoryPrefix, oaiPrefix);
17
		pattern = Pattern.compile("[0-9]");
18

    
19
	}
20

    
21
	@Override
22
	String generateNextElement() {
23
		try {
24
			String inputname = input.next().getCanonicalPath();
25
			String[] values = inputname.split("/");
26
			if (values.length > 1) {
27
				ObjectStoreFile info = new ObjectStoreFile();
28
				String fileName = values[values.length - 1].replace(".pdf", "");
29
				Matcher matcher = pattern.matcher(fileName);
30
				if (matcher.find()) {
31
					if (matcher.start() != 0) {
32
						fileName = fileName.substring(0, matcher.start()) + "/" + fileName.substring(matcher.start());
33
					}
34
				}
35
				info.setDownloadedURL("http://arxiv.org/abs/" + fileName);
36
				info.setAccessProtocol(Protocols.None);
37
				// String value = "oai:arXiv.org:" + fileName;
38
				String value = oaiPrefix + fileName;
39
				String resultID = repositoryPrefix + "::" + DnetXsltFunctions.md5(value);
40
				info.setObjectID(resultID + "::" + DnetXsltFunctions.md5(info.getDownloadedURL()));
41
				info.setMimeType("pdf");
42
				info.setURI(inputname);
43
				return info.toJSON();
44
			}
45
		} catch (Exception e) {
46
			return null;
47
		}
48
		return null;
49
	}
50

    
51
}
(1-1/6)