Project

General

Profile

1
package eu.dnetlib.miscutils.iterators.xml;
2

    
3
import static org.junit.Assert.assertNotNull;
4
import static org.junit.Assert.assertTrue;
5

    
6
import java.io.*;
7
import java.net.URI;
8
import java.nio.charset.StandardCharsets;
9
import java.nio.file.*;
10
import java.util.Iterator;
11
import java.util.zip.GZIPInputStream;
12
import java.util.zip.ZipInputStream;
13

    
14
import eu.dnetlib.rmi.data.CollectorServiceException;
15
import org.apache.commons.io.IOUtils;
16
import org.apache.http.HttpStatus;
17
import org.apache.http.client.methods.CloseableHttpResponse;
18
import org.apache.http.client.methods.HttpGet;
19
import org.apache.http.impl.client.CloseableHttpClient;
20
import org.apache.http.impl.client.HttpClients;
21
import org.dom4j.Document;
22
import org.dom4j.DocumentException;
23
import org.dom4j.io.SAXReader;
24
import org.junit.Before;
25
import org.junit.Ignore;
26
import org.junit.Test;
27
import org.springframework.core.io.ClassPathResource;
28
import org.springframework.core.io.Resource;
29
import sun.nio.ch.IOUtil;
30

    
31
public class IterableXmlParserTest {
32

    
33
	private Resource xmlZip = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/opendoar.zip");
34

    
35
	private Resource xmlGz = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/opendoar.xml.gz");
36

    
37
	private Resource xmlZipErr = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/opendoarErr.zip");
38

    
39
	private Resource xmlSingle = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/singleRepo.xml");
40

    
41
	private Resource xmlBig = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/big.xml");
42

    
43
	private String element = "repository";
44

    
45
	private IterableXmlParser parser;
46

    
47
	private SAXReader reader;
48

    
49
	@Before
50
	public void setUp() throws Exception {
51
		reader = new SAXReader();
52
	}
53

    
54
	@Test
55
	public void testGz() throws Exception {
56
		doTest(new GZIPInputStream(xmlGz.getInputStream()), element);
57
	}
58

    
59
	@Test
60
	public void test() throws Exception {
61
		doTest(read(new ZipInputStream(xmlZip.getInputStream())), element);
62
	}
63

    
64
	@Test
65
	public void testErr() throws Exception {
66
		doTest(read(new ZipInputStream(xmlZipErr.getInputStream())), element);
67
	}
68

    
69
	@Test
70
	public void testSingle() throws Exception {
71
		doTest(xmlSingle.getInputStream(), element);
72
	}
73

    
74
	@Test
75
	public void testOaiRecord() throws Exception {
76
		int count = doTest(new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/oaiRecord.xml").getInputStream(), "record");
77
		assertTrue(count == 1);
78
	}
79

    
80
	@Test
81
	public void testWeird() throws Exception {
82
		int count = doTest(new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/weirdRecords.xml").getInputStream(), "record");
83
		assertTrue(count == 3);
84
	}
85

    
86
	@Test
87
	public void testWeirdGz() throws Exception {
88
		int count = doTest(new GZIPInputStream(new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/weirdRecords.xml.gz").getInputStream()), "record");
89
		assertTrue(count == 3);
90
	}
91

    
92
	@Test
93
	public void testBig() throws Exception {
94
		int count = doTest(xmlBig.getInputStream(), "Entry");
95
		System.out.println(count);
96
	}
97

    
98
	@Test
99
	public void testBigRemote() throws Exception {
100
		Iterator<String> s = new XMLIterator("Entry", xmlBig.getInputStream());
101
		int i =0;
102
		Path dir = Paths.get("/tmp/foresight");
103
		Files.createDirectories(dir);
104
		while(s.hasNext()){
105
			System.out.println(++i);
106

    
107
			final Path path = Files.createTempFile(dir,"foresight", i+".xml");
108
			System.out.println("Temp file : " + path);
109

    
110
			//Writing data here
111
			byte[] strToBytes = s.next().getBytes();
112
			Files.write(path, strToBytes);
113

    
114
		}
115
	}
116

    
117
	private int doTest(final InputStream stream, final String element) throws DocumentException {
118
		parser = new IterableXmlParser(element, stream);
119
		int count = 0;
120
		for (String xml : parser) {
121
			System.out.println(xml);
122
			Document doc = reader.read(new StringReader(xml));
123
			assertNotNull(doc);
124
			assertNotNull(doc.selectSingleNode("//" + element));
125
			count++;
126
		}
127
		return count;
128
	}
129

    
130
	// helper method, reads the compressed text out of the xmlZip file
131
	private InputStream read(final ZipInputStream zis) throws IOException {
132

    
133
		final StringWriter sw = new StringWriter();
134
		while (zis.getNextEntry() != null) {
135

    
136
			byte[] buffer = new byte[1];
137

    
138
			while (zis.read(buffer) != -1) {
139
				IOUtils.write(buffer, sw, "UTF-8");
140
			}
141
		}
142
		zis.close();
143
		sw.close();
144

    
145
		return new ByteArrayInputStream(sw.toString().getBytes());
146
	}
147

    
148
}
    (1-1/1)