Project

General

Profile

1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collector.plugins.rest;
5

    
6
import java.io.InputStream;
7
import java.io.StringWriter;
8
import java.net.URL;
9
import java.util.Iterator;
10
import java.util.Queue;
11
import java.util.concurrent.PriorityBlockingQueue;
12

    
13
import javax.xml.transform.OutputKeys;
14
import javax.xml.transform.Transformer;
15
import javax.xml.transform.TransformerConfigurationException;
16
import javax.xml.transform.TransformerFactory;
17
import javax.xml.transform.dom.DOMSource;
18
import javax.xml.transform.stream.StreamResult;
19
import javax.xml.xpath.XPath;
20
import javax.xml.xpath.XPathConstants;
21
import javax.xml.xpath.XPathExpression;
22
import javax.xml.xpath.XPathExpressionException;
23
import javax.xml.xpath.XPathFactory;
24

    
25
import org.apache.commons.io.IOUtils;
26
import org.apache.commons.logging.Log;
27
import org.apache.commons.logging.LogFactory;
28
import org.w3c.dom.Node;
29
import org.w3c.dom.NodeList;
30
import org.xml.sax.InputSource;
31

    
32
import eu.dnetlib.data.collector.plugins.oai.OaiIterator;
33
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
34

    
35
/**
36
 * @author Jochen Schirrwagen, Aenne Loehden
37
 *
38
 */
39
public class RestIterator implements Iterator<String> {
40

    
41
	private static final Log log = LogFactory.getLog(OaiIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
42

    
43
	private static final String wrapName = "recordWrap";
44
	private String baseUrl;
45
	private String resumptionType;
46
	private String resumptionParam;
47
	private String resultFormatValue;
48
	private String queryParams;
49
	private int resultSizeValue = 100;
50
	private int resumptionInt = 0;			// integer resumption token (first record to harvest)
51
	private int resultTotal = -1;
52
	private String resumptionStr = Integer.toString(resumptionInt);  // string resumption token (first record to harvest or token scanned from results)
53
	private InputStream resultStream;
54
	private Transformer transformer;
55
	private XPath xpath;
56
	private String query;
57
	private XPathExpression xprResultTotalPath;
58
	private XPathExpression xprResumptionPath;
59
	private XPathExpression xprEntity;
60
	private String queryFormat;
61
	private String querySize;
62
	private Queue<String> recordQueue = new PriorityBlockingQueue<String>();
63
	
64
	/*
65
	 * 
66
	 */
67
	public RestIterator(
68
			final String baseUrl,
69
			final String resumptionType,
70
			final String resumptionParam,
71
			final String resumptionXpath,
72
			final String resultTotalXpath,
73
			final String resultFormatParam,
74
			final String resultFormatValue,
75
			final String resultSizeParam,
76
			final String queryParams,
77
			final String entityXpath
78
			) {
79
		this.baseUrl = baseUrl;
80
		this.resumptionType = resumptionType;
81
		this.resumptionParam = resumptionParam;
82
		this.resultFormatValue = resultFormatValue;
83
		this.queryParams = queryParams;
84
		
85
        queryFormat = (resultFormatParam!="")? "&" + resultFormatParam + "=" + resultFormatValue : "";
86
        querySize = (resultSizeParam!="")? "&" + resultSizeParam + "=" + resultSizeValue : "";
87

    
88
		try {
89
			initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath);
90
		}catch(Exception exp) {
91
			throw new IllegalStateException("xml transformation init failed: " + exp.getMessage());
92
		}
93
        initQueue();
94
	}
95
	
96
	private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) throws TransformerConfigurationException, XPathExpressionException{
97
		String resumpXpath = (resumptionXpath=="") ? "/" : resumptionXpath;
98

    
99
		transformer = TransformerFactory.newInstance().newTransformer();
100
        transformer.setOutputProperty(OutputKeys.INDENT,"yes"); 
101
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount","3");
102
		xpath = XPathFactory.newInstance().newXPath();
103
		xprResultTotalPath = xpath.compile(resultTotalXpath);
104
		xprResumptionPath = xpath.compile(resumpXpath);
105
		xprEntity = xpath.compile(entityXpath);
106
	}
107
	
108
	private void initQueue() {
109
		query = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
110
	}
111
	
112
	private void disconnect() {
113
		// TODO close inputstream
114
	}
115
	
116
	/* (non-Javadoc)
117
	 * @see java.util.Iterator#hasNext()
118
	 */
119
	@Override
120
	public boolean hasNext() {
121
		if (recordQueue.isEmpty() && query.isEmpty()) {
122
			disconnect();
123
			return false;
124
		} else {
125
			return true;
126
		}
127
	}
128

    
129
	/* (non-Javadoc)
130
	 * @see java.util.Iterator#next()
131
	 */
132
	@Override
133
	public String next() {
134
		// TODO Auto-generated method stub
135
		
136
		synchronized (recordQueue) {
137
			while (recordQueue.isEmpty() && !query.isEmpty() ) {
138
				try {
139
					query = downloadPage(query);
140
				}catch(CollectorServiceException e) {
141
					throw new RuntimeException(e);
142
				}
143
			}
144
			return recordQueue.poll();
145
		}
146
	}
147
	
148
	
149
	/*
150
	 * download page and return nextQuery
151
	 */
152
	private String downloadPage(String query) throws CollectorServiceException{
153
		String resultJson;
154
		String resultXml = "";
155
		String nextQuery = "";
156
		try {
157
            resultStream = new URL(query).openStream();
158
			if(resultFormatValue == "json"){				
159
				resultJson = IOUtils.toString(resultStream,"UTF-8");
160
				// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
161
				while(resultJson.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")){
162
					resultJson = resultJson.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
163
				}
164
				org.json.JSONObject jsonObject = new org.json.JSONObject(resultJson);
165
				resultXml = org.json.XML.toString(jsonObject,wrapName); // wrap xml in single root element
166
//				System.out.println(resultXml);
167
				resultStream = IOUtils.toInputStream(resultXml,"UTF-8");
168
			}
169
			
170
			InputSource inSource = new InputSource(resultStream);
171

    
172
			Node resultNode = (Node) xpath.evaluate("/", inSource, XPathConstants.NODE);
173

    
174
			NodeList nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
175
			
176
			for (int i = 0; i < nodeList.getLength(); i++) {
177
				Node n = nodeList.item(i);
178
				StringWriter sw = new StringWriter();
179
				transformer.transform(new DOMSource(n), new StreamResult(sw));
180
				recordQueue.add(sw.toString());
181
			}
182
				
183
			resumptionInt += resultSizeValue;
184
			if(resumptionType=="scan"){ resumptionStr = xprResumptionPath.evaluate(resultNode);}
185
			if(resumptionType=="count"){ resumptionStr = Integer.toString(resumptionInt); }
186

    
187
			if (resultTotal == -1) {
188
				resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
189
				System.out.println("resultTotal: " + resultTotal);
190
			}
191
			System.out.println("resultTotal: " + resultTotal);
192
			System.out.println("resInt: " + resumptionInt);
193
			if (resumptionInt < resultTotal) {
194
				nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
195
			}else
196
				nextQuery = "";
197
			return nextQuery;
198

    
199
		}catch(Exception exc) {
200
			exc.printStackTrace(System.err);
201
			throw new IllegalStateException("collection failed: " + exc.getMessage());
202
		}
203

    
204
	}
205

    
206
}
(2-2/2)