Project

General

Profile

1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collector.plugins.rest;
5

    
6
import java.io.InputStream;
7
import java.net.URL;
8
import java.util.Iterator;
9
import java.util.LinkedList;
10
import java.util.Queue;
11

    
12
import javax.xml.transform.OutputKeys;
13
import javax.xml.transform.Transformer;
14
import javax.xml.transform.TransformerConfigurationException;
15
import javax.xml.transform.TransformerFactory;
16
import javax.xml.xpath.XPath;
17
import javax.xml.xpath.XPathConstants;
18
import javax.xml.xpath.XPathExpression;
19
import javax.xml.xpath.XPathExpressionException;
20
import javax.xml.xpath.XPathFactory;
21

    
22
import org.apache.commons.io.IOUtils;
23
import org.w3c.dom.Node;
24
import org.xml.sax.InputSource;
25

    
26
/**
27
 * @author Jochen Schirrwagen, Aenne Loehden
28
 *
29
 */
30
public class RestIterator implements Iterator<String> {
31

    
32
	private static final String wrapName = "recordWrap";
33
	private String baseUrl;
34
	private String resumptionType;
35
	private String resumptionParam;
36
	private String resultFormatValue;
37
	private String queryParams;
38
	private int resultSizeValue = 100;
39
	private Queue<String> queue;
40
	private int resumptionInt = 0;			// integer resumption token (first record to harvest)
41
	private int resultTotal = -1;
42
	private String resumptionStr = Integer.toString(resumptionInt);  // string resumption token (first record to harvest or token scanned from results)
43
	private InputStream resultStream;
44
	private Transformer transformer;
45
	private XPath xpath;
46
	private XPathExpression xprResultTotalPath;
47
	private XPathExpression xprResumptionPath;
48
	private String queryFormat;
49
	private String querySize;
50
	
51
	/*
52
	 * 
53
	 */
54
	public RestIterator(
55
			final String baseUrl,
56
			final String resumptionType,
57
			final String resumptionParam,
58
			final String resumptionXpath,
59
			final String resultTotalXpath,
60
			final String resultFormatParam,
61
			final String resultFormatValue,
62
			final String resultSizeParam,
63
			final String queryParams
64
			) {
65
		this.baseUrl = baseUrl;
66
		this.resumptionType = resumptionType;
67
		this.resumptionParam = resumptionParam;
68
		this.resultFormatValue = resultFormatValue;
69
		this.queryParams = queryParams;
70
		
71
        queryFormat = (resultFormatParam!="")? "&" + resultFormatParam + "=" + resultFormatValue : "";
72
        querySize = (resultSizeParam!="")? "&" + resultSizeParam + "=" + resultSizeValue : "";
73

    
74
		try {
75
			initXmlTransformation(resultTotalXpath, resumptionXpath);
76
		}catch(Exception exp) {
77
			throw new IllegalStateException("xml transformation init failed: " + exp.getMessage());
78
		}
79
        initQueue();
80
        updateQueue();
81
	}
82
	
83
	private void initXmlTransformation(String resultTotalXpath, String resumptionXpath) throws TransformerConfigurationException, XPathExpressionException{
84
		String resumpXpath = (resumptionXpath=="") ? "/" : resumptionXpath;
85

    
86
		transformer = TransformerFactory.newInstance().newTransformer();
87
        transformer.setOutputProperty(OutputKeys.INDENT,"yes"); 
88
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount","3");
89
		xpath = XPathFactory.newInstance().newXPath();
90
		xprResultTotalPath = xpath.compile(resultTotalXpath);
91
		xprResumptionPath = xpath.compile(resumpXpath);
92
	}
93
	
94
	private void initQueue() {
95
		queue = new LinkedList<String>();
96
	}
97
	
98
	private void disconnect() {
99
		// TODO close inputstream
100
	}
101
	
102
	private void updateQueue() {
103
        String query = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
104
        System.out.println("query: " + query);
105
        queue.add(query);
106
	}
107
	
108
	/* (non-Javadoc)
109
	 * @see java.util.Iterator#hasNext()
110
	 */
111
	@Override
112
	public boolean hasNext() {
113
		if (queue.isEmpty()) {
114
			disconnect();
115
			return false;
116
		} else {
117
			return true;
118
		}
119
	}
120

    
121
	/* (non-Javadoc)
122
	 * @see java.util.Iterator#next()
123
	 */
124
	@Override
125
	public String next() {
126
		// TODO Auto-generated method stub
127
		String nextQuery = queue.remove();
128
		String resultJson;
129
		String resultXml = "";
130
		try {
131
            resultStream = new URL(nextQuery).openStream();
132
			if(resultFormatValue == "json"){				
133
				resultJson = IOUtils.toString(resultStream,"UTF-8");
134
				// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
135
				while(resultJson.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")){
136
					resultJson = resultJson.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
137
				}
138
				org.json.JSONObject jsonObject = new org.json.JSONObject(resultJson);
139
				resultXml = org.json.XML.toString(jsonObject,wrapName); // wrap xml in single root element
140
//				System.out.println(resultXml);
141
				resultStream = IOUtils.toInputStream(resultXml,"UTF-8");
142
			}
143
			
144
			InputSource inSource = new InputSource(resultStream);
145

    
146
			Node resultNode = (Node) xpath.evaluate("/", inSource, XPathConstants.NODE);
147
			resumptionInt += resultSizeValue;
148
			if(resumptionType=="scan"){ resumptionStr = xprResumptionPath.evaluate(resultNode);}
149
			if(resumptionType=="count"){ resumptionStr = Integer.toString(resumptionInt); }
150

    
151
			if (resultTotal == -1) {
152
				resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
153
				System.out.println("resultTotal: " + resultTotal);
154
			}
155
			System.out.println("resultTotal: " + resultTotal);
156
			System.out.println("resInt: " + resumptionInt);
157
			if (resumptionInt < resultTotal) {
158
				updateQueue();
159
			}
160
			return resultXml;
161

    
162
		}catch(Exception exc) {
163
			exc.printStackTrace(System.err);
164
			throw new IllegalStateException("collection failed: " + exc.getMessage());
165
		}
166
	}
167

    
168
}
(2-2/2)