Project

General

Profile

« Previous | Next » 

Revision 50066

Collector plugin for REST APIs

View differences:

modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/rest/RestIteratorTest.java
1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collector.plugins.rest;
5

  
6
import java.io.FileWriter;
7

  
8
import org.junit.Test;
9

  
10
/**
11
 * @author js
12
 *
13
 */
14
public class RestIteratorTest {
15

  
16
	private String baseUrl = "https://share.osf.io/api/v2/search/creativeworks/_search";
17
	private String resumptionType = "count";
18
	private String resumptionParam = "from";
19
	private String resumptionXpath = "";
20
	private String resultTotalXpath = "//hits/total";
21
	private String resultFormatParam = "format";
22
	private String resultFormatValue = "json";
23
	private String resultSizeParam = "size";
24
	private int resultSizeValue = 100;
25
	private String query = "q=%28sources%3ASocArXiv+AND+type%3Apreprint%29";
26

  
27

  
28
	@Test
29
	public void test(){
30
		final RestIterator iterator = new RestIterator(baseUrl, resumptionType, resumptionParam, resumptionXpath, resultTotalXpath, resultFormatParam, resultFormatValue, resultSizeParam, resultSizeValue, query);
31
		int i =20;
32
		while (iterator.hasNext() && i > 0) {
33
			String result = iterator.next();
34
//			try {
35
//				FileWriter writer = new FileWriter("/tmp/" + i + ".xml");
36
//				writer.write(result);
37
//				writer.close();				
38
//			}catch(Exception e) {
39
//				e.printStackTrace();
40
//			}
41
			
42
			i--;
43
		}
44
	}
45
}
46

  
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/rest/RestIteratorFactory.java
1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collector.plugins.rest;
5

  
6
import java.util.Iterator;
7

  
8
/**
9
 * @author Jochen Schirrwagen
10
 *
11
 */
12
public class RestIteratorFactory {
13

  
14
	public Iterator<String> newIterator(
15
			final String baseUrl,
16
			final String resumptionType,
17
			final String resumptionParam,
18
			final String resumptionXpath,
19
			final String resultTotalXpath,
20
			final String resultFormatParam,
21
			final String resultFormatValue,
22
			final String resultSizeParam,
23
			final int resultSizeValue,
24
			final String query
25
			){
26
		return new RestIterator(baseUrl, resumptionType, resumptionParam, resumptionXpath, resultTotalXpath, resultFormatParam, resultFormatValue, resultSizeParam, resultSizeValue, query);
27
	}
28
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/rest/RestCollectorPlugin.java
1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collector.plugins.rest;
5

  
6
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
7
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
8
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
9

  
10
/**
11
 * @author js
12
 *
13
 */
14
public class RestCollectorPlugin extends AbstractCollectorPlugin {
15

  
16
	@Override
17
	public Iterable<String> collect(InterfaceDescriptor arg0, String arg1, String arg2)
18
			throws CollectorServiceException {
19
		// TODO Auto-generated method stub
20
		return null;
21
	}
22

  
23
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/rest/RestIterator.java
1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collector.plugins.rest;
5

  
6
import java.io.InputStream;
7
import java.net.URL;
8
import java.util.Iterator;
9
import java.util.LinkedList;
10
import java.util.Queue;
11

  
12
import javax.xml.transform.OutputKeys;
13
import javax.xml.transform.Transformer;
14
import javax.xml.transform.TransformerConfigurationException;
15
import javax.xml.transform.TransformerFactory;
16
import javax.xml.xpath.XPath;
17
import javax.xml.xpath.XPathConstants;
18
import javax.xml.xpath.XPathExpression;
19
import javax.xml.xpath.XPathExpressionException;
20
import javax.xml.xpath.XPathFactory;
21

  
22
import org.apache.commons.io.IOUtils;
23
import org.w3c.dom.Node;
24
import org.xml.sax.InputSource;
25

  
26
/**
27
 * @author Jochen Schirrwagen, Aenne Loehden
28
 *
29
 */
30
public class RestIterator implements Iterator<String> {
31

  
32
	private static final String wrapName = "recordWrap";
33
	private String baseUrl;
34
	private String resumptionType;
35
	private String resumptionParam;
36
	private String resultFormatValue;
37
	private String queryParams;
38
	private int resultSizeValue = 100;
39
	private Queue<String> queue;
40
	private int resumptionInt = 0;			// integer resumption token (first record to harvest)
41
	private int resultTotal = -1;
42
	private String resumptionStr = Integer.toString(resumptionInt);  // string resumption token (first record to harvest or token scanned from results)
43
	private InputStream resultStream;
44
	private Transformer transformer;
45
	private XPath xpath;
46
	private XPathExpression xprResultTotalPath;
47
	private XPathExpression xprResumptionPath;
48
	private String queryFormat;
49
	private String querySize;
50
	
51
	/*
52
	 * 
53
	 */
54
	public RestIterator(
55
			final String baseUrl,
56
			final String resumptionType,
57
			final String resumptionParam,
58
			final String resumptionXpath,
59
			final String resultTotalXpath,
60
			final String resultFormatParam,
61
			final String resultFormatValue,
62
			final String resultSizeParam,
63
			final int resultSizeValue,
64
			final String queryParams
65
			) {
66
		this.baseUrl = baseUrl;
67
		this.resumptionType = resumptionType;
68
		this.resumptionParam = resumptionParam;
69
		this.resultFormatValue = resultFormatValue;
70
		this.resultSizeValue = resultSizeValue;
71
		this.queryParams = queryParams;
72
		
73
        queryFormat = (resultFormatParam!="")? "&" + resultFormatParam + "=" + resultFormatValue : "";
74
        querySize = (resultSizeParam!="")? "&" + resultSizeParam + "=" + resultSizeValue : "";
75

  
76
		try {
77
			initXmlTransformation(resultTotalXpath, resumptionXpath);
78
		}catch(Exception exp) {
79
			throw new IllegalStateException("xml transformation init failed: " + exp.getMessage());
80
		}
81
        initQueue();
82
        updateQueue();
83
	}
84
	
85
	private void initXmlTransformation(String resultTotalXpath, String resumptionXpath) throws TransformerConfigurationException, XPathExpressionException{
86
		String resumpXpath = (resumptionXpath=="") ? "/" : resumptionXpath;
87

  
88
		transformer = TransformerFactory.newInstance().newTransformer();
89
        transformer.setOutputProperty(OutputKeys.INDENT,"yes"); 
90
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount","3");
91
		xpath = XPathFactory.newInstance().newXPath();
92
		xprResultTotalPath = xpath.compile(resultTotalXpath);
93
		xprResumptionPath = xpath.compile(resumpXpath);
94
	}
95
	
96
	private void initQueue() {
97
		queue = new LinkedList<String>();
98
	}
99
	
100
	private void disconnect() {
101
		// TODO close inputstream
102
	}
103
	
104
	private void updateQueue() {
105
        String query = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
106
        System.out.println("query: " + query);
107
        queue.add(query);
108
	}
109
	
110
	/* (non-Javadoc)
111
	 * @see java.util.Iterator#hasNext()
112
	 */
113
	@Override
114
	public boolean hasNext() {
115
		if (queue.isEmpty()) {
116
			disconnect();
117
			return false;
118
		} else {
119
			return true;
120
		}
121
	}
122

  
123
	/* (non-Javadoc)
124
	 * @see java.util.Iterator#next()
125
	 */
126
	@Override
127
	public String next() {
128
		// TODO Auto-generated method stub
129
		String nextQuery = queue.remove();
130
		String resultJson;
131
		String resultXml = "";
132
		try {
133
            resultStream = new URL(nextQuery).openStream();
134
			if(resultFormatValue == "json"){				
135
				resultJson = IOUtils.toString(resultStream,"UTF-8");
136
				// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
137
				while(resultJson.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")){
138
					resultJson = resultJson.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
139
				}
140
				org.json.JSONObject jsonObject = new org.json.JSONObject(resultJson);
141
				resultXml = org.json.XML.toString(jsonObject,wrapName); // wrap xml in single root element
142
//				System.out.println(resultXml);
143
				resultStream = IOUtils.toInputStream(resultXml,"UTF-8");
144
			}
145
			
146
			InputSource inSource = new InputSource(resultStream);
147

  
148
			Node resultNode = (Node) xpath.evaluate("/", inSource, XPathConstants.NODE);
149
			resumptionInt += resultSizeValue;
150
			if(resumptionType=="scan"){ resumptionStr = xprResumptionPath.evaluate(resultNode);}
151
			if(resumptionType=="count"){ resumptionStr = Integer.toString(resumptionInt); }
152

  
153
			if (resultTotal == -1) {
154
				resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
155
				System.out.println("resultTotal: " + resultTotal);
156
			}
157
			System.out.println("resultTotal: " + resultTotal);
158
			System.out.println("resInt: " + resumptionInt);
159
			if (resumptionInt < resultTotal) {
160
				updateQueue();
161
			}
162
			return resultXml;
163

  
164
		}catch(Exception exc) {
165
			exc.printStackTrace(System.err);
166
			throw new IllegalStateException("collection failed: " + exc.getMessage());
167
		}
168
	}
169

  
170
}

Also available in: Unified diff