|
1 |
package eu.dnetlib.repo.manager.oaipmh;
|
|
2 |
|
|
3 |
import org.apache.http.HttpRequest;
|
|
4 |
import org.apache.http.HttpResponse;
|
|
5 |
import org.apache.http.client.HttpClient;
|
|
6 |
import org.apache.http.client.methods.HttpGet;
|
|
7 |
import org.apache.http.client.methods.HttpUriRequest;
|
|
8 |
import org.apache.http.client.methods.RequestBuilder;
|
|
9 |
import org.apache.http.impl.client.HttpClientBuilder;
|
|
10 |
import org.w3c.dom.Document;
|
|
11 |
import org.w3c.dom.Node;
|
|
12 |
import org.w3c.dom.NodeList;
|
|
13 |
import org.xml.sax.SAXException;
|
|
14 |
|
|
15 |
import javax.xml.XMLConstants;
|
|
16 |
import javax.xml.parsers.DocumentBuilder;
|
|
17 |
import javax.xml.parsers.DocumentBuilderFactory;
|
|
18 |
import javax.xml.parsers.ParserConfigurationException;
|
|
19 |
import javax.xml.transform.stream.StreamSource;
|
|
20 |
import javax.xml.validation.Schema;
|
|
21 |
import javax.xml.validation.SchemaFactory;
|
|
22 |
import javax.xml.validation.Validator;
|
|
23 |
import javax.xml.xpath.XPath;
|
|
24 |
import javax.xml.xpath.XPathConstants;
|
|
25 |
import javax.xml.xpath.XPathExpressionException;
|
|
26 |
import javax.xml.xpath.XPathFactory;
|
|
27 |
import java.io.IOException;
|
|
28 |
import java.io.InputStream;
|
|
29 |
import java.net.URL;
|
|
30 |
import java.util.HashMap;
|
|
31 |
import java.util.Map;
|
|
32 |
|
|
33 |
public class OAIPMHClient {
|
|
34 |
public void verifyURL(String url) throws OAIPMHException {
|
|
35 |
try {
|
|
36 |
HttpClient client = HttpClientBuilder.create().build();
|
|
37 |
HttpUriRequest request = RequestBuilder.get().setUri(url).
|
|
38 |
addParameter("verb", "Identify").build();
|
|
39 |
|
|
40 |
HttpResponse response = client.execute(request);
|
|
41 |
|
|
42 |
if (response.getStatusLine().getStatusCode() == 200) {
|
|
43 |
if (!validateResponse(response.getEntity().getContent())) {
|
|
44 |
|
|
45 |
try {
|
|
46 |
getSets(url);
|
|
47 |
} catch (OAIPMHException e) {
|
|
48 |
throw e;
|
|
49 |
}
|
|
50 |
//throw new OAIPMHException("Response is not valid according to OAI-PMH schema");
|
|
51 |
}
|
|
52 |
} else {
|
|
53 |
throw new OAIPMHException("Error getting response from server: '" + response.getStatusLine().getReasonPhrase());
|
|
54 |
}
|
|
55 |
} catch (IOException e) {
|
|
56 |
throw new OAIPMHException("Error verifying URL ", e);
|
|
57 |
}
|
|
58 |
|
|
59 |
}
|
|
60 |
|
|
61 |
public Map<String, String> getSets(String url) throws OAIPMHException {
|
|
62 |
Map<String, String> result = new HashMap<>();
|
|
63 |
|
|
64 |
try {
|
|
65 |
HttpClient client = HttpClientBuilder.create().build();
|
|
66 |
|
|
67 |
HttpResponse response = client.execute(new HttpGet(url + "?verb=ListSets"));
|
|
68 |
|
|
69 |
if (response.getStatusLine().getStatusCode() != 200) {
|
|
70 |
throw new OAIPMHException("Error getting response from server. Error: '" + response.getStatusLine().getReasonPhrase());
|
|
71 |
}
|
|
72 |
|
|
73 |
if (!validateResponse(response.getEntity().getContent())) {
|
|
74 |
throw new OAIPMHException("Response is not valid according to OAI-PMH schema");
|
|
75 |
}
|
|
76 |
|
|
77 |
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
|
78 |
DocumentBuilder db = dbf.newDocumentBuilder();
|
|
79 |
XPathFactory xpf = XPathFactory.newInstance();
|
|
80 |
XPath xpath = xpf.newXPath();
|
|
81 |
|
|
82 |
String token = null;
|
|
83 |
do {
|
|
84 |
RequestBuilder builder = RequestBuilder.get().setUri(url).addParameter("verb", "ListSets");
|
|
85 |
|
|
86 |
if (token != null)
|
|
87 |
builder.addParameter("resumptionToken", token);
|
|
88 |
|
|
89 |
HttpUriRequest request = builder.build();
|
|
90 |
|
|
91 |
Document xml = db.parse(client.execute(request).getEntity().getContent());
|
|
92 |
NodeList sets = (NodeList) xpath.evaluate("//*[local-name()='setSpec']", xml, XPathConstants.NODESET);
|
|
93 |
|
|
94 |
for (int i = 0; i < sets.getLength(); i++) {
|
|
95 |
String setSpec = sets.item(i).getTextContent();
|
|
96 |
String setName = sets.item(i).getNextSibling().getTextContent();
|
|
97 |
|
|
98 |
System.out.println(setSpec);
|
|
99 |
result.put(setSpec, setName);
|
|
100 |
}
|
|
101 |
|
|
102 |
token = ((Node) xpath.evaluate("//*[local-name()='resumptionToken']", xml, XPathConstants.NODE)).getTextContent();
|
|
103 |
} while (token != null && !token.isEmpty());
|
|
104 |
} catch (IOException | ParserConfigurationException | SAXException | XPathExpressionException e) {
|
|
105 |
throw new OAIPMHException("Error getting sets", e);
|
|
106 |
}
|
|
107 |
|
|
108 |
return result;
|
|
109 |
}
|
|
110 |
|
|
111 |
private boolean validateResponse(InputStream response) {
|
|
112 |
try {
|
|
113 |
SchemaFactory factory =
|
|
114 |
SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
|
|
115 |
Schema schema = factory.newSchema(new StreamSource(new URL("http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd").openStream()));
|
|
116 |
Validator validator = schema.newValidator();
|
|
117 |
validator.validate(new StreamSource(response));
|
|
118 |
return true;
|
|
119 |
} catch(Exception ex) {
|
|
120 |
ex.printStackTrace();
|
|
121 |
return false;
|
|
122 |
}
|
|
123 |
}
|
|
124 |
|
|
125 |
public static void main(String[] args) throws OAIPMHException {
|
|
126 |
// new OAIPMHClient().verifyURL("http://digital.csic.es/dspace-oai/request");
|
|
127 |
|
|
128 |
Map<String, String> sets = new OAIPMHClient().getSets("http://digital.csic.es/dspace-oai/request");
|
|
129 |
|
|
130 |
sets.entrySet().forEach(key -> {
|
|
131 |
System.out.println(sets.get(key));
|
|
132 |
});
|
|
133 |
}
|
|
134 |
}
|
First implementation of the new OAI-PMH client.