1
|
package eu.dnetlib.data.transform.xml2;
|
2
|
|
3
|
import java.io.StringReader;
|
4
|
import java.util.*;
|
5
|
|
6
|
import org.apache.commons.lang3.StringUtils;
|
7
|
import org.dom4j.Attribute;
|
8
|
import org.dom4j.Document;
|
9
|
import org.dom4j.DocumentException;
|
10
|
import org.dom4j.Element;
|
11
|
import org.dom4j.io.SAXReader;
|
12
|
|
13
|
public class Dom4jUtilityParser {
|
14
|
|
15
|
public static final String NS_SEPARATOR = ":";
|
16
|
|
17
|
public static String xpath(final String ... p) {
|
18
|
return Arrays.stream(p)
|
19
|
.map(s -> String.format("/*[local-name()='%s']", s))
|
20
|
.reduce((s1, s2) -> s1 + s2)
|
21
|
.get();
|
22
|
}
|
23
|
|
24
|
public static Document parseXml(final String xml) throws DocumentException {
|
25
|
return new SAXReader().read(new StringReader(xml));
|
26
|
}
|
27
|
|
28
|
public static int countNodes(final Document document, final String xpath) throws VtdException {
|
29
|
if (StringUtils.isBlank(xpath)) {
|
30
|
return 0;
|
31
|
}
|
32
|
List res = document.selectNodes(xpath);
|
33
|
return res != null ? res.size() : 0;
|
34
|
}
|
35
|
|
36
|
public static Node getNode(final Document document, final String xpath) throws VtdException {
|
37
|
if (StringUtils.isBlank(xpath)) {
|
38
|
return new Node();
|
39
|
}
|
40
|
|
41
|
return asNode(document.selectSingleNode(xpath));
|
42
|
}
|
43
|
|
44
|
public static List<Node> getNodes(final Document document, final String xpath) throws VtdException {
|
45
|
final List<Node> results = new ArrayList<>();
|
46
|
if (StringUtils.isBlank(xpath)) {
|
47
|
return results;
|
48
|
}
|
49
|
|
50
|
for(final Object o : document.selectNodes(xpath)) {
|
51
|
results.add(asNode((org.dom4j.Node) o));
|
52
|
}
|
53
|
|
54
|
return results;
|
55
|
}
|
56
|
|
57
|
private static Node asNode(final org.dom4j.Node dNode) {
|
58
|
final Node currentNode = new Node();
|
59
|
if (dNode == null) return currentNode;
|
60
|
final String name = dNode.getName();
|
61
|
currentNode.setName(name.contains(NS_SEPARATOR) ? StringUtils.substringAfter(name, NS_SEPARATOR) : name);
|
62
|
currentNode.setTextValue(dNode.getText());
|
63
|
currentNode.setAttributes(getAttributes(dNode));
|
64
|
|
65
|
return currentNode;
|
66
|
}
|
67
|
|
68
|
private static Map<String, String> getAttributes(final org.dom4j.Node dNode) {
|
69
|
final Map<String, String> attributes = new HashMap<>();
|
70
|
|
71
|
if (dNode instanceof Element) {
|
72
|
Iterator<Attribute> it = ((Element) dNode).attributeIterator();
|
73
|
while(it.hasNext()) {
|
74
|
final Attribute a = it.next();
|
75
|
attributes.put(a.getName(), a.getValue());
|
76
|
}
|
77
|
}
|
78
|
return attributes;
|
79
|
}
|
80
|
|
81
|
public static String getFirstValue(final Document document, final String xpath) throws VtdException {
|
82
|
if (StringUtils.isBlank(xpath)) {
|
83
|
return null;
|
84
|
}
|
85
|
|
86
|
return document.valueOf(xpath);
|
87
|
}
|
88
|
|
89
|
}
|