Project

General

Profile

1
package eu.dnetlib.msro.openaireplus.workflows.nodes.datacite;
2

    
3
import java.io.ByteArrayInputStream;
4
import java.io.InputStream;
5
import java.io.StringWriter;
6
import java.nio.charset.StandardCharsets;
7
import java.util.ArrayList;
8
import java.util.List;
9
import java.util.Map;
10
import java.util.Queue;
11
import javax.xml.XMLConstants;
12
import javax.xml.parsers.DocumentBuilderFactory;
13
import javax.xml.transform.Transformer;
14
import javax.xml.transform.TransformerFactory;
15
import javax.xml.transform.dom.DOMSource;
16
import javax.xml.transform.stream.StreamResult;
17
import javax.xml.xpath.XPath;
18
import javax.xml.xpath.XPathConstants;
19
import javax.xml.xpath.XPathFactory;
20

    
21
import com.google.common.collect.Maps;
22
import org.apache.commons.lang.StringUtils;
23
import org.apache.commons.logging.Log;
24
import org.apache.commons.logging.LogFactory;
25
import org.w3c.dom.*;
26

    
27
/**
28
 * The Class SplitterDatasetsIterator.
29
 */
30
public class SplitterDatasetsIterator {
31

    
32
	/**
33
	 * The Constant log.
34
	 */
35
	private static final Log log = LogFactory.getLog(SplitterDatasetsIterator.class);
36

    
37
	/**
38
	 * The end queue.
39
	 */
40
	public static String END_QUEUE = "END_QUEUE";
41

    
42
	/**
43
	 * The publications.
44
	 */
45
	private Queue<String> publications;
46

    
47
	/**
48
	 * The input epr.
49
	 */
50
	private Iterable<String> inputEPR;
51

    
52
	/**
53
	 * The root name.
54
	 */
55
	private String rootName;
56

    
57
	/**
58
	 * Instantiates a new splitter datasets iterator.
59
	 *
60
	 * @param publicationsQueue the publications queue
61
	 * @param inputEPR          the input epr
62
	 * @param rootName          the root name
63
	 */
64
	public SplitterDatasetsIterator(final Queue<String> publicationsQueue, final Iterable<String> inputEPR, final String rootName) {
65
		this.publications = publicationsQueue;
66
		this.inputEPR = inputEPR;
67
		this.rootName = rootName;
68
	}
69

    
70
	/**
71
	 * Populate queues.
72
	 */
73
	public void populateQueues() {
74

    
75
		if (this.inputEPR == null) return;
76
		for (String inputXML : inputEPR) {
77

    
78
			final ByteArrayInputStream bais = new ByteArrayInputStream(inputXML.getBytes(StandardCharsets.UTF_8));
79
			final List<String> publication_extracted = extractByTag(bais, "publication");
80

    
81
			if (publication_extracted != null) {
82
				publications.addAll(publication_extracted);
83

    
84
			}
85
		}
86
		publications.add(END_QUEUE);
87
	}
88

    
89
	/**
90
	 * Extract by tag.
91
	 *
92
	 * @param inputXML the input xml
93
	 * @param tag      the tag
94
	 * @return the list
95
	 */
96
	private List<String> extractByTag(final InputStream inputXML, final String tag) {
97
		try {
98

    
99
			DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
100
			Document doc = dbf.newDocumentBuilder().parse(inputXML);
101

    
102
			XPath xpath = XPathFactory.newInstance().newXPath();
103

    
104
			Node rootNode = (Node) xpath.evaluate("//*[local-name()='" + this.rootName + "']", doc, XPathConstants.NODE);
105

    
106
			NamedNodeMap attributes = rootNode.getAttributes();
107

    
108
			Map<String, String> nameSpaces = Maps.newHashMap();
109

    
110
			for (int i = 0; i < attributes.getLength(); i++) {
111
				Node node = attributes.item(i);
112
				String name = node.getNodeName();
113
				if (name.startsWith("xmlns:")) {
114
					nameSpaces.put(StringUtils.substringAfter(name, "xmlns:"), node.getNodeValue());
115
				}
116

    
117
			}
118
			xpath = XPathFactory.newInstance().newXPath();
119
			NodeList nodes = (NodeList) xpath.evaluate("//*[local-name()='" + tag + "']/*[local-name()='record']", doc, XPathConstants.NODESET);
120

    
121
			if ((nodes != null) && (nodes.getLength() > 0)) {
122
				List<String> result = new ArrayList<>();
123
				for (int i = 0; i < nodes.getLength(); i++) {
124
					Document currentDoc = dbf.newDocumentBuilder().newDocument();
125
					Node imported = currentDoc.importNode(nodes.item(i), true);
126
					for (String key : nameSpaces.keySet()) {
127
						Element element = (Element) imported;
128
						element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, "xmlns:" + key, nameSpaces.get(key));
129
					}
130
					Transformer transformer = TransformerFactory.newInstance().newTransformer();
131
					DOMSource mydoc = new DOMSource(imported);
132
					StringWriter writer = new StringWriter();
133
					transformer.transform(mydoc, new StreamResult(writer));
134
					String record = writer.toString();
135
					result.add(record);
136
				}
137
				return result;
138
			}
139
		} catch (Exception e) {
140
			log.error("Error on extracting " + tag, e);
141
			return null;
142
		}
143
		return null;
144
	}
145
}
(4-4/4)