/modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/datacite/SplitterDatasetsIterator.java - D-Net - D-Net project tracking tool

dnet45/modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/datacite/SplitterDatasetsIterator.java @ 48955

       package eu.dnetlib.msro.openaireplus.workflows.nodes.datacite;
       import java.io.ByteArrayInputStream;
       import java.io.InputStream;
       import java.io.StringWriter;
       import java.nio.charset.StandardCharsets;
       import java.util.ArrayList;
       import java.util.List;
       import java.util.Map;
       import java.util.Queue;
       import javax.xml.XMLConstants;
       import javax.xml.parsers.DocumentBuilder;
       import javax.xml.parsers.DocumentBuilderFactory;
       import javax.xml.parsers.ParserConfigurationException;
       import javax.xml.transform.Transformer;
       import javax.xml.transform.TransformerFactory;
       import javax.xml.transform.dom.DOMSource;
       import javax.xml.transform.stream.StreamResult;
       import javax.xml.xpath.XPath;
       import javax.xml.xpath.XPathConstants;
       import javax.xml.xpath.XPathFactory;
       import com.google.common.collect.Maps;
       import org.apache.commons.lang.StringUtils;
       import org.apache.commons.logging.Log;
       import org.apache.commons.logging.LogFactory;
       import org.w3c.dom.*;
       /**
        * The Class SplitterDatasetsIterator.
        */
       public class SplitterDatasetsIterator {
       	/**
       	 * The Constant log.
       	 */
       	private static final Log log = LogFactory.getLog(SplitterDatasetsIterator.class);
       	/**
       	 * The end queue.
       	 */
       	public static String END_QUEUE = "END_QUEUE";
       	/**
       	 * The publications.
       	 */
       	private Queue<String> publications;
       	/**
       	 * The input epr.
       	 */
       	private Iterable<String> inputEPR;
       	/**
       	 * The root name.
       	 */
       	private String rootName;
       	/**
       	 * Instantiates a new splitter datasets iterator.
+      	 *
       	 * @param publicationsQueue the publications queue
       	 * @param inputEPR          the input epr
       	 * @param rootName          the root name
       	 */
       	public SplitterDatasetsIterator(final Queue<String> publicationsQueue, final Iterable<String> inputEPR, final String rootName) {
       		this.publications = publicationsQueue;
       		this.inputEPR = inputEPR;
       		this.rootName = rootName;
+      	}
       	/**
       	 * Populate queues.
       	 */
       	public void populateQueues() {
       		if (this.inputEPR == null) return;
       		for (String inputXML : inputEPR) {
       			final ByteArrayInputStream bais = new ByteArrayInputStream(inputXML.getBytes(StandardCharsets.UTF_8));
       			final List<String> publication_extracted = extractByTag(bais, "publication");
       			if (publication_extracted != null) {
       				publications.addAll(publication_extracted);
+      			}
+      		}
       		publications.add(END_QUEUE);
+      	}
       	/**
       	 * Extract by tag.
+      	 *
       	 * @param inputXML the input xml
       	 * @param tag      the tag
       	 * @return the list
       	 */
       	private List<String> extractByTag(final InputStream inputXML, final String tag) {
       		try {
       			DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
       			Document doc = dbf.newDocumentBuilder().parse(inputXML);
       			XPath xpath = XPathFactory.newInstance().newXPath();
       			Node rootNode = (Node) xpath.evaluate("//*[local-name()='" + this.rootName + "']", doc, XPathConstants.NODE);
       			NamedNodeMap attributes = rootNode.getAttributes();
       			Map<String, String> nameSpaces = Maps.newHashMap();
       			for (int i = 0; i < attributes.getLength(); i++) {
       				Node node = attributes.item(i);
       				String name = node.getNodeName();
       				if (name.startsWith("xmlns:")) {
       					nameSpaces.put(StringUtils.substringAfter(name, "xmlns:"), node.getNodeValue());
+      				}
+      			}
       			xpath = XPathFactory.newInstance().newXPath();
       			NodeList nodes = (NodeList) xpath.evaluate("//*[local-name()='" + tag + "']/*[local-name()='record']", doc, XPathConstants.NODESET);
       			if ((nodes != null) && (nodes.getLength() > 0)) {
       				List<String> result = new ArrayList<>();
       				for (int i = 0; i < nodes.getLength(); i++) {
       					Document currentDoc = dbf.newDocumentBuilder().newDocument();
       					Node imported = currentDoc.importNode(nodes.item(i), true);
       					for (String key : nameSpaces.keySet()) {
       						Element element = (Element) imported;
       						element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, "xmlns:" + key, nameSpaces.get(key));
+      					}
       					Transformer transformer = TransformerFactory.newInstance().newTransformer();
       					DOMSource mydoc = new DOMSource(imported);
       					StringWriter writer = new StringWriter();
       					transformer.transform(mydoc, new StreamResult(writer));
       					String record = writer.toString();
       					result.add(record);
+      				}
       				return result;
+      			}
       		} catch (Exception e) {
       			log.error("Error on extracting " + tag, e);
       			return null;
+      		}
       		return null;
+      	}
+      }

« Previous
1
2
3
4
Next »

(4-4/4)

Project

General

Profile

D-Net