/modules/dnet-data-provision-services/branches/saxonHE-SOLR-772/src/main/java/eu/dnetlib/index/solr/feed/StreamingInputDocumentFactory.java - Diff - D-Net - D-Net project tracking tool

« Previous | Next »

Revision 57157

Added by Enrico Ottonello over 4 years ago

solr 772 integration

     import java.io.StringReader;
     import java.io.StringWriter;
     import java.util.HashMap;
     import java.util.Iterator;
     import java.util.LinkedList;
     import java.util.List;
     import javax.xml.stream.*;
     import javax.xml.stream.XMLEventFactory;
     import javax.xml.stream.XMLEventReader;
     import javax.xml.stream.XMLEventWriter;
     import javax.xml.stream.XMLInputFactory;
     import javax.xml.stream.XMLOutputFactory;
     import javax.xml.stream.XMLStreamException;
     import javax.xml.stream.events.Namespace;
     import javax.xml.stream.events.StartElement;
     import javax.xml.stream.events.XMLEvent;
     import eu.dnetlib.index.solr.feed.ResultTransformer.Mode;
     import org.apache.solr.common.SolrInputDocument;
     import com.google.common.collect.Lists;
     import eu.dnetlib.index.solr.feed.ResultTransformer.Mode;
     /**
      * Optimized version of the document parser, drop in replacement of InputDocumentFactory.
+     *
      * <p>
      * Faster because:
      * </p>
      * <ul>
-...
      * <li>Quickly serialize the 'result' element directly in a string.</li>
      * <li>Uses less memory: less pressure on GC and allows more threads to process this in parallel</li>
      * </ul>
+     *
      * <p>
      * This class is fully reentrant and can be invoked in parallel.
      * </p>
+     *
      * @author marko
+     *
      */
     public class StreamingInputDocumentFactory extends InputDocumentFactory {
     	protected static final String DEFAULTDNETRESULT = "dnetResult";
-...
     	protected static final String ROOT_ELEMENT = "indexRecord";
     	protected static final int MAX_FIELD_LENGTH = 25000;
     	protected ThreadLocal<XMLInputFactory> inputFactory = new ThreadLocal<XMLInputFactory>() {
     		@Override
-...
+    		}
     	};
     	/**
     	 * {@inheritDoc}
     	 * <p>
     	 * Convenience overload: forwards all four arguments unchanged to the five-argument
     	 * {@code parseDocument} overload, passing {@code null} as the fifth argument.
     	 * </p>
     	 *
     	 * @throws XMLStreamException declared by this overload's signature
     	 */
     	@Override
     	public SolrInputDocument parseDocument(final String version, final String inputDocument, final String dsId, final String resultName)
     			throws XMLStreamException {
     		// NOTE(review): the null fifth argument presumably means "no result transformer" -- confirm against the five-argument overload.
     		return parseDocument(version, inputDocument, dsId, resultName, null);
+    	}
     	/**
     	 * {@inheritDoc}
     	 * <p>
     	 * String, com.google.common.base.Function)
     	 */
     	@Override
     	public SolrInputDocument parseDocument(final String version,
     			final String inputDocument,
-...
     			final ResultTransformer resultTransformer) {
     		final StringWriter results = new StringWriter();
     		final List<Namespace> nsList = new LinkedList<>();
     		final List<Namespace> nsList = Lists.newLinkedList();
     		try {
     			XMLEventReader parser = inputFactory.get().createXMLEventReader(new StringReader(inputDocument));
     			final SolrInputDocument indexDocument = new SolrInputDocument();
     			final SolrInputDocument indexDocument = new SolrInputDocument(new HashMap<>());
     			while (parser.hasNext()) {
     				final XMLEvent event = parser.nextEvent();
-...
+    	}
     	private List<Namespace> getNamespaces(final XMLEvent event) {
     		final List<Namespace> res = new LinkedList<>();
     		final List<Namespace> res = Lists.newLinkedList();
     		@SuppressWarnings("unchecked")
     		Iterator<Namespace> nsIter = event.asStartElement().getNamespaces();
     		while (nsIter.hasNext()) {
-...
     	 * Helper used to get the string from a text element.
+    	 *
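     	 * @param text the event to read the text from
     	 * @return the character data of the event, or an empty string when the event is an end element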
     	 */
     	protected final String getText(final XMLEvent text) {
     		if (text.isEndElement()) // log.warn("skipping because isEndOfElement " + text.asEndElement().getName().getLocalPart());
     			return "";
     		return text.asCharacters().getData();
     		final String data = text.asCharacters().getData();
     		if (data != null && data.length() > MAX_FIELD_LENGTH) {
     			return data.substring(0, MAX_FIELD_LENGTH);
+    		}
     		return data;
+    	}
+    }

Also available in: Unified diff

Project

General

Profile

D-Net

Revision 57157

Added by Enrico Ottonello over 4 years ago