Revision 54274
Added by Claudio Atzori over 5 years ago
modules/dnet-index-solr-common/trunk/src/main/java/eu/dnetlib/functionality/index/solr/feed/StreamingInputDocumentFactory.java | ||
---|---|---|
13 | 13 |
import com.google.common.collect.Lists; |
14 | 14 |
import eu.dnetlib.functionality.index.solr.feed.ResultTransformer.Mode; |
15 | 15 |
import org.apache.solr.common.SolrInputDocument; |
16 |
import org.apache.solr.common.StringUtils; |
|
16 | 17 |
|
17 | 18 |
/** |
18 | 19 |
* Optimized version of the document parser; a drop-in replacement for InputDocumentFactory. |
... | ... | |
44 | 45 |
|
45 | 46 |
protected static final String ROOT_ELEMENT = "indexRecord"; |
46 | 47 |
|
48 |
protected static final int MAX_FIELD_LENGTH = 32765; |
|
49 |
|
|
47 | 50 |
protected ThreadLocal<XMLInputFactory> inputFactory = new ThreadLocal<XMLInputFactory>() { |
48 | 51 |
|
49 | 52 |
@Override |
... | ... | |
257 | 260 |
if (text.isEndElement()) // log.warn("skipping because isEndOfElement " + text.asEndElement().getName().getLocalPart()); |
258 | 261 |
return ""; |
259 | 262 |
|
260 |
return text.asCharacters().getData(); |
|
263 |
final String data = text.asCharacters().getData(); |
|
264 |
if (data != null && data.length() > MAX_FIELD_LENGTH) { |
|
265 |
return data.substring(0, MAX_FIELD_LENGTH); |
|
266 |
} |
|
267 |
|
|
268 |
return data; |
|
261 | 269 |
} |
262 | 270 |
|
263 | 271 |
} |
Also available in: Unified diff
Field length capped to 32765 characters (MAX_FIELD_LENGTH): extracted character data longer than this is truncated.