Project

General

Profile

« Previous | Next » 

Revision 46715

Cleanup; introduced the string_ci field type to index untokenized field values and allow case-insensitive querying

View differences:

schemaTemplate.xslt
39 39
				  -->
40 40
				<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
41 41

  
42
				<fieldType name="string_ci" class="solr.TextField" sortMissingLast="true" omitNorms="true">
43
					<analyzer>
44
						<tokenizer class="solr.KeywordTokenizerFactory"/>
45
						<filter class="solr.LowerCaseFilterFactory" />
46
					</analyzer>
47
				</fieldType>
48

  
42 49
				<!-- boolean type: "true" or "false" -->
43 50
				<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
44 51

  
......
161 168
						-->
162 169
						<filter class="solr.StopFilterFactory"
163 170
						        ignoreCase="true"
164
						        words="stopwords_en.txt"
165
						/>
171
						        words="stopwords_en.txt" />
166 172
						<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
167 173
						<filter class="solr.LowerCaseFilterFactory"/>
168 174
						<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
......
173 179
						<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
174 180
						<filter class="solr.StopFilterFactory"
175 181
						        ignoreCase="true"
176
						        words="stopwords_en.txt"
177
						/>
182
						        words="stopwords_en.txt" />
178 183
						<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
179 184
						<filter class="solr.LowerCaseFilterFactory"/>
180 185
						<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
......
246 251
						<tokenizer class="solr.WhitespaceTokenizerFactory"/>
247 252
						<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
248 253
						<filter class="solr.StopFilterFactory" ignoreCase="true"
249
						        words="stopwords.txt" enablePositionIncrements="true"
250
						/>
254
						        words="stopwords.txt" enablePositionIncrements="true" />
251 255
						<filter class="solr.WordDelimiterFilterFactory"
252 256
						        generateWordParts="1" generateNumberParts="1"
253 257
						        catenateWords="0" catenateNumbers="0"
......
256 260
					</analyzer>
257 261
				</fieldType>
258 262

  
259
				<fieldType name="spelltext" class="solr.TextField" positionIncrementGap="100">
260
					<analyzer type="index">
261
						<tokenizer class="solr.StandardTokenizerFactory"/>
262
						<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
263
						<filter class="solr.StandardFilterFactory"/>
264
						<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
265
					</analyzer>
266
					<analyzer type="query">
267
						<tokenizer class="solr.StandardTokenizerFactory"/>
268
						<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
269
						<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
270
						<filter class="solr.StandardFilterFactory"/>
271
						<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
272
					</analyzer>
273
				</fieldType>
274

  
275 263
				<!-- used for objIdentifiers -->
276 264
				<fieldType name="long_keyword" class="solr.TextField">
277 265
					<analyzer>
......
291 279
							<xsl:when test="@type='date'">date</xsl:when>
292 280
							<xsl:when test="@type='ngramtext'">ngramtext</xsl:when>
293 281
							<xsl:when test="@type='long_keyword'">long_keyword</xsl:when>
294
							<xsl:when test="@tokenizable='false'">string</xsl:when>
282
							<xsl:when test="@tokenizable='false'">string_ci</xsl:when>
295 283
							<xsl:otherwise>
296 284
								<xsl:value-of select="$textFieldType"/>
297 285
							</xsl:otherwise>
......
321 309

  
322 310
				<field name="__dsversion" type="tdate" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true"/>
323 311

  
324
				<field name="__result" type="{$textFieldType}" indexed="false" stored="true"/>
312
				<field name="__result" type="string_ci" indexed="false" stored="true"/>
325 313

  
326
				<field name="__fulltext" type="{$textFieldType}" indexed="false" stored="true" default=""/>
327

  
328 314
				<field name="__all" type="{$textFieldType}" indexed="true" stored="false" multiValued="true"/>
329 315

  
330
				<field name="__spell" type="spelltext" indexed="true" stored="false" omitNorms="true" omitTermFreqAndPositions="true"/>
331

  
332
				<field name="cql.serverchoice" type="{$textFieldType}" indexed="true" stored="false" multiValued="true" omitNorms="true" omitTermFreqAndPositions="true"/>
333

  
334 316
				<field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
335 317

  
336 318
				<!-- catchall text field that indexes tokens both normally and in reverse for efficient
......
340 322
				<!-- field for ping -->
341 323
				<field name="text" type="text_common" indexed="false" stored="false"/>
342 324

  
343

  
344 325
			</fields>
345 326

  
346 327
			<!-- Field to use to determine and enforce document uniqueness.
......
359 340
				<copyField source="{$fieldname}" dest="__all"/>
360 341
			</xsl:for-each>
361 342

  
362
			<xsl:for-each select="./FIELD[@spellcheck = 'true']">
363
				<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/>
364
				<copyField source="{$fieldname}" dest="__spell"/>
365
			</xsl:for-each>
366

  
367
			<copyField source="cql.serverchoice" dest="__all"/>
368
			<copyField source="__fulltext" dest="__all"/>
369

  
370

  
371
			<!-- Similarity is the scoring routine for each document vs. a query.
372
				 A custom similarity may be specified here, but the default is fine
373
				 for most applications.  -->
374
			<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
375
			<!-- ... OR ...
376
				 Specify a SimilarityFactory class name implementation
377
				 allowing parameters to be used.
378
			-->
379
			<!--
380
			<similarity class="com.example.solr.CustomSimilarityFactory">
381
			  <str name="paramkey">param value</str>
382
			</similarity>
383
			-->
384

  
385

  
386 343
		</schema>
387 344
	</xsl:template>
388 345
</xsl:stylesheet>

Also available in: Unified diff