Revision 53847
Added by Claudio Atzori over 4 years ago
modules/dnet-index-solr-service/branches/solr75/src/main/resources/eu/dnetlib/functionality/index/conf/schemaTemplate.xslt | ||
---|---|---|
29 | 29 |
0.95 : update to solr 4.4, removed attribute "compress" from field definition, ngramfield doesn't support NGramFilterFactory anymore |
30 | 30 |
0.96 : update to solr 4.9 |
31 | 31 |
0.97 : introduced field type string_ci supporting case insensitivity. |
32 |
1.0 : updated to solr 6.6.0 |
|
32 | 33 |
--> |
33 |
<schema name="dnet" version="0.97">
|
|
34 |
<schema name="dnet" version="1.0">
|
|
34 | 35 |
|
35 |
<types> |
|
36 |
<!-- Valid attributes for fields: |
|
37 |
name: mandatory - the name for the field |
|
38 |
type: mandatory - the name of a field type from the |
|
39 |
fieldTypes section |
|
40 |
indexed: true if this field should be indexed (searchable or sortable) |
|
41 |
stored: true if this field should be retrievable |
|
42 |
docValues: true if this field should have doc values. Doc values are |
|
43 |
useful (required, if you are using *Point fields) for faceting, |
|
44 |
grouping, sorting and function queries. Doc values will make the index |
|
45 |
faster to load, more NRT-friendly and more memory-efficient. |
|
46 |
They however come with some limitations: they are currently only |
|
47 |
supported by StrField, UUIDField, all Trie*Fields and *PointFields, |
|
48 |
and depending on the field type, they might require the field to be |
|
49 |
single-valued, be required or have a default value (check the |
|
50 |
documentation of the field type you're interested in for more information) |
|
51 |
multiValued: true if this field may contain multiple values per document |
|
52 |
omitNorms: (expert) set to true to omit the norms associated with |
|
53 |
this field (this disables length normalization and index-time |
|
54 |
boosting for the field, and saves some memory). Only full-text |
|
55 |
fields or fields that need an index-time boost need norms. |
|
56 |
Norms are omitted for primitive (non-analyzed) types by default. |
|
57 |
termVectors: [false] set to true to store the term vector for a |
|
58 |
given field. |
|
59 |
When using MoreLikeThis, fields used for similarity should be |
|
60 |
stored for best performance. |
|
61 |
termPositions: Store position information with the term vector. |
|
62 |
This will increase storage costs. |
|
63 |
termOffsets: Store offset information with the term vector. This |
|
64 |
will increase storage costs. |
|
65 |
required: The field is required. It will throw an error if the |
|
66 |
value does not exist |
|
67 |
default: a value that should be used if no value is specified |
|
68 |
when adding a document. |
|
69 |
--> |
|
36 | 70 |
|
37 |
<!-- The StrField type is not analyzed, but indexed/stored verbatim. |
|
38 |
It supports doc values but in that case the field needs to be |
|
39 |
single-valued and either required or have a default value. |
|
40 |
--> |
|
41 |
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/> |
|
71 |
<!-- field names should consist of alphanumeric or underscore characters only and |
|
72 |
not start with a digit. This is not currently strictly enforced, |
|
73 |
but other field names will not have first class support from all components |
|
74 |
and back compatibility is not guaranteed. Names with both leading and |
|
75 |
trailing underscores (e.g. _version_) are reserved. |
|
76 |
--> |
|
42 | 77 |
|
43 |
<fieldType name="string_ci" class="solr.TextField" sortMissingLast="true" omitNorms="true"> |
|
44 |
<analyzer> |
|
45 |
<tokenizer class="solr.KeywordTokenizerFactory"/> |
|
46 |
<filter class="solr.LowerCaseFilterFactory" /> |
|
47 |
</analyzer> |
|
48 |
</fieldType> |
|
78 |
<xsl:for-each select="./FIELD"> |
|
79 |
<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/> |
|
80 |
<xsl:variable name="fieldtype"> |
|
81 |
<xsl:choose> |
|
82 |
<xsl:when test="@type"><xsl:value-of select="@type"/></xsl:when> |
|
83 |
<!-- this comment is here as a reference, |
|
84 |
once the MDFormat profile has been updated, remove the old type mappings |
|
49 | 85 |
|
50 |
<!-- boolean type: "true" or "false" --> |
|
51 |
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> |
|
86 |
<xsl:when test="@type='int'">pint</xsl:when> |
|
87 |
<xsl:when test="@type='date'">pdate</xsl:when> |
|
88 |
<xsl:when test="@type='ngramtext'">ngramtext</xsl:when> |
|
89 |
<xsl:when test="@type='string_ci'">string_ci</xsl:when> |
|
90 |
--> |
|
91 |
<xsl:when test="@tokenizable='false'">string</xsl:when> |
|
92 |
<xsl:otherwise> |
|
93 |
<xsl:value-of select="$textFieldType"/> |
|
94 |
</xsl:otherwise> |
|
95 |
</xsl:choose> |
|
96 |
</xsl:variable> |
|
97 |
<xsl:variable name="isMultivalued"> |
|
98 |
<xsl:choose> |
|
99 |
<xsl:when test="@multivalued='false'">false</xsl:when> |
|
100 |
<xsl:otherwise>true</xsl:otherwise> |
|
101 |
</xsl:choose> |
|
102 |
</xsl:variable> |
|
103 |
<xsl:variable name="isStored"> |
|
104 |
<xsl:choose> |
|
105 |
<xsl:when test="@stored='true'">true</xsl:when> |
|
106 |
<xsl:otherwise>false</xsl:otherwise> |
|
107 |
</xsl:choose> |
|
108 |
</xsl:variable> |
|
52 | 109 |
|
53 |
<!-- The sortMissingLast and sortMissingFirst attributes are optional and are |
|
54 |
currently supported on types that are sorted internally as strings |
|
55 |
and on numeric types. |
|
56 |
This includes "string","boolean", and, as of 3.5 (and 4.x), |
|
57 |
int, float, long, date, double, including the "Trie" variants. |
|
58 |
- If sortMissingLast="true", then a sort on this field will cause documents |
|
59 |
without the field to come after documents with the field, |
|
60 |
regardless of the requested sort order (asc or desc). |
|
61 |
- If sortMissingFirst="true", then a sort on this field will cause documents |
|
62 |
without the field to come before documents with the field, |
|
63 |
regardless of the requested sort order. |
|
64 |
- If sortMissingLast="false" and sortMissingFirst="false" (the default), |
|
65 |
then default lucene sorting will be used which places docs without the |
|
66 |
field first in an ascending sort and last in a descending sort. |
|
67 |
--> |
|
110 |
<field name="{$fieldname}" type="{$fieldtype}" indexed="{@indexable}" stored="{normalize-space($isStored)}" multiValued="{normalize-space($isMultivalued)}"/> |
|
111 |
</xsl:for-each> |
|
68 | 112 |
|
69 |
<!-- |
|
70 |
Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types. |
|
113 |
<field name="__indexrecordidentifier" type="string" indexed="true" stored="true" multiValued="false" required="true"/> |
|
71 | 114 |
|
72 |
These fields support doc values, but they require the field to be |
|
73 |
single-valued and either be required or have a default value. |
|
74 |
--> |
|
75 |
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/> |
|
76 |
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/> |
|
77 |
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/> |
|
78 |
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/> |
|
115 |
<field name="__deleted" type="boolean" indexed="true" stored="false" default="false" omitNorms="true" omitTermFreqAndPositions="true"/> |
|
79 | 116 |
|
80 |
<!-- |
|
81 |
Numeric field types that index each value at various levels of precision |
|
82 |
to accelerate range queries when the number of values between the range |
|
83 |
endpoints is large. See the javadoc for NumericRangeQuery for internal |
|
84 |
implementation details. |
|
117 |
<field name="__dsid" type="string" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true"/> |
|
85 | 118 |
|
86 |
Smaller precisionStep values (specified in bits) will lead to more tokens |
|
87 |
indexed per value, slightly larger index size, and faster range queries. |
|
88 |
A precisionStep of 0 disables indexing at different precision levels. |
|
89 |
--> |
|
90 |
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/> |
|
91 |
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/> |
|
92 |
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/> |
|
93 |
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/> |
|
119 |
<field name="__dsversion" type="pdate" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true"/> |
|
94 | 120 |
|
95 |
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and |
|
96 |
is a more restricted form of the canonical representation of dateTime |
|
97 |
http://www.w3.org/TR/xmlschema-2/#dateTime |
|
98 |
The trailing "Z" designates UTC time and is mandatory. |
|
99 |
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z |
|
100 |
All other components are mandatory. |
|
121 |
<field name="__result" type="string" indexed="false" stored="true" multiValued="false" docValues="false"/> |
|
101 | 122 |
|
102 |
Expressions can also be used to denote calculations that should be |
|
103 |
performed relative to "NOW" to determine the value, ie... |
|
123 |
<field name="__all" type="{$textFieldType}" indexed="true" stored="false" multiValued="true"/> |
|
104 | 124 |
|
105 |
NOW/HOUR |
|
106 |
... Round to the start of the current hour |
|
107 |
NOW-1DAY |
|
108 |
... Exactly 1 day prior to now |
|
109 |
NOW/DAY+6MONTHS+3DAYS |
|
110 |
... 6 months and 3 days in the future from the start of |
|
111 |
the current day |
|
125 |
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false" /> |
|
112 | 126 |
|
113 |
Consult the DateField javadocs for more information.
|
|
127 |
<field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
|
|
114 | 128 |
|
115 |
Note: For faster range queries, consider the tdate type |
|
116 |
--> |
|
117 |
<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/> |
|
129 |
<!-- field for ping --> |
|
130 |
<field name="text" type="{$textFieldType}" indexed="false" stored="false"/> |
|
118 | 131 |
|
119 |
<!-- A Trie based date field for faster date range queries and date faceting. --> |
|
120 |
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/> |
|
132 |
<!-- Field to use to determine and enforce document uniqueness. |
|
133 |
Unless this field is marked with required="false", it will be a required field |
|
134 |
--> |
|
135 |
<uniqueKey>__indexrecordidentifier</uniqueKey> |
|
121 | 136 |
|
122 |
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of |
|
123 |
words on case-change, alpha numeric boundaries, and non-alphanumeric chars, |
|
124 |
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi". |
|
125 |
Synonyms and stopwords are customized by external files, and stemming is enabled. |
|
126 |
--> |
|
127 |
<fieldType name="text_common" class="solr.TextField" positionIncrementGap="100"> |
|
128 |
<analyzer type="index"> |
|
129 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
130 |
<filter class="solr.StopFilterFactory" ignoreCase="true" |
|
131 |
words="stopwords.txt" enablePositionIncrements="true"/> |
|
132 |
<filter class="solr.WordDelimiterFilterFactory" |
|
133 |
preserveOriginal="1" generateWordParts="1" |
|
134 |
generateNumberParts="1" catenateWords="1" |
|
135 |
catenateNumbers="1" catenateAll="0"/> |
|
136 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
137 |
<filter class="solr.ASCIIFoldingFilterFactory"/> |
|
138 |
</analyzer> |
|
139 |
<analyzer type="query"> |
|
140 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
141 |
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|
142 |
<filter class="solr.StopFilterFactory" ignoreCase="true" |
|
143 |
words="stopwords.txt" enablePositionIncrements="true"/> |
|
144 |
<filter class="solr.WordDelimiterFilterFactory" |
|
145 |
preserveOriginal="1" generateWordParts="1" |
|
146 |
generateNumberParts="1" catenateWords="0" |
|
147 |
catenateNumbers="0" catenateAll="0"/> |
|
148 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
149 |
<filter class="solr.ASCIIFoldingFilterFactory"/> |
|
150 |
</analyzer> |
|
151 |
</fieldType> |
|
137 |
<xsl:for-each select="./FIELD[(@type = 'ngramtext' or not(@type)) and not(@tokenizable = 'false')]"> |
|
138 |
<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/> |
|
139 |
<copyField source="{$fieldname}" dest="__all"/> |
|
140 |
</xsl:for-each> |
|
152 | 141 |
|
153 |
<!-- A text field with defaults appropriate for English, plus |
|
154 |
aggressive word-splitting and autophrase features enabled. |
|
155 |
This field is just like text_en, except it adds |
|
156 |
WordDelimiterFilter to enable splitting and matching of |
|
157 |
words on case-change, alpha numeric boundaries, and |
|
158 |
non-alphanumeric chars. This means certain compound word |
|
159 |
cases will work, for example query "wi fi" will match |
|
160 |
document "WiFi" or "wi-fi". |
|
161 |
--> |
|
162 |
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> |
|
163 |
<analyzer type="index"> |
|
164 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
165 |
<!-- in this example, we will only use synonyms at query time |
|
166 |
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|
167 |
--> |
|
168 |
<!-- Case insensitive stop word removal. |
|
169 |
--> |
|
170 |
<filter class="solr.StopFilterFactory" |
|
171 |
ignoreCase="true" |
|
172 |
words="stopwords_en.txt" /> |
|
173 |
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> |
|
174 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
175 |
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|
176 |
<filter class="solr.PorterStemFilterFactory"/> |
|
177 |
</analyzer> |
|
178 |
<analyzer type="query"> |
|
179 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
180 |
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|
181 |
<filter class="solr.StopFilterFactory" |
|
182 |
ignoreCase="true" |
|
183 |
words="stopwords_en.txt" /> |
|
184 |
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> |
|
185 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
186 |
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|
187 |
<filter class="solr.PorterStemFilterFactory"/> |
|
188 |
</analyzer> |
|
189 |
</fieldType> |
|
142 |
<!-- copyField commands copy one field to another at the time a document |
|
143 |
is added to the index. It's used either to index the same field differently, |
|
144 |
or to add multiple fields to the same field for easier/faster searching. |
|
190 | 145 |
|
191 |
<fieldType name="ngramtext" class="solr.TextField" omitNorms="true"> |
|
192 |
<analyzer type="index"> |
|
193 |
<tokenizer class="solr.KeywordTokenizerFactory"/> |
|
194 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
195 |
<filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="25"/> |
|
196 |
<filter class="solr.TrimFilterFactory"/> |
|
197 |
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
|
198 |
</analyzer> |
|
199 |
<analyzer type="query"> |
|
200 |
<tokenizer class="solr.KeywordTokenizerFactory"/> |
|
201 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
202 |
</analyzer> |
|
203 |
</fieldType> |
|
146 |
<copyField source="sourceFieldName" dest="destinationFieldName"/> |
|
147 |
--> |
|
204 | 148 |
|
205 |
<fieldType name="text_EFG" class="solr.TextField" positionIncrementGap="100"> |
|
206 |
<analyzer type="index"> |
|
207 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
208 |
<filter class="solr.StopFilterFactory" ignoreCase="true" |
|
209 |
words="stopwords.txt" enablePositionIncrements="true"/> |
|
210 |
<filter class="solr.WordDelimiterFilterFactory" |
|
211 |
preserveOriginal="1" generateWordParts="1" |
|
212 |
generateNumberParts="1" catenateWords="1" |
|
213 |
catenateNumbers="1" catenateAll="0" |
|
214 |
splitOnCaseChange="1"/> |
|
215 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
216 |
<filter class="solr.SnowballPorterFilterFactory" language="German2" protected="protwords.txt"/> |
|
217 |
<filter class="solr.ASCIIFoldingFilterFactory"/> |
|
218 |
</analyzer> |
|
219 |
<analyzer type="query"> |
|
220 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
221 |
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|
222 |
<filter class="solr.StopFilterFactory" ignoreCase="true" |
|
223 |
words="stopwords.txt" enablePositionIncrements="true"/> |
|
224 |
<filter class="solr.WordDelimiterFilterFactory" |
|
225 |
generateWordParts="0" generateNumberParts="1" |
|
226 |
catenateWords="1" catenateNumbers="0" |
|
227 |
catenateAll="0" splitOnCaseChange="1"/> |
|
228 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
229 |
<filter class="solr.SnowballPorterFilterFactory" language="German2" protected="protwords.txt"/> |
|
230 |
<filter class="solr.ASCIIFoldingFilterFactory"/> |
|
231 |
</analyzer> |
|
232 |
</fieldType> |
|
149 |
<!-- field type definitions. The "name" attribute is |
|
150 |
just a label to be used by field definitions. The "class" |
|
151 |
attribute and any other attributes determine the real |
|
152 |
behavior of the fieldType. |
|
153 |
Class names starting with "solr" refer to java classes in a |
|
154 |
standard package such as org.apache.solr.analysis |
|
155 |
--> |
|
233 | 156 |
|
234 |
<!-- A general unstemmed text field that indexes tokens normally and also |
|
235 |
reversed (via ReversedWildcardFilterFactory), to enable more efficient |
|
236 |
leading wildcard queries. --> |
|
237 |
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100"> |
|
238 |
<analyzer type="index"> |
|
239 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
240 |
<filter class="solr.StopFilterFactory" ignoreCase="true" |
|
241 |
words="stopwords.txt" enablePositionIncrements="true"/> |
|
242 |
<filter class="solr.WordDelimiterFilterFactory" |
|
243 |
generateWordParts="1" generateNumberParts="1" |
|
244 |
catenateWords="1" catenateNumbers="1" |
|
245 |
catenateAll="0" splitOnCaseChange="0"/> |
|
246 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
247 |
<filter class="solr.ReversedWildcardFilterFactory" |
|
248 |
withOriginal="true" maxPosAsterisk="3" |
|
249 |
maxPosQuestion="2" maxFractionAsterisk="0.33"/> |
|
250 |
</analyzer> |
|
251 |
<analyzer type="query"> |
|
252 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
253 |
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|
254 |
<filter class="solr.StopFilterFactory" ignoreCase="true" |
|
255 |
words="stopwords.txt" enablePositionIncrements="true" /> |
|
256 |
<filter class="solr.WordDelimiterFilterFactory" |
|
257 |
generateWordParts="1" generateNumberParts="1" |
|
258 |
catenateWords="0" catenateNumbers="0" |
|
259 |
catenateAll="0" splitOnCaseChange="0"/> |
|
260 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
261 |
</analyzer> |
|
262 |
</fieldType> |
|
157 |
<!-- The StrField type is not analyzed, but indexed/stored verbatim. |
|
158 |
It supports doc values but in that case the field needs to be |
|
159 |
single-valued and either required or have a default value. |
|
160 |
--> |
|
161 |
<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" /> |
|
162 |
<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" /> |
|
263 | 163 |
|
264 |
<!-- used for objIdentifiers --> |
|
265 |
<fieldType name="long_keyword" class="solr.TextField"> |
|
266 |
<analyzer> |
|
267 |
<tokenizer class="solr.KeywordTokenizerFactory"/> |
|
268 |
</analyzer> |
|
269 |
</fieldType> |
|
164 |
<!-- boolean type: "true" or "false" --> |
|
165 |
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> |
|
270 | 166 |
|
271 |
</types>
|
|
167 |
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
|
|
272 | 168 |
|
273 |
<fields> |
|
169 |
<!-- The sortMissingLast and sortMissingFirst attributes are optional and are |
|
170 |
currently supported on types that are sorted internally as strings |
|
171 |
and on numeric types. |
|
172 |
This includes "string","boolean", "int", "float", "long", "date", "double", |
|
173 |
including the "Trie" and "Point" variants. |
|
174 |
- If sortMissingLast="true", then a sort on this field will cause documents |
|
175 |
without the field to come after documents with the field, |
|
176 |
regardless of the requested sort order (asc or desc). |
|
177 |
- If sortMissingFirst="true", then a sort on this field will cause documents |
|
178 |
without the field to come before documents with the field, |
|
179 |
regardless of the requested sort order. |
|
180 |
- If sortMissingLast="false" and sortMissingFirst="false" (the default), |
|
181 |
then default lucene sorting will be used which places docs without the |
|
182 |
field first in an ascending sort and last in a descending sort. |
|
183 |
--> |
|
274 | 184 |
|
275 |
<xsl:for-each select="./FIELD[@indexable = 'true' or @result = 'false']"> |
|
276 |
<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/> |
|
277 |
<xsl:variable name="fieldtype"> |
|
278 |
<xsl:choose> |
|
279 |
<xsl:when test="@type='int'">int</xsl:when> |
|
280 |
<xsl:when test="@type='date'">date</xsl:when> |
|
281 |
<xsl:when test="@type='ngramtext'">ngramtext</xsl:when> |
|
282 |
<xsl:when test="@type='string_ci'">string_ci</xsl:when> |
|
283 |
<xsl:when test="@tokenizable='false'">string</xsl:when> |
|
284 |
<xsl:otherwise> |
|
285 |
<xsl:value-of select="$textFieldType"/> |
|
286 |
</xsl:otherwise> |
|
287 |
</xsl:choose> |
|
288 |
</xsl:variable> |
|
289 |
<xsl:variable name="isMultivalued"> |
|
290 |
<xsl:choose> |
|
291 |
<xsl:when test="@multivalued='false'">false</xsl:when> |
|
292 |
<xsl:otherwise>true</xsl:otherwise> |
|
293 |
</xsl:choose> |
|
294 |
</xsl:variable> |
|
295 |
<xsl:variable name="isStored"> |
|
296 |
<xsl:choose> |
|
297 |
<xsl:when test="@stored='true'">true</xsl:when> |
|
298 |
<xsl:otherwise>false</xsl:otherwise> |
|
299 |
</xsl:choose> |
|
300 |
</xsl:variable> |
|
185 |
<!-- |
|
186 |
Numeric field types that index values using KD-trees. *Point fields are faster and more efficient than Trie* fields, both at |
|
187 |
search time and at index time, but some features are still not supported. |
|
188 |
Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc. |
|
189 |
--> |
|
190 |
<fieldType name="pint" class="solr.IntPointField" docValues="true"/> |
|
191 |
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/> |
|
192 |
<fieldType name="plong" class="solr.LongPointField" docValues="true"/> |
|
193 |
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> |
|
301 | 194 |
|
302 |
<field name="{$fieldname}" type="{$fieldtype}" indexed="{@indexable}" stored="{normalize-space($isStored)}" multiValued="{normalize-space($isMultivalued)}"/> |
|
303 |
</xsl:for-each> |
|
195 |
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> |
|
196 |
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> |
|
197 |
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> |
|
198 |
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> |
|
304 | 199 |
|
305 |
<field name="__indexrecordidentifier" type="string" indexed="true" stored="true" multiValued="false" required="true"/> |
|
200 |
<!-- |
|
201 |
Default numeric field types. For faster range queries, consider *PointFields (pint/pfloat/plong/pdouble), or the |
|
202 |
tint/tfloat/tlong/tdouble types. |
|
203 |
--> |
|
204 |
<fieldType name="int" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0"/> |
|
205 |
<fieldType name="float" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0"/> |
|
206 |
<fieldType name="long" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0"/> |
|
207 |
<fieldType name="double" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0"/> |
|
306 | 208 |
|
307 |
<field name="__deleted" type="boolean" indexed="true" stored="false" default="false" omitNorms="true" omitTermFreqAndPositions="true"/> |
|
209 |
<fieldType name="ints" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/> |
|
210 |
<fieldType name="floats" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/> |
|
211 |
<fieldType name="longs" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/> |
|
212 |
<fieldType name="doubles" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/> |
|
308 | 213 |
|
309 |
<field name="__dsid" type="string" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true"/> |
|
214 |
<!-- |
|
215 |
Numeric field types that index each value at various levels of precision |
|
216 |
to accelerate range queries when the number of values between the range |
|
217 |
endpoints is large. See the javadoc for NumericRangeQuery for internal |
|
218 |
implementation details. |
|
310 | 219 |
|
311 |
<field name="__dsversion" type="tdate" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true"/> |
|
220 |
Smaller precisionStep values (specified in bits) will lead to more tokens |
|
221 |
indexed per value, slightly larger index size, and faster range queries. |
|
222 |
A precisionStep of 0 disables indexing at different precision levels. |
|
312 | 223 |
|
313 |
<field name="__result" type="string" indexed="false" stored="true" omitNorms="true" omitTermFreqAndPositions="true" /> |
|
224 |
Consider using pint/pfloat/plong/pdouble instead of Trie* fields if possible |
|
225 |
--> |
|
226 |
<fieldType name="tint" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0"/> |
|
227 |
<fieldType name="tfloat" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0"/> |
|
228 |
<fieldType name="tlong" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0"/> |
|
229 |
<fieldType name="tdouble" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0"/> |
|
314 | 230 |
|
315 |
<field name="__all" type="{$textFieldType}" indexed="true" stored="false" multiValued="true"/> |
|
231 |
<fieldType name="tints" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/> |
|
232 |
<fieldType name="tfloats" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/> |
|
233 |
<fieldType name="tlongs" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/> |
|
234 |
<fieldType name="tdoubles" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/> |
|
316 | 235 |
|
317 |
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/> |
|
236 |
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and |
|
237 |
is a more restricted form of the canonical representation of dateTime |
|
238 |
http://www.w3.org/TR/xmlschema-2/#dateTime |
|
239 |
The trailing "Z" designates UTC time and is mandatory. |
|
240 |
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z |
|
241 |
All other components are mandatory. |
|
318 | 242 |
|
319 |
<!-- catchall text field that indexes tokens both normally and in reverse for efficient |
|
320 |
leading wildcard queries. --> |
|
321 |
<field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/> |
|
243 |
Expressions can also be used to denote calculations that should be |
|
244 |
performed relative to "NOW" to determine the value, ie... |
|
322 | 245 |
|
323 |
<!-- field for ping --> |
|
324 |
<field name="text" type="text_common" indexed="false" stored="false"/> |
|
246 |
NOW/HOUR |
|
247 |
... Round to the start of the current hour |
|
248 |
NOW-1DAY |
|
249 |
... Exactly 1 day prior to now |
|
250 |
NOW/DAY+6MONTHS+3DAYS |
|
251 |
... 6 months and 3 days in the future from the start of |
|
252 |
the current day |
|
325 | 253 |
|
326 |
</fields> |
|
254 |
Consult the TrieDateField javadocs for more information. |
|
255 |
--> |
|
256 |
<!-- KD-tree versions of date fields --> |
|
257 |
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/> |
|
258 |
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> |
|
327 | 259 |
|
328 |
<!-- Field to use to determine and enforce document uniqueness. |
|
329 |
Unless this field is marked with required="false", it will be a required field |
|
260 |
<fieldType name="date" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0"/> |
|
261 |
<fieldType name="dates" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/> |
|
262 |
|
|
263 |
<fieldType name="tdate" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0"/> |
|
264 |
<fieldType name="tdates" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0" multiValued="true"/> |
|
265 |
|
|
266 |
|
|
267 |
<!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings --> |
|
268 |
<fieldType name="binary" class="solr.BinaryField"/> |
|
269 |
|
|
270 |
<!-- The "RandomSortField" is not used to store or search any |
|
271 |
data. You can declare fields of this type in your schema |
|
272 |
to generate pseudo-random orderings of your docs for sorting |
|
273 |
or function purposes. The ordering is generated based on the field |
|
274 |
name and the version of the index. As long as the index version |
|
275 |
remains unchanged, and the same field name is reused, |
|
276 |
the ordering of the docs will be consistent. |
|
277 |
If you want different pseudo-random orderings of documents, |
|
278 |
for the same version of the index, use a dynamicField and |
|
279 |
change the field name in the request. |
|
280 |
--> |
|
281 |
<fieldType name="random" class="solr.RandomSortField" indexed="true" /> |
|
282 |
|
|
283 |
<!-- solr.TextField allows the specification of custom text analyzers |
|
284 |
specified as a tokenizer and a list of token filters. Different |
|
285 |
analyzers may be specified for indexing and querying. |
|
286 |
|
|
287 |
The optional positionIncrementGap puts space between multiple fields of |
|
288 |
this type on the same document, with the purpose of preventing false phrase |
|
289 |
matching across fields. |
|
290 |
|
|
291 |
For more info on customizing your analyzer chain, please see |
|
292 |
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters |
|
293 |
--> |
|
294 |
|
|
295 |
<!-- One can also specify an existing Analyzer class that has a |
|
296 |
default constructor via the class attribute on the analyzer element. |
|
297 |
Example: |
|
298 |
<fieldType name="text_greek" class="solr.TextField"> |
|
299 |
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> |
|
300 |
</fieldType> |
|
301 |
--> |
|
302 |
|
|
303 |
<!-- A text field that only splits on whitespace for exact matching of words --> |
|
304 |
<!-- <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/> --> |
|
305 |
|
|
306 |
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> |
|
307 |
<analyzer> |
|
308 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
309 |
</analyzer> |
|
310 |
</fieldType> |
|
311 |
|
|
312 |
<fieldType name="ngramtext" class="solr.TextField"> |
|
313 |
<analyzer type="index"> |
|
314 |
<tokenizer class="solr.KeywordTokenizerFactory"/> |
|
315 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
316 |
<filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="25"/> |
|
317 |
<filter class="solr.TrimFilterFactory"/> |
|
318 |
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
|
319 |
</analyzer> |
|
320 |
<analyzer type="query"> |
|
321 |
<tokenizer class="solr.KeywordTokenizerFactory"/> |
|
322 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
323 |
</analyzer> |
|
324 |
</fieldType> |
|
325 |
|
|
326 |
|
|
327 |
<fieldType name="personName" class="solr.TextField" positionIncrementGap="100"> |
|
328 |
<analyzer> |
|
329 |
<tokenizer class="solr.StandardTokenizerFactory" /> |
|
330 |
<filter class="solr.LowerCaseFilterFactory" /> |
|
331 |
</analyzer> |
|
332 |
</fieldType> |
|
333 |
|
|
334 |
<fieldType name="personNamePrefix" class="solr.TextField" positionIncrementGap="100"> |
|
335 |
<analyzer type="index"> |
|
336 |
<tokenizer class="solr.StandardTokenizerFactory"/> |
|
337 |
<filter class="solr.LowerCaseFilterFactory" /> |
|
338 |
<filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="30" /> |
|
339 |
</analyzer> |
|
340 |
<analyzer type="query"> |
|
341 |
<tokenizer class="solr.StandardTokenizerFactory"/> |
|
342 |
<filter class="solr.LowerCaseFilterFactory" /> |
|
343 |
</analyzer> |
|
344 |
</fieldType> |
|
345 |
|
|
346 |
|
|
347 |
<!-- A general text field that has reasonable, generic |
|
348 |
cross-language defaults: it tokenizes with StandardTokenizer, |
|
349 |
removes stop words from case-insensitive "stopwords.txt" |
|
350 |
(empty by default), and down cases. At query time only, it |
|
351 |
also applies synonyms. |
|
330 | 352 |
--> |
331 |
<uniqueKey>__indexrecordidentifier</uniqueKey> |
|
353 |
<fieldType name="text_common" class="solr.TextField" positionIncrementGap="100" multiValued="true"> |
|
354 |
<analyzer type="index"> |
|
355 |
<tokenizer class="solr.StandardTokenizerFactory"/> |
|
356 |
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|
357 |
<!-- in this example, we will only use synonyms at query time |
|
358 |
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|
359 |
<filter class="solr.FlattenGraphFilterFactory"/> |
|
360 |
--> |
|
361 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
362 |
</analyzer> |
|
363 |
<analyzer type="query"> |
|
364 |
<tokenizer class="solr.StandardTokenizerFactory"/> |
|
365 |
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|
366 |
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|
367 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
368 |
</analyzer> |
|
369 |
</fieldType> |
|
332 | 370 |
|
333 |
<!-- field for the QueryParser to use when an explicit fieldname is absent --> |
|
334 |
<defaultSearchField>__all</defaultSearchField> |
|
371 |
<!-- A text field with defaults appropriate for English: it |
|
372 |
tokenizes with StandardTokenizer, removes English stop words |
|
373 |
(stopwords.txt), down cases, protects words from protwords.txt, and |
|
374 |
finally applies Porter's stemming. The query time analyzer |
|
375 |
also applies synonyms from synonyms.txt. --> |
|
376 |
<!-- <dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/> --> |
|
335 | 377 |
|
336 |
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" --> |
|
337 |
<solrQueryParser defaultOperator="AND"/> |
|
378 |
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> |
|
379 |
<analyzer type="index"> |
|
380 |
<tokenizer class="solr.StandardTokenizerFactory"/> |
|
381 |
<!-- in this example, we will only use synonyms at query time |
|
382 |
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|
383 |
<filter class="solr.FlattenGraphFilterFactory"/> |
|
384 |
--> |
|
385 |
<!-- Case insensitive stop word removal. |
|
386 |
--> |
|
387 |
<filter class="solr.StopFilterFactory" |
|
388 |
ignoreCase="true" |
|
389 |
words="stopwords.txt" |
|
390 |
/> |
|
391 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
392 |
<filter class="solr.EnglishPossessiveFilterFactory"/> |
|
393 |
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|
394 |
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: |
|
395 |
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
|
396 |
--> |
|
397 |
<filter class="solr.PorterStemFilterFactory"/> |
|
398 |
</analyzer> |
|
399 |
<analyzer type="query"> |
|
400 |
<tokenizer class="solr.StandardTokenizerFactory"/> |
|
401 |
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|
402 |
<filter class="solr.StopFilterFactory" |
|
403 |
ignoreCase="true" |
|
404 |
words="stopwords.txt" |
|
405 |
/> |
|
406 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
407 |
<filter class="solr.EnglishPossessiveFilterFactory"/> |
|
408 |
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|
409 |
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: |
|
410 |
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
|
411 |
--> |
|
412 |
<filter class="solr.PorterStemFilterFactory"/> |
|
413 |
</analyzer> |
|
414 |
</fieldType> |
|
338 | 415 |
|
339 |
<xsl:for-each select="./FIELD[(@type = 'ngramtext' or not(@type)) and not(@tokenizable = 'false')]"> |
|
340 |
<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/> |
|
341 |
<copyField source="{$fieldname}" dest="__all"/> |
|
342 |
</xsl:for-each> |
|
416 |
<!-- A text field with defaults appropriate for English, plus |
|
417 |
aggressive word-splitting and autophrase features enabled. |
|
418 |
This field is just like text_en, except it adds |
|
419 |
WordDelimiterGraphFilter to enable splitting and matching of |
|
420 |
words on case-change, alpha numeric boundaries, and |
|
421 |
non-alphanumeric chars. This means certain compound word |
|
422 |
cases will work, for example query "wi fi" will match |
|
423 |
document "WiFi" or "wi-fi". |
|
424 |
--> |
|
425 |
<!-- <dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/> --> |
|
426 |
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> |
|
427 |
<analyzer type="index"> |
|
428 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
429 |
<!-- in this example, we will only use synonyms at query time |
|
430 |
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|
431 |
--> |
|
432 |
<!-- Case insensitive stop word removal. |
|
433 |
--> |
|
434 |
<filter class="solr.StopFilterFactory" |
|
435 |
ignoreCase="true" |
|
436 |
words="stopwords.txt" |
|
437 |
/> |
|
438 |
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> |
|
439 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
440 |
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|
441 |
<filter class="solr.PorterStemFilterFactory"/> |
|
442 |
<filter class="solr.FlattenGraphFilterFactory" /> |
|
443 |
</analyzer> |
|
444 |
<analyzer type="query"> |
|
445 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
446 |
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|
447 |
<filter class="solr.StopFilterFactory" |
|
448 |
ignoreCase="true" |
|
449 |
words="stopwords.txt" |
|
450 |
/> |
|
451 |
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> |
|
452 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
453 |
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|
454 |
<filter class="solr.PorterStemFilterFactory"/> |
|
455 |
</analyzer> |
|
456 |
</fieldType> |
|
343 | 457 |
|
458 |
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names, |
|
459 |
but may be good for SKUs. Can insert dashes in the wrong place and still match. --> |
|
460 |
<!-- <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/> --> |
|
461 |
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> |
|
462 |
<analyzer type="index"> |
|
463 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
464 |
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> |
|
465 |
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> |
|
466 |
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> |
|
467 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
468 |
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|
469 |
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
|
470 |
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes |
|
471 |
possible with WordDelimiterGraphFilter in conjunction with stemming. --> |
|
472 |
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
|
473 |
<filter class="solr.FlattenGraphFilterFactory" /> |
|
474 |
</analyzer> |
|
475 |
<analyzer type="query"> |
|
476 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|
477 |
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> |
|
478 |
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> |
|
479 |
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> |
|
480 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
481 |
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|
482 |
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
|
483 |
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes |
|
484 |
possible with WordDelimiterGraphFilter in conjunction with stemming. --> |
|
485 |
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
|
486 |
</analyzer> |
|
487 |
</fieldType> |
|
488 |
|
|
489 |
<!-- Just like text_common except it reverses the characters of |
|
490 |
each token, to enable more efficient leading wildcard queries. |
|
491 |
--> |
|
492 |
<!-- <dynamicField name="*_txt_rev" type="text_rev" indexed="true" stored="true"/> --> |
|
493 |
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100"> |
|
494 |
<analyzer type="index"> |
|
495 |
<tokenizer class="solr.StandardTokenizerFactory"/> |
|
496 |
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|
497 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
498 |
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" |
|
499 |
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> |
|
500 |
</analyzer> |
|
501 |
<analyzer type="query"> |
|
502 |
<tokenizer class="solr.StandardTokenizerFactory"/> |
|
503 |
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|
504 |
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|
505 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
506 |
</analyzer> |
|
507 |
</fieldType> |
|
508 |
|
|
509 |
<!-- <dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/> --> |
|
510 |
<fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" > |
|
511 |
<analyzer> |
|
512 |
<tokenizer class="solr.StandardTokenizerFactory"/> |
|
513 |
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> |
|
514 |
</analyzer> |
|
515 |
</fieldType> |
|
516 |
|
|
517 |
<fieldType name="string_ci" class="solr.TextField" sortMissingLast="true" omitNorms="true"> |
|
518 |
<analyzer type="query"> |
|
519 |
<tokenizer class="solr.KeywordTokenizerFactory"/> |
|
520 |
<filter class="solr.LowerCaseFilterFactory"/> |
|
521 |
</analyzer> |
|
522 |
</fieldType> |
|
523 |
|
|
524 |
<!-- |
|
525 |
Example of using PathHierarchyTokenizerFactory at index time, so |
|
526 |
queries for paths match documents at that path, or in descendent paths |
|
527 |
--> |
|
528 |
<!-- <dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/> --> |
|
529 |
<fieldType name="descendent_path" class="solr.TextField"> |
|
530 |
<analyzer type="index"> |
|
531 |
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> |
|
532 |
</analyzer> |
|
533 |
<analyzer type="query"> |
|
534 |
<tokenizer class="solr.KeywordTokenizerFactory" /> |
|
535 |
</analyzer> |
|
536 |
</fieldType> |
|
537 |
|
|
538 |
<!-- |
|
539 |
Example of using PathHierarchyTokenizerFactory at query time, so |
|
540 |
queries for paths match documents at that path, or in ancestor paths |
|
541 |
--> |
|
542 |
<!-- <dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/> --> |
|
543 |
<fieldType name="ancestor_path" class="solr.TextField"> |
|
544 |
<analyzer type="index"> |
|
545 |
<tokenizer class="solr.KeywordTokenizerFactory" /> |
|
546 |
</analyzer> |
|
547 |
<analyzer type="query"> |
|
548 |
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> |
|
549 |
</analyzer> |
|
550 |
</fieldType> |
|
551 |
|
|
552 |
<!-- since fields of this type are by default not stored or indexed, |
|
553 |
any data added to them will be ignored outright. --> |
|
554 |
<fieldType name="ignored" stored="false" indexed="false" docValues="false" multiValued="true" class="solr.StrField" /> |
|
555 |
|
|
556 |
<!-- This point type indexes the coordinates as separate fields (subFields) |
|
557 |
If subFieldType is defined, it references a type, and a dynamic field |
|
558 |
definition is created matching *___<typename>. Alternately, if |
|
559 |
subFieldSuffix is defined, that is used to create the subFields. |
|
560 |
Example: if subFieldType="double", then the coordinates would be |
|
561 |
indexed in fields myloc_0___double,myloc_1___double. |
|
562 |
Example: if subFieldSuffix="_d" then the coordinates would be indexed |
|
563 |
in fields myloc_0_d,myloc_1_d |
|
564 |
The subFields are an implementation detail of the fieldType, and end |
|
565 |
users normally should not need to know about them. |
|
566 |
--> |
|
567 |
<!-- <dynamicField name="*_point" type="point" indexed="true" stored="true"/> --> |
|
568 |
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> |
|
569 |
|
|
570 |
<!-- A specialized field for geospatial search filters and distance sorting. --> |
|
571 |
<fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/> |
|
572 |
|
|
573 |
<!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes. |
|
574 |
For more information about this and other Spatial fields new to Solr 4, see: |
|
575 |
http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4 |
|
576 |
--> |
|
577 |
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" |
|
578 |
geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" /> |
|
579 |
|
|
344 | 580 |
</schema> |
345 | 581 |
</xsl:template> |
346 |
</xsl:stylesheet> |
|
582 |
</xsl:stylesheet> |
modules/dnet-index-solr-service/branches/solr75/src/main/resources/eu/dnetlib/functionality/index/conf/solrconfig.xml.st | ||
---|---|---|
16 | 16 |
limitations under the License. |
17 | 17 |
--> |
18 | 18 |
|
19 |
<!--
|
|
19 |
<!-- |
|
20 | 20 |
For more details about configurations options that may appear in |
21 |
this file, see http://wiki.apache.org/solr/SolrConfigXml.
|
|
21 |
this file, see http://wiki.apache.org/solr/SolrConfigXml. |
|
22 | 22 |
--> |
23 | 23 |
<config> |
24 |
<!-- In all configuration below, a prefix of "solr." for class names
|
|
25 |
is an alias that causes solr to search appropriate packages,
|
|
26 |
including org.apache.solr.(search|update|request|core|analysis)
|
|
24 |
<!-- In all configuration below, a prefix of "solr." for class names
|
|
25 |
is an alias that causes solr to search appropriate packages,
|
|
26 |
including org.apache.solr.(search|update|request|core|analysis)
|
|
27 | 27 |
|
28 |
You may also specify a fully qualified Java classname if you
|
|
29 |
have your own custom plugins.
|
|
30 |
-->
|
|
28 |
You may also specify a fully qualified Java classname if you
|
|
29 |
have your own custom plugins.
|
|
30 |
-->
|
|
31 | 31 |
|
32 |
<!-- Controls what version of Lucene various components of Solr
|
|
33 |
adhere to. Generally, you want to use the latest version to
|
|
34 |
get all bug fixes and improvements. It is highly recommended
|
|
35 |
that you fully re-index after changing this setting as it can
|
|
36 |
affect both how text is indexed and queried.
|
|
37 |
-->
|
|
38 |
<luceneMatchVersion>$luceneMatchVersion$</luceneMatchVersion>
|
|
32 |
<!-- Controls what version of Lucene various components of Solr
|
|
33 |
adhere to. Generally, you want to use the latest version to
|
|
34 |
get all bug fixes and improvements. It is highly recommended
|
|
35 |
that you fully re-index after changing this setting as it can
|
|
36 |
affect both how text is indexed and queried.
|
|
37 |
-->
|
|
38 |
<luceneMatchVersion>$luceneMatchVersion$</luceneMatchVersion>
|
|
39 | 39 |
|
40 |
<!-- <lib/> directives can be used to instruct Solr to load an Jars
|
|
41 |
identified and use them to resolve any "plugins" specified in
|
|
42 |
your solrconfig.xml or schema.xml (ie: Analyzers, Request
|
|
43 |
Handlers, etc...).
|
|
40 |
<!-- <lib/> directives can be used to instruct Solr to load any Jars
|
|
41 |
identified and use them to resolve any "plugins" specified in
|
|
42 |
your solrconfig.xml or schema.xml (ie: Analyzers, Request
|
|
43 |
Handlers, etc...).
|
|
44 | 44 |
|
45 |
All directories and paths are resolved relative to the
|
|
46 |
instanceDir.
|
|
45 |
All directories and paths are resolved relative to the
|
|
46 |
instanceDir.
|
|
47 | 47 |
|
48 |
Please note that <lib/> directives are processed in the order
|
|
49 |
that they appear in your solrconfig.xml file, and are "stacked"
|
|
50 |
on top of each other when building a ClassLoader - so if you have
|
|
51 |
plugin jars with dependencies on other jars, the "lower level"
|
|
52 |
dependency jars should be loaded first.
|
|
48 |
Please note that <lib/> directives are processed in the order
|
|
49 |
that they appear in your solrconfig.xml file, and are "stacked"
|
|
50 |
on top of each other when building a ClassLoader - so if you have
|
|
51 |
plugin jars with dependencies on other jars, the "lower level"
|
|
52 |
dependency jars should be loaded first.
|
|
53 | 53 |
|
54 |
If a "./lib" directory exists in your instanceDir, all files |
|
55 |
found in it are included as if you had used the following |
|
56 |
syntax... |
|
57 |
|
|
58 |
<lib dir="./lib" /> |
|
59 |
--> |
|
54 |
If a "./lib" directory exists in your instanceDir, all files |
|
55 |
found in it are included as if you had used the following |
|
56 |
syntax... |
|
60 | 57 |
|
61 |
<!-- A 'dir' option by itself adds any files found in the directory |
|
62 |
to the classpath, this is useful for including all jars in a |
|
63 |
directory. |
|
58 |
<lib dir="./lib" /> |
|
59 |
--> |
|
64 | 60 |
|
65 |
When a 'regex' is specified in addition to a 'dir', only the
|
|
66 |
files in that directory which completely match the regex
|
|
67 |
(anchored on both ends) will be included.
|
|
61 |
<!-- A 'dir' option by itself adds any files found in the directory
|
|
62 |
to the classpath, this is useful for including all jars in a
|
|
63 |
directory.
|
|
68 | 64 |
|
69 |
If a 'dir' option (with or without a regex) is used and nothing |
|
70 |
is found that matches, a warning will be logged. |
|
65 |
When a 'regex' is specified in addition to a 'dir', only the |
|
66 |
files in that directory which completely match the regex |
|
67 |
(anchored on both ends) will be included. |
|
71 | 68 |
|
72 |
The examples below can be used to load some solr-contribs along |
|
73 |
with their external dependencies. |
|
74 |
--> |
|
75 |
<lib dir="$serverLibPath$" regex=".*\.jar" /> |
|
69 |
If a 'dir' option (with or without a regex) is used and nothing |
|
70 |
is found that matches, a warning will be logged. |
|
76 | 71 |
|
77 |
<!-- an exact 'path' can be used instead of a 'dir' to specify a |
|
78 |
specific jar file. This will cause a serious error to be logged |
|
79 |
if it can't be loaded. |
|
80 |
--> |
|
81 |
<!-- |
|
82 |
<lib path="../a-jar-that-does-not-exist.jar" /> |
|
83 |
--> |
|
84 |
|
|
85 |
<!-- Data Directory |
|
72 |
The examples below can be used to load some solr-contribs along |
|
73 |
with their external dependencies. |
|
74 |
--> |
|
75 |
<lib dir="\${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" /> |
|
76 |
<lib dir="\${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" /> |
|
86 | 77 |
|
87 |
Used to specify an alternate directory to hold all index data |
|
88 |
other than the default ./data under the Solr home. If |
|
89 |
replication is in use, this should match the replication |
|
90 |
configuration. |
|
91 |
--> |
|
92 |
<dataDir>\${solr.data.dir:}</dataDir> |
|
78 |
<lib dir="\${solr.install.dir:../../../..}/contrib/clustering/lib/" regex=".*\.jar" /> |
|
79 |
<lib dir="\${solr.install.dir:../../../..}/dist/" regex="solr-clustering-\d.*\.jar" /> |
|
93 | 80 |
|
81 |
<lib dir="\${solr.install.dir:../../../..}/contrib/langid/lib/" regex=".*\.jar" /> |
|
82 |
<lib dir="\${solr.install.dir:../../../..}/dist/" regex="solr-langid-\d.*\.jar" /> |
|
94 | 83 |
|
95 |
<!-- The DirectoryFactory to use for indexes. |
|
96 |
|
|
97 |
solr.StandardDirectoryFactory is filesystem |
|
98 |
based and tries to pick the best implementation for the current |
|
99 |
JVM and platform. solr.NRTCachingDirectoryFactory, the default, |
|
100 |
wraps solr.StandardDirectoryFactory and caches small files in memory |
|
101 |
for better NRT performance. |
|
84 |
<lib dir="\${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" /> |
|
85 |
<lib dir="\${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" /> |
|
102 | 86 |
|
103 |
One can force a particular implementation via solr.MMapDirectoryFactory, |
|
104 |
solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory. |
|
87 |
<!-- an exact 'path' can be used instead of a 'dir' to specify a |
|
88 |
specific jar file. This will cause a serious error to be logged |
|
89 |
if it can't be loaded. |
|
90 |
--> |
|
91 |
<!-- |
|
92 |
<lib path="../a-jar-that-does-not-exist.jar" /> |
|
93 |
--> |
|
105 | 94 |
|
106 |
solr.RAMDirectoryFactory is memory based, not |
|
107 |
persistent, and doesn't work with replication. |
|
108 |
--> |
|
109 |
<directoryFactory name="DirectoryFactory" |
|
110 |
class="\${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"> |
|
111 |
|
|
112 |
|
|
113 |
<!-- These will be used if you are using the solr.HdfsDirectoryFactory, |
|
114 |
otherwise they will be ignored. If you don't plan on using hdfs, |
|
115 |
you can safely remove this section. --> |
|
116 |
<!-- The root directory that collection data should be written to. --> |
|
117 |
<str name="solr.hdfs.home">\${solr.hdfs.home:}</str> |
|
118 |
<!-- The hadoop configuration files to use for the hdfs client. --> |
|
119 |
<str name="solr.hdfs.confdir">\${solr.hdfs.confdir:}</str> |
|
120 |
<!-- Enable/Disable the hdfs cache. --> |
|
121 |
<str name="solr.hdfs.blockcache.enabled">\${solr.hdfs.blockcache.enabled:true}</str> |
|
122 |
<!-- Enable/Disable using one global cache for all SolrCores. |
|
123 |
The settings used will be from the first HdfsDirectoryFactory created. --> |
|
124 |
<str name="solr.hdfs.blockcache.global">\${solr.hdfs.blockcache.global:true}</str> |
|
125 |
|
|
126 |
</directoryFactory> |
|
95 |
<!-- Data Directory |
|
127 | 96 |
|
128 |
<!-- The CodecFactory for defining the format of the inverted index. |
|
129 |
The default implementation is SchemaCodecFactory, which is the official Lucene |
|
130 |
index format, but hooks into the schema to provide per-field customization of |
|
131 |
the postings lists and per-document values in the fieldType element |
|
132 |
(postingsFormat/docValuesFormat). Note that most of the alternative implementations |
|
133 |
are experimental, so if you choose to customize the index format, its a good |
|
134 |
idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader) |
|
135 |
before upgrading to a newer version to avoid unnecessary reindexing. |
|
136 |
--> |
|
137 |
<codecFactory class="solr.SchemaCodecFactory"/> |
|
97 |
Used to specify an alternate directory to hold all index data |
|
98 |
other than the default ./data under the Solr home. If |
|
99 |
replication is in use, this should match the replication |
|
100 |
configuration. |
|
101 |
--> |
|
102 |
<dataDir>\${solr.data.dir:}</dataDir> |
|
138 | 103 |
|
139 |
<!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>: |
|
140 |
|
|
141 |
<schemaFactory class="ManagedIndexSchemaFactory"> |
|
142 |
<bool name="mutable">true</bool> |
|
143 |
<str name="managedSchemaResourceName">managed-schema</str> |
|
144 |
</schemaFactory> |
|
145 |
|
|
146 |
When ManagedIndexSchemaFactory is specified, Solr will load the schema from |
|
147 |
he resource named in 'managedSchemaResourceName', rather than from schema.xml. |
|
148 |
Note that the managed schema resource CANNOT be named schema.xml. If the managed |
|
149 |
schema does not exist, Solr will create it after reading schema.xml, then rename |
|
150 |
'schema.xml' to 'schema.xml.bak'. |
|
151 |
|
|
152 |
Do NOT hand edit the managed schema - external modifications will be ignored and |
|
153 |
overwritten as a result of schema modification REST API calls. |
|
154 | 104 |
|
155 |
When ManagedIndexSchemaFactory is specified with mutable = true, schema |
|
156 |
modification REST API calls will be allowed; otherwise, error responses will be |
|
157 |
sent back for these requests. |
|
158 |
--> |
|
159 |
<schemaFactory class="ClassicIndexSchemaFactory"/> |
|
105 |
<!-- The DirectoryFactory to use for indexes. |
|
160 | 106 |
|
161 |
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
162 |
Index Config - These settings control low-level behavior of indexing |
|
163 |
Most example settings here show the default value, but are commented |
|
164 |
out, to more easily see where customizations have been made. |
|
165 |
|
|
166 |
Note: This replaces <indexDefaults> and <mainIndex> from older versions |
|
167 |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> |
|
168 |
<indexConfig> |
|
169 |
<!-- maxFieldLength was removed in 4.0. To get similar behavior, include a |
|
170 |
LimitTokenCountFilterFactory in your fieldType definition. E.g. |
|
171 |
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/> |
|
172 |
--> |
|
173 |
<!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 --> |
|
174 |
<!-- <writeLockTimeout>1000</writeLockTimeout> --> |
|
107 |
solr.StandardDirectoryFactory is filesystem |
|
108 |
based and tries to pick the best implementation for the current |
|
109 |
JVM and platform. solr.NRTCachingDirectoryFactory, the default, |
|
110 |
wraps solr.StandardDirectoryFactory and caches small files in memory |
|
111 |
for better NRT performance. |
|
175 | 112 |
|
176 |
<!-- The maximum number of simultaneous threads that may be |
|
177 |
indexing documents at once in IndexWriter; if more than this |
|
178 |
many threads arrive they will wait for others to finish. |
|
179 |
Default in Solr/Lucene is 8. --> |
|
180 |
<maxIndexingThreads>$maxIndexingThreads$</maxIndexingThreads> |
|
113 |
One can force a particular implementation via solr.MMapDirectoryFactory, |
|
114 |
solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory. |
|
181 | 115 |
|
182 |
<!-- Expert: Enabling compound file will use less files for the index, |
|
183 |
using fewer file descriptors at the expense of decreased performance. |
|
184 |
Default in Lucene is "true". Default in Solr is "false" (since 3.6) --> |
|
185 |
<!-- <useCompoundFile>false</useCompoundFile> --> |
|
116 |
solr.RAMDirectoryFactory is memory based, not |
|
117 |
persistent, and doesn't work with replication. |
|
118 |
--> |
|
119 |
<directoryFactory name="DirectoryFactory" |
|
120 |
class="\${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> |
|
186 | 121 |
|
187 |
<!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
|
|
188 |
indexing for buffering added documents and deletions before they are
|
|
189 |
flushed to the Directory.
|
|
190 |
maxBufferedDocs sets a limit on the number of documents buffered
|
|
191 |
before flushing.
|
|
192 |
If both ramBufferSizeMB and maxBufferedDocs are set, then
|
|
193 |
Lucene will flush based on whichever limit is hit first.
|
|
194 |
The default is 100 MB. -->
|
|
195 |
<!-- <ramBufferSizeMB>100</ramBufferSizeMB> -->
|
|
196 |
<!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
|
|
197 |
|
|
198 |
|
|
199 |
<ramBufferSizeMB>$ramBufferSizeMB$</ramBufferSizeMB>
|
|
122 |
<!-- The CodecFactory for defining the format of the inverted index.
|
|
123 |
The default implementation is SchemaCodecFactory, which is the official Lucene
|
|
124 |
index format, but hooks into the schema to provide per-field customization of
|
|
125 |
the postings lists and per-document values in the fieldType element
|
|
126 |
(postingsFormat/docValuesFormat). Note that most of the alternative implementations
|
|
127 |
are experimental, so if you choose to customize the index format, it's a good
|
|
128 |
idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
|
|
129 |
before upgrading to a newer version to avoid unnecessary reindexing.
|
|
130 |
A "compressionMode" string element can be added to <codecFactory> to choose
|
|
131 |
between the existing compression modes in the default codec: "BEST_SPEED" (default)
|
|
132 |
or "BEST_COMPRESSION". |
|
133 |
--> |
|
134 |
<codecFactory class="solr.SchemaCodecFactory"/>
|
|
200 | 135 |
|
201 |
<!-- Expert: Merge Policy |
|
202 |
The Merge Policy in Lucene controls how merging of segments is done. |
|
203 |
The default since Solr/Lucene 3.3 is TieredMergePolicy. |
|
204 |
The default since Lucene 2.3 was the LogByteSizeMergePolicy, |
|
205 |
Even older versions of Lucene used LogDocMergePolicy. |
|
206 |
--> |
|
207 |
<!-- |
|
208 |
<mergePolicy class="org.apache.lucene.index.TieredMergePolicy"> |
|
209 |
<int name="maxMergeAtOnce">10</int> |
|
210 |
<int name="segmentsPerTier">10</int> |
|
211 |
</mergePolicy> |
|
212 |
--> |
|
213 |
|
|
214 |
<!-- Merge Factor |
|
215 |
The merge factor controls how many segments will get merged at a time. |
|
216 |
For TieredMergePolicy, mergeFactor is a convenience parameter which |
|
217 |
will set both MaxMergeAtOnce and SegmentsPerTier at once. |
|
218 |
For LogByteSizeMergePolicy, mergeFactor decides how many new segments |
|
219 |
will be allowed before they are merged into one. |
|
220 |
Default is 10 for both merge policies. |
|
221 |
--> |
|
222 |
|
|
223 |
<mergeFactor>$mergeFactor$</mergeFactor> |
|
224 |
|
|
136 |
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
137 |
Index Config - These settings control low-level behavior of indexing |
|
138 |
Most example settings here show the default value, but are commented |
|
139 |
out, to more easily see where customizations have been made. |
|
225 | 140 |
|
226 |
<!-- Expert: Merge Scheduler
|
|
227 |
The Merge Scheduler in Lucene controls how merges are
|
|
228 |
performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
|
|
229 |
can perform merges in the background using separate threads.
|
|
230 |
The SerialMergeScheduler (Lucene 2.2 default) does not.
|
|
231 |
-->
|
|
232 |
<!--
|
|
233 |
<mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
|
|
234 |
-->
|
|
141 |
Note: This replaces <indexDefaults> and <mainIndex> from older versions
|
|
142 |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
|
|
143 |
<indexConfig>
|
|
144 |
<!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
|
|
145 |
LimitTokenCountFilterFactory in your fieldType definition. E.g.
|
|
146 |
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
|
|
147 |
-->
|
|
148 |
<!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
|
|
149 |
<!-- <writeLockTimeout>1000</writeLockTimeout> -->
|
|
235 | 150 |
|
236 |
<!-- LockFactory |
|
151 |
<!-- Expert: Enabling compound file will use fewer files for the index, |
|
152 |
using fewer file descriptors at the expense of decreased performance. |
|
153 |
Default in Lucene is "true". Default in Solr is "false" (since 3.6) --> |
|
154 |
<!-- <useCompoundFile>false</useCompoundFile> --> |
|
237 | 155 |
|
238 |
This option specifies which Lucene LockFactory implementation |
|
239 |
to use. |
|
240 |
|
|
241 |
single = SingleInstanceLockFactory - suggested for a |
|
242 |
read-only index or when there is no possibility of |
|
243 |
another process trying to modify the index. |
|
244 |
native = NativeFSLockFactory - uses OS native file locking. |
|
245 |
Do not use when multiple solr webapps in the same |
|
246 |
JVM are attempting to share a single index. |
|
247 |
simple = SimpleFSLockFactory - uses a plain file for locking |
|
156 |
<!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene |
|
157 |
indexing for buffering added documents and deletions before they are |
|
158 |
flushed to the Directory. |
|
159 |
maxBufferedDocs sets a limit on the number of documents buffered |
|
160 |
before flushing. |
|
161 |
If both ramBufferSizeMB and maxBufferedDocs are set, then |
|
162 |
Lucene will flush based on whichever limit is hit first. --> |
|
163 |
<!-- <ramBufferSizeMB>100</ramBufferSizeMB> --> |
|
164 |
<!-- <maxBufferedDocs>1000</maxBufferedDocs> --> |
|
248 | 165 |
|
249 |
Defaults: 'native' is default for Solr3.6 and later, otherwise |
|
250 |
'simple' is the default |
|
166 |
<!-- Expert: Merge Policy |
|
167 |
The Merge Policy in Lucene controls how merging of segments is done. |
|
168 |
The default since Solr/Lucene 3.3 is TieredMergePolicy. |
|
169 |
The default since Lucene 2.3 was the LogByteSizeMergePolicy, |
|
170 |
Even older versions of Lucene used LogDocMergePolicy. |
|
171 |
--> |
|
172 |
<!-- |
|
173 |
<mergePolicyFactory class="org.apache.solr.index.TieredMergePolicyFactory"> |
|
174 |
<int name="maxMergeAtOnce">10</int> |
|
175 |
<int name="segmentsPerTier">10</int> |
|
176 |
<double name="noCFSRatio">0.1</double> |
|
177 |
</mergePolicyFactory> |
|
178 |
--> |
|
251 | 179 |
|
252 |
More details on the nuances of each LockFactory... |
|
253 |
http://wiki.apache.org/lucene-java/AvailableLockFactories |
|
254 |
--> |
|
255 |
<lockType>\${solr.lock.type:native}</lockType> |
|
180 |
<!-- Expert: Merge Scheduler |
|
181 |
The Merge Scheduler in Lucene controls how merges are |
|
182 |
performed. The ConcurrentMergeScheduler (Lucene 2.3 default) |
|
183 |
can perform merges in the background using separate threads. |
|
184 |
The SerialMergeScheduler (Lucene 2.2 default) does not. |
|
185 |
--> |
|
186 |
<!-- |
|
187 |
<mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/> |
|
188 |
--> |
|
256 | 189 |
|
257 |
<!-- Unlock On Startup
|
|
190 |
<!-- LockFactory
|
|
258 | 191 |
|
259 |
If true, unlock any held write or commit locks on startup. |
|
260 |
This defeats the locking mechanism that allows multiple |
|
261 |
processes to safely access a lucene index, and should be used |
|
262 |
with care. Default is "false". |
|
192 |
This option specifies which Lucene LockFactory implementation |
|
193 |
to use. |
|
263 | 194 |
|
264 |
This is not needed if lock type is 'single' |
|
265 |
--> |
|
266 |
<!-- |
|
267 |
<unlockOnStartup>false</unlockOnStartup> |
|
268 |
--> |
|
269 |
|
|
270 |
<!-- Expert: Controls how often Lucene loads terms into memory |
|
271 |
Default is 128 and is likely good for most everyone. |
|
272 |
--> |
|
273 |
<termIndexInterval>$termIndexInterval$</termIndexInterval> |
|
195 |
single = SingleInstanceLockFactory - suggested for a |
|
196 |
read-only index or when there is no possibility of |
|
197 |
another process trying to modify the index. |
|
198 |
native = NativeFSLockFactory - uses OS native file locking. |
|
199 |
Do not use when multiple solr webapps in the same |
|
200 |
JVM are attempting to share a single index. |
|
201 |
simple = SimpleFSLockFactory - uses a plain file for locking |
|
274 | 202 |
|
275 |
<!-- If true, IndexReaders will be opened/reopened from the IndexWriter |
|
276 |
instead of from the Directory. Hosts in a master/slave setup |
|
277 |
should have this set to false while those in a SolrCloud |
|
278 |
cluster need to be set to true. Default: true |
|
279 |
--> |
|
280 |
<!-- |
|
281 |
<nrtMode>true</nrtMode> |
|
282 |
--> |
|
203 |
Defaults: 'native' is default for Solr3.6 and later, otherwise |
|
204 |
'simple' is the default |
|
283 | 205 |
|
284 |
<!-- Commit Deletion Policy |
|
285 |
Custom deletion policies can be specified here. The class must |
|
286 |
implement org.apache.lucene.index.IndexDeletionPolicy. |
|
206 |
More details on the nuances of each LockFactory... |
|
207 |
http://wiki.apache.org/lucene-java/AvailableLockFactories |
|
208 |
--> |
|
209 |
<lockType>\${solr.lock.type:native}</lockType> |
|
287 | 210 |
|
288 |
The default Solr IndexDeletionPolicy implementation supports |
|
289 |
deleting index commit points on number of commits, age of |
|
290 |
commit point and optimized status. |
|
291 |
|
|
292 |
The latest commit point should always be preserved regardless |
|
293 |
of the criteria. |
|
294 |
--> |
|
295 |
<!-- |
|
296 |
<deletionPolicy class="solr.SolrDeletionPolicy"> |
|
297 |
--> |
|
298 |
<!-- The number of commit points to be kept --> |
|
299 |
<!-- <str name="maxCommitsToKeep">1</str> --> |
|
300 |
<!-- The number of optimized commit points to be kept --> |
|
301 |
<!-- <str name="maxOptimizedCommitsToKeep">0</str> --> |
|
302 |
<!-- |
|
303 |
Delete all commit points once they have reached the given age. |
|
304 |
Supports DateMathParser syntax e.g. |
|
305 |
--> |
|
306 |
<!-- |
|
307 |
<str name="maxCommitAge">30MINUTES</str> |
|
308 |
<str name="maxCommitAge">1DAY</str> |
|
309 |
--> |
|
310 |
<!-- |
|
311 |
</deletionPolicy> |
|
312 |
--> |
|
211 |
<!-- Commit Deletion Policy |
|
212 |
Custom deletion policies can be specified here. The class must |
|
213 |
implement org.apache.lucene.index.IndexDeletionPolicy. |
|
313 | 214 |
|
314 |
<!-- Lucene Infostream |
|
315 |
|
|
316 |
To aid in advanced debugging, Lucene provides an "InfoStream" |
|
317 |
of detailed information when indexing. |
|
215 |
The default Solr IndexDeletionPolicy implementation supports |
|
216 |
deleting index commit points on number of commits, age of |
|
217 |
commit point and optimized status. |
|
318 | 218 |
|
319 |
Setting the value to true will instruct the underlying Lucene |
|
320 |
IndexWriter to write its info stream to solr's log. By default, |
|
321 |
this is enabled here, and controlled through log4j.properties. |
|
322 |
--> |
|
323 |
<infoStream>true</infoStream> |
|
324 |
|
|
325 |
<!-- |
|
326 |
Use true to enable this safety check, which can help |
|
327 |
reduce the risk of propagating index corruption from older segments |
|
328 |
into new ones, at the expense of slower merging. |
|
329 |
--> |
|
330 |
<checkIntegrityAtMerge>false</checkIntegrityAtMerge> |
|
331 |
</indexConfig> |
|
219 |
The latest commit point should always be preserved regardless |
|
220 |
of the criteria. |
|
221 |
--> |
|
222 |
<!-- |
|
223 |
<deletionPolicy class="solr.SolrDeletionPolicy"> |
|
224 |
--> |
|
225 |
<!-- The number of commit points to be kept --> |
|
226 |
<!-- <str name="maxCommitsToKeep">1</str> --> |
|
227 |
<!-- The number of optimized commit points to be kept --> |
|
228 |
<!-- <str name="maxOptimizedCommitsToKeep">0</str> --> |
|
229 |
<!-- |
|
230 |
Delete all commit points once they have reached the given age. |
|
231 |
Supports DateMathParser syntax e.g. |
|
232 |
--> |
|
233 |
<!-- |
|
234 |
<str name="maxCommitAge">30MINUTES</str> |
|
235 |
<str name="maxCommitAge">1DAY</str> |
|
236 |
--> |
|
237 |
<!-- |
|
238 |
</deletionPolicy> |
|
239 |
--> |
|
332 | 240 |
|
241 |
<!-- Lucene Infostream |
|
333 | 242 |
|
334 |
<!-- JMX |
|
335 |
|
|
336 |
This example enables JMX if and only if an existing MBeanServer |
|
337 |
is found, use this if you want to configure JMX through JVM |
|
338 |
parameters. Remove this to disable exposing Solr configuration |
|
339 |
and statistics to JMX. |
|
243 |
To aid in advanced debugging, Lucene provides an "InfoStream" |
|
244 |
of detailed information when indexing. |
|
340 | 245 |
|
341 |
For more details see http://wiki.apache.org/solr/SolrJmx |
|
342 |
--> |
|
343 |
<jmx /> |
|
344 |
<!-- If you want to connect to a particular server, specify the |
|
345 |
agentId |
|
346 |
--> |
|
347 |
<!-- <jmx agentId="myAgent" /> --> |
|
348 |
<!-- If you want to start a new MBeanServer, specify the serviceUrl --> |
|
349 |
<!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> |
|
350 |
--> |
|
246 |
Setting the value to true will instruct the underlying Lucene |
|
247 |
IndexWriter to write its debugging info to the specified file |
|
248 |
--> |
|
249 |
<!-- <infoStream file="INFOSTREAM.txt">false</infoStream> --> |
|
250 |
</indexConfig> |
|
351 | 251 |
|
352 |
<!-- The default high-performance update handler --> |
|
353 |
<updateHandler class="solr.DirectUpdateHandler2"> |
|
354 | 252 |
|
355 |
<!-- Enables a transaction log, used for real-time get, durability, and |
|
356 |
solr cloud replica recovery. The log can grow as big as |
|
357 |
uncommitted changes to the index, so use of a hard autoCommit |
|
358 |
is recommended (see below). |
|
359 |
"dir" - the target directory for transaction logs, defaults to the |
|
360 |
solr data directory. --> |
|
361 |
<updateLog> |
|
362 |
<str name="dir">\${solr.ulog.dir:}</str> |
|
363 |
</updateLog> |
|
364 |
|
|
365 |
<!-- AutoCommit |
|
253 |
<!-- JMX |
|
366 | 254 |
|
367 |
Perform a hard commit automatically under certain conditions. |
|
368 |
Instead of enabling autoCommit, consider using "commitWithin" |
|
369 |
when adding documents. |
|
255 |
This example enables JMX if and only if an existing MBeanServer |
|
256 |
is found, use this if you want to configure JMX through JVM |
|
257 |
parameters. Remove this to disable exposing Solr configuration |
|
258 |
and statistics to JMX. |
|
370 | 259 |
|
371 |
http://wiki.apache.org/solr/UpdateXmlMessages |
|
260 |
For more details see http://wiki.apache.org/solr/SolrJmx |
|
261 |
--> |
|
262 |
<jmx /> |
|
263 |
<!-- If you want to connect to a particular server, specify the |
|
264 |
agentId |
|
265 |
--> |
|
266 |
<!-- <jmx agentId="myAgent" /> --> |
|
267 |
<!-- If you want to start a new MBeanServer, specify the serviceUrl --> |
|
268 |
<!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> |
|
269 |
--> |
|
372 | 270 |
|
373 |
maxDocs - Maximum number of documents to add since the last
|
|
374 |
commit before automatically triggering a new commit.
|
|
271 |
<!-- The default high-performance update handler -->
|
|
272 |
<updateHandler class="solr.DirectUpdateHandler2">
|
|
375 | 273 |
|
376 |
maxTime - Maximum amount of time in ms that is allowed to pass |
|
377 |
since a document was added before automatically |
|
378 |
triggering a new commit. |
|
379 |
openSearcher - if false, the commit causes recent index changes |
|
380 |
to be flushed to stable storage, but does not cause a new |
|
381 |
searcher to be opened to make those changes visible. |
|
274 |
<!-- Enables a transaction log, used for real-time get, durability, and |
|
275 |
solr cloud replica recovery. The log can grow as big as |
|
276 |
uncommitted changes to the index, so use of a hard autoCommit |
|
277 |
is recommended (see below). |
|
278 |
"dir" - the target directory for transaction logs, defaults to the |
|
279 |
solr data directory. |
|
280 |
"numVersionBuckets" - sets the number of buckets used to keep |
|
281 |
track of max version values when checking for re-ordered |
|
282 |
updates; increase this value to reduce the cost of |
|
283 |
synchronizing access to version buckets during high-volume |
|
284 |
indexing, this requires 8 bytes (long) * numVersionBuckets |
|
285 |
of heap space per Solr core. |
|
286 |
--> |
|
287 |
<updateLog> |
|
288 |
<str name="dir">\${solr.ulog.dir:}</str> |
|
289 |
<int name="numVersionBuckets">\${solr.ulog.numVersionBuckets:65536}</int> |
|
290 |
</updateLog> |
|
382 | 291 |
|
383 |
If the updateLog is enabled, then it's highly recommended to |
|
384 |
have some sort of hard autoCommit to limit the log size. |
|
385 |
--> |
|
386 |
<autoCommit> |
|
387 |
<maxTime>\${solr.autoCommit.maxTime:$autocommit$}</maxTime> |
|
388 |
<openSearcher>false</openSearcher> |
|
389 |
</autoCommit> |
|
292 |
<!-- AutoCommit |
|
390 | 293 |
|
391 |
<!-- softAutoCommit is like autoCommit except it causes a |
|
392 |
'soft' commit which only ensures that changes are visible |
|
393 |
but does not ensure that data is synced to disk. This is |
|
394 |
faster and more near-realtime friendly than a hard commit. |
|
395 |
--> |
|
294 |
Perform a hard commit automatically under certain conditions. |
|
295 |
Instead of enabling autoCommit, consider using "commitWithin" |
|
296 |
when adding documents. |
|
396 | 297 |
|
397 |
<autoSoftCommit> |
|
398 |
<maxTime>\${solr.autoSoftCommit.maxTime:$autosoftcommit$}</maxTime> |
|
399 |
</autoSoftCommit> |
|
298 |
http://wiki.apache.org/solr/UpdateXmlMessages |
|
400 | 299 |
|
401 |
<!-- Update Related Event Listeners |
|
402 |
|
|
403 |
Various IndexWriter related events can trigger Listeners to |
|
404 |
take actions. |
|
300 |
maxDocs - Maximum number of documents to add since the last |
|
301 |
commit before automatically triggering a new commit. |
|
405 | 302 |
|
406 |
postCommit - fired after every commit or optimize command |
|
407 |
postOptimize - fired after every optimize command |
|
408 |
--> |
|
409 |
<!-- The RunExecutableListener executes an external command from a |
|
410 |
hook such as postCommit or postOptimize. |
|
411 |
|
|
412 |
exe - the name of the executable to run |
|
413 |
dir - dir to use as the current working directory. (default=".") |
|
414 |
wait - the calling thread waits until the executable returns. |
|
415 |
(default="true") |
|
416 |
args - the arguments to pass to the program. (default is none) |
|
417 |
env - environment variables to set. (default is none) |
|
418 |
--> |
|
419 |
<!-- This example shows how RunExecutableListener could be used |
|
420 |
with the script based replication... |
|
421 |
http://wiki.apache.org/solr/CollectionDistribution |
|
422 |
--> |
|
423 |
<!-- |
|
424 |
<listener event="postCommit" class="solr.RunExecutableListener"> |
|
425 |
<str name="exe">solr/bin/snapshooter</str> |
|
426 |
<str name="dir">.</str> |
|
427 |
<bool name="wait">true</bool> |
|
428 |
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr> |
|
429 |
<arr name="env"> <str>MYVAR=val1</str> </arr> |
|
430 |
</listener> |
|
431 |
--> |
|
303 |
maxTime - Maximum amount of time in ms that is allowed to pass |
|
304 |
since a document was added before automatically |
|
305 |
triggering a new commit. |
|
306 |
openSearcher - if false, the commit causes recent index changes |
|
307 |
to be flushed to stable storage, but does not cause a new |
|
308 |
searcher to be opened to make those changes visible. |
|
432 | 309 |
|
433 |
</updateHandler> |
|
434 |
|
|
435 |
<!-- IndexReaderFactory |
|
310 |
If the updateLog is enabled, then it's highly recommended to |
|
311 |
have some sort of hard autoCommit to limit the log size. |
|
312 |
--> |
|
313 |
<autoCommit> |
|
314 |
<maxTime>\${solr.autoCommit.maxTime:15000}</maxTime> |
|
315 |
<openSearcher>false</openSearcher> |
|
316 |
</autoCommit> |
|
436 | 317 |
|
437 |
Use the following format to specify a custom IndexReaderFactory, |
Also available in: Unified diff
updated configuration templates to solr 7.5.0