Project

General

Profile

« Previous | Next » 

Revision 62668

Added by Alessia Bardi over 1 year ago

[maven-release-plugin] copy for tag dnet-index-solr-service-3.0.0

View differences:

modules/dnet-index-solr-service/tags/dnet-index-solr-service-3.0.0/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet45-parent</artifactId>
6
		<version>1.0.0</version>
7
		<relativePath />
8
	</parent>
9
	<modelVersion>4.0.0</modelVersion>
10
	<groupId>eu.dnetlib</groupId>
11
	<artifactId>dnet-index-solr-service</artifactId>
12
	<version>3.0.0</version>
13
	<scm>
14
		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-index-solr-service/tags/dnet-index-solr-service-3.0.0</developerConnection>
15
	</scm>
16
	<dependencies>
17
		<dependency>
18
			<groupId>eu.dnetlib</groupId>
19
			<artifactId>dnet-modular-index-service</artifactId>
20
			<version>[3.0.0,4.0.0)</version>
21
		</dependency>
22
		<dependency>
23
			<groupId>eu.dnetlib</groupId>
24
			<artifactId>dnet-index-client</artifactId>
25
			<version>[3.0.0,4.0.0)</version>
26
		</dependency>
27
	</dependencies>
28
</project>
modules/dnet-index-solr-service/tags/dnet-index-solr-service-3.0.0/src/main/resources/eu/dnetlib/functionality/index/conf/schemaTemplate.xslt
1
<?xml version="1.0" encoding="UTF-8" ?>
2
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
3
	<xsl:output omit-xml-declaration="yes" indent="yes"/>
4

  
5
	<xsl:template match="/FIELDS">
6

  
7
		<xsl:param name="textFieldType" select="string('text_common')"/>
8
		<xsl:variable name="smallcase" select="'abcdefghijklmnopqrstuvwxyz'"/>
9
		<xsl:variable name="uppercase" select="'ABCDEFGHIJKLMNOPQRSTUVWXYZ'"/>
10

  
11
		<!--
12
		D-Net index schema template
13

  
14
		CHANGELOG
15

  
16
		0.1 : first release
17
		0.2 : added preserveOriginal="1" for text field type in the index analyzer and catenateWords="1" for the query analyzer
18
		0.3 : changed language for SnowballPorterFilterFactory to language="German2" (index/query) in the text field type
19
		0.4 : added solr.ASCIIFoldingFilterFactory filter (index/query) in the text field type
20
		0.5 : added long_keyword field type, to be used for objIdentifiers
21
		0.6 : added field types for spellchecking
22
		0.7 : added parameter for text field type
23
		0.8 : added field _version_, needed by Solr 4.0.0 for the transaction log
24
		0.9   : added type: text_en_splitting
25
		0.91  : added type: ngramtext
26
		0.92  : added schema optimizations, removing unnecessary stored fields
27
		0.93  : added attribute preserveOriginal="1" to fieldtype ngramtext (query analysis) to improve matches
28
		0.94  : updated and simplified ngramtext fieldtype
29
		0.95  : update to solr 4.4, removed attribute "compress" from field definition, ngramfield doesn't support NGramFilterFactory anymore
30
		0.96  : update to solr 4.9
31
		0.97  : introduced field type string_ci supporting case insensitivity.
32
		1.0   : updated to solr 6.6.0
33
		 -->
34
		<schema name="dnet" version="1.0">
35

  
36
			<!-- Valid attributes for fields:
37
			 name: mandatory - the name for the field
38
			 type: mandatory - the name of a field type from the
39
			   fieldTypes section
40
			 indexed: true if this field should be indexed (searchable or sortable)
41
			 stored: true if this field should be retrievable
42
			 docValues: true if this field should have doc values. Doc values are
43
			   useful (required, if you are using *Point fields) for faceting,
44
			   grouping, sorting and function queries. Doc values will make the index
45
			   faster to load, more NRT-friendly and more memory-efficient.
46
			   They however come with some limitations: they are currently only
47
			   supported by StrField, UUIDField, all Trie*Fields and *PointFields,
48
			   and depending on the field type, they might require the field to be
49
			   single-valued, be required or have a default value (check the
50
			   documentation of the field type you're interested in for more information)
51
			 multiValued: true if this field may contain multiple values per document
52
			 omitNorms: (expert) set to true to omit the norms associated with
53
			   this field (this disables length normalization and index-time
54
			   boosting for the field, and saves some memory).  Only full-text
55
			   fields or fields that need an index-time boost need norms.
56
			   Norms are omitted for primitive (non-analyzed) types by default.
57
			 termVectors: [false] set to true to store the term vector for a
58
			   given field.
59
			   When using MoreLikeThis, fields used for similarity should be
60
			   stored for best performance.
61
			 termPositions: Store position information with the term vector.
62
			   This will increase storage costs.
63
			 termOffsets: Store offset information with the term vector. This
64
			   will increase storage costs.
65
			 required: The field is required.  It will throw an error if the
66
			   value does not exist
67
			 default: a value that should be used if no value is specified
68
			   when adding a document.
69
			-->
70

  
71
			<!-- field names should consist of alphanumeric or underscore characters only and
72
			  not start with a digit.  This is not currently strictly enforced,
73
			  but other field names will not have first class support from all components
74
			  and back compatibility is not guaranteed.  Names with both leading and
75
			  trailing underscores (e.g. _version_) are reserved.
76
			-->
77

  
78
			<xsl:for-each select="./FIELD">
79
				<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/>
80
				<xsl:variable name="fieldtype">
81
					<xsl:choose>
82
						<xsl:when test="@type"><xsl:value-of select="@type"/></xsl:when>
83
						<xsl:when test="@tokenizable='false'">string</xsl:when>
84
						<xsl:otherwise>
85
							<xsl:value-of select="$textFieldType"/>
86
						</xsl:otherwise>
87
					</xsl:choose>
88
				</xsl:variable>
89
				<xsl:variable name="isMultivalued">
90
					<xsl:choose>
91
						<xsl:when test="@multivalued='false'">false</xsl:when>
92
						<xsl:otherwise>true</xsl:otherwise>
93
					</xsl:choose>
94
				</xsl:variable>
95
				<xsl:variable name="isStored">
96
					<xsl:choose>
97
						<xsl:when test="@stored='true'">true</xsl:when>
98
						<xsl:otherwise>false</xsl:otherwise>
99
					</xsl:choose>
100
				</xsl:variable>
101

  
102
				<field name="{$fieldname}" type="{$fieldtype}" indexed="{@indexable}" stored="{normalize-space($isStored)}" multiValued="{normalize-space($isMultivalued)}"/>
103
			</xsl:for-each>
104

  
105
			<field name="__indexrecordidentifier" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
106

  
107
			<field name="__result" type="string" indexed="false" stored="true" multiValued="false" docValues="false"/>
108

  
109
			<field name="__all" type="{$textFieldType}" indexed="true" stored="false" multiValued="true"/>
110

  
111
			<field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />
112

  
113
			<field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
114

  
115
			<!-- field for ping -->
116
			<field name="text" type="{$textFieldType}" indexed="false" stored="false"/>
117

  
118
			<!-- Field to use to determine and enforce document uniqueness.
119
				 Unless this field is marked with required="false", it will be a required field
120
			  -->
121
			<uniqueKey>__indexrecordidentifier</uniqueKey>
122

  
123
			<xsl:for-each select="./FIELD[@copy = 'true']">
124
				<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/>
125
				<copyField source="{$fieldname}" dest="__all"/>
126
			</xsl:for-each>
127

  
128
			<!-- copyField commands copy one field to another at the time a document
129
			   is added to the index.  It's used either to index the same field differently,
130
			   or to add multiple fields to the same field for easier/faster searching.
131

  
132
			<copyField source="sourceFieldName" dest="destinationFieldName"/>
133
			-->
134

  
135
			<!-- field type definitions. The "name" attribute is
136
			   just a label to be used by field definitions.  The "class"
137
			   attribute and any other attributes determine the real
138
			   behavior of the fieldType.
139
				 Class names starting with "solr" refer to java classes in a
140
			   standard package such as org.apache.solr.analysis
141
			-->
142

  
143
			<!-- The StrField type is not analyzed, but indexed/stored verbatim.
144
			   It supports doc values but in that case the field needs to be
145
			   single-valued and either required or have a default value.
146
			  -->
147
			<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" />
148
			<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" />
149

  
150
			<!-- boolean type: "true" or "false" -->
151
			<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
152

  
153
			<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
154

  
155
			<!-- The optional sortMissingLast and sortMissingFirst attributes are
156
				 currently supported on types that are sorted internally as strings
157
				 and on numeric types.
158
				 This includes "string","boolean", "int", "float", "long", "date", "double",
159
				 including the "Trie" and "Point" variants.
160
			   - If sortMissingLast="true", then a sort on this field will cause documents
161
				 without the field to come after documents with the field,
162
				 regardless of the requested sort order (asc or desc).
163
			   - If sortMissingFirst="true", then a sort on this field will cause documents
164
				 without the field to come before documents with the field,
165
				 regardless of the requested sort order.
166
			   - If sortMissingLast="false" and sortMissingFirst="false" (the default),
167
				 then default lucene sorting will be used which places docs without the
168
				 field first in an ascending sort and last in a descending sort.
169
			-->
170

  
171
			<!--
172
			  Numeric field types that index values using KD-trees. *Point fields are faster and more efficient than Trie* fields, both at
173
			  search time and at index time, but some features are still not supported.
174
			  Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
175
			-->
176
			<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
177
			<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
178
			<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
179
			<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
180

  
181
			<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
182
			<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
183
			<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
184
			<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
185

  
186
			<!--
187
			  Default numeric field types. For faster range queries, consider *PointFields (pint/pfloat/plong/pdouble), or the
188
			  tint/tfloat/tlong/tdouble types.
189
			-->
190
			<fieldType name="int" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
191
			<fieldType name="float" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
192
			<fieldType name="long" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
193
			<fieldType name="double" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
194

  
195
			<fieldType name="ints" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
196
			<fieldType name="floats" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
197
			<fieldType name="longs" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
198
			<fieldType name="doubles" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
199

  
200
			<!--
201
			 Numeric field types that index each value at various levels of precision
202
			 to accelerate range queries when the number of values between the range
203
			 endpoints is large. See the javadoc for NumericRangeQuery for internal
204
			 implementation details.
205

  
206
			 Smaller precisionStep values (specified in bits) will lead to more tokens
207
			 indexed per value, slightly larger index size, and faster range queries.
208
			 A precisionStep of 0 disables indexing at different precision levels.
209

  
210
			 Consider using pint/pfloat/plong/pdouble instead of Trie* fields if possible
211
			-->
212
			<fieldType name="tint" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
213
			<fieldType name="tfloat" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
214
			<fieldType name="tlong" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
215
			<fieldType name="tdouble" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
216

  
217
			<fieldType name="tints" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
218
			<fieldType name="tfloats" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
219
			<fieldType name="tlongs" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
220
			<fieldType name="tdoubles" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
221

  
222
			<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
223
				 is a more restricted form of the canonical representation of dateTime
224
				 http://www.w3.org/TR/xmlschema-2/#dateTime
225
				 The trailing "Z" designates UTC time and is mandatory.
226
				 Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
227
				 All other components are mandatory.
228

  
229
				 Expressions can also be used to denote calculations that should be
230
				 performed relative to "NOW" to determine the value, ie...
231

  
232
					   NOW/HOUR
233
						  ... Round to the start of the current hour
234
					   NOW-1DAY
235
						  ... Exactly 1 day prior to now
236
					   NOW/DAY+6MONTHS+3DAYS
237
						  ... 6 months and 3 days in the future from the start of
238
							  the current day
239

  
240
				 Consult the TrieDateField javadocs for more information.
241
			  -->
242
			<!-- KD-tree versions of date fields -->
243
			<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
244
			<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
245

  
246
			<fieldType name="date" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
247
			<fieldType name="dates" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
248

  
249
			<fieldType name="tdate" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0"/>
250
			<fieldType name="tdates" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0" multiValued="true"/>
251

  
252

  
253
			<!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings. -->
254
			<fieldType name="binary" class="solr.BinaryField"/>
255

  
256
			<!-- The "RandomSortField" is not used to store or search any
257
				 data.  You can declare fields of this type in your schema
258
				 to generate pseudo-random orderings of your docs for sorting
259
				 or function purposes.  The ordering is generated based on the field
260
				 name and the version of the index. As long as the index version
261
				 remains unchanged, and the same field name is reused,
262
				 the ordering of the docs will be consistent.
263
				 If you want different pseudo-random orderings of documents,
264
				 for the same version of the index, use a dynamicField and
265
				 change the field name in the request.
266
			 -->
267
			<fieldType name="random" class="solr.RandomSortField" indexed="true" />
268

  
269
			<!-- solr.TextField allows the specification of custom text analyzers
270
				 specified as a tokenizer and a list of token filters. Different
271
				 analyzers may be specified for indexing and querying.
272

  
273
				 The optional positionIncrementGap puts space between multiple fields of
274
				 this type on the same document, with the purpose of preventing false phrase
275
				 matching across fields.
276

  
277
				 For more info on customizing your analyzer chain, please see
278
				 http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
279
			 -->
280

  
281
			<!-- One can also specify an existing Analyzer class that has a
282
				 default constructor via the class attribute on the analyzer element.
283
				 Example:
284
			<fieldType name="text_greek" class="solr.TextField">
285
			  <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
286
			</fieldType>
287
			-->
288

  
289
			<!-- A text field that only splits on whitespace for exact matching of words -->
290
			<!-- <dynamicField name="*_ws" type="text_ws"  indexed="true"  stored="true"/> -->
291

  
292
			<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
293
				<analyzer>
294
					<tokenizer class="solr.WhitespaceTokenizerFactory"/>
295
				</analyzer>
296
			</fieldType>
297

  
298
			<fieldType name="ngramtext" class="solr.TextField">
299
				<analyzer type="index">
300
					<tokenizer class="solr.KeywordTokenizerFactory"/>
301
					<filter class="solr.LowerCaseFilterFactory"/>
302
					<filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="25"/>
303
					<filter class="solr.TrimFilterFactory"/>
304
					<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
305
				</analyzer>
306
				<analyzer type="query">
307
					<tokenizer class="solr.KeywordTokenizerFactory"/>
308
					<filter class="solr.LowerCaseFilterFactory"/>
309
				</analyzer>
310
			</fieldType>
311

  
312

  
313
			<fieldType name="personName" class="solr.TextField"  positionIncrementGap="100">
314
				<analyzer>
315
					<tokenizer class="solr.StandardTokenizerFactory" />
316
					<filter class="solr.LowerCaseFilterFactory" />
317
				</analyzer>
318
			</fieldType>
319

  
320
			<fieldType name="personNamePrefix" class="solr.TextField"  positionIncrementGap="100">
321
				<analyzer type="index">
322
					<tokenizer class="solr.StandardTokenizerFactory"/>
323
					<filter class="solr.LowerCaseFilterFactory" />
324
					<filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="30" />
325
				</analyzer>
326
				<analyzer type="query">
327
					<tokenizer class="solr.StandardTokenizerFactory"/>
328
					<filter class="solr.LowerCaseFilterFactory" />
329
				</analyzer>
330
			</fieldType>
331

  
332

  
333
			<!-- A general text field that has reasonable, generic
334
				 cross-language defaults: it tokenizes with StandardTokenizer,
335
				   removes stop words from case-insensitive "stopwords.txt"
336
				   (empty by default), and down cases.  At query time only, it
337
				   also applies synonyms.
338
			  -->
339
			<fieldType name="text_common" class="solr.TextField" positionIncrementGap="100" multiValued="true">
340
				<analyzer type="index">
341
					<tokenizer class="solr.StandardTokenizerFactory"/>
342
					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
343
					<!-- in this example, we will only use synonyms at query time
344
					<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
345
					<filter class="solr.FlattenGraphFilterFactory"/>
346
					-->
347
					<filter class="solr.LowerCaseFilterFactory"/>
348
				</analyzer>
349
				<analyzer type="query">
350
					<tokenizer class="solr.StandardTokenizerFactory"/>
351
					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
352
					<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
353
					<filter class="solr.LowerCaseFilterFactory"/>
354
				</analyzer>
355
			</fieldType>
356

  
357
			<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
358
				<analyzer type="index">
359
					<tokenizer class="solr.StandardTokenizerFactory"/>
360
					<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
361
					<filter class="solr.LowerCaseFilterFactory"/>
362
					<filter class="solr.EnglishPossessiveFilterFactory"/>
363
					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
364
					<filter class="solr.PorterStemFilterFactory"/>
365
				</analyzer>
366
				<analyzer type="query">
367
					<tokenizer class="solr.StandardTokenizerFactory"/>
368
					<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
369
					<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
370
					<filter class="solr.LowerCaseFilterFactory"/>
371
					<filter class="solr.EnglishPossessiveFilterFactory"/>
372
					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
373
					<filter class="solr.PorterStemFilterFactory"/>
374
				</analyzer>
375
			</fieldType>
376

  
377
			<!-- A text field with defaults appropriate for English, plus
378
				 aggressive word-splitting and autophrase features enabled.
379
				 This field is just like text_en, except it adds
380
				 WordDelimiterGraphFilter to enable splitting and matching of
381
				 words on case-change, alpha numeric boundaries, and
382
				 non-alphanumeric chars.  This means certain compound word
383
				 cases will work, for example query "wi fi" will match
384
				 document "WiFi" or "wi-fi".
385
			-->
386
			<!-- <dynamicField name="*_txt_en_split" type="text_en_splitting"  indexed="true"  stored="true"/> -->
387
			<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
388
				<analyzer type="index">
389
					<tokenizer class="solr.WhitespaceTokenizerFactory"/>
390
					<!-- in this example, we will only use synonyms at query time
391
					<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
392
					-->
393
					<!-- Case insensitive stop word removal.
394
					-->
395
					<filter class="solr.StopFilterFactory"
396
							ignoreCase="true"
397
							words="stopwords.txt"
398
					/>
399
					<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
400
					<filter class="solr.LowerCaseFilterFactory"/>
401
					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
402
					<filter class="solr.PorterStemFilterFactory"/>
403
					<filter class="solr.FlattenGraphFilterFactory" />
404
				</analyzer>
405
				<analyzer type="query">
406
					<tokenizer class="solr.WhitespaceTokenizerFactory"/>
407
					<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
408
					<filter class="solr.StopFilterFactory"
409
							ignoreCase="true"
410
							words="stopwords.txt"
411
					/>
412
					<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
413
					<filter class="solr.LowerCaseFilterFactory"/>
414
					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
415
					<filter class="solr.PorterStemFilterFactory"/>
416
				</analyzer>
417
			</fieldType>
418

  
419
			<!-- Less flexible matching, but fewer false matches.  Probably not ideal for product names,
420
				 but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
421
			<!-- <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight"  indexed="true"  stored="true"/> -->
422
			<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
423
				<analyzer type="index">
424
					<tokenizer class="solr.WhitespaceTokenizerFactory"/>
425
					<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
426
					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
427
					<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
428
					<filter class="solr.LowerCaseFilterFactory"/>
429
					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
430
					<filter class="solr.EnglishMinimalStemFilterFactory"/>
431
					<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
432
						 possible with WordDelimiterGraphFilter in conjunction with stemming. -->
433
					<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
434
					<filter class="solr.FlattenGraphFilterFactory" />
435
				</analyzer>
436
				<analyzer type="query">
437
					<tokenizer class="solr.WhitespaceTokenizerFactory"/>
438
					<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
439
					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
440
					<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
441
					<filter class="solr.LowerCaseFilterFactory"/>
442
					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
443
					<filter class="solr.EnglishMinimalStemFilterFactory"/>
444
					<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
445
						 possible with WordDelimiterGraphFilter in conjunction with stemming. -->
446
					<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
447
				</analyzer>
448
			</fieldType>
449

  
450
			<!-- Just like text_common except it reverses the characters of
451
				   each token, to enable more efficient leading wildcard queries.
452
			-->
453
			<!-- <dynamicField name="*_txt_rev" type="text_rev"  indexed="true"  stored="true"/> -->
454
			<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
455
				<analyzer type="index">
456
					<tokenizer class="solr.StandardTokenizerFactory"/>
457
					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
458
					<filter class="solr.LowerCaseFilterFactory"/>
459
					<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
460
							maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
461
				</analyzer>
462
				<analyzer type="query">
463
					<tokenizer class="solr.StandardTokenizerFactory"/>
464
					<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
465
					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
466
					<filter class="solr.LowerCaseFilterFactory"/>
467
				</analyzer>
468
			</fieldType>
469

  
470
			<!-- <dynamicField name="*_phon_en" type="phonetic_en"  indexed="true"  stored="true"/> -->
471
			<fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
472
				<analyzer>
473
					<tokenizer class="solr.StandardTokenizerFactory"/>
474
					<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
475
				</analyzer>
476
			</fieldType>
477

  
478
			<fieldType name="string_ci" class="solr.TextField" sortMissingLast="true" omitNorms="true">
479
				<analyzer type="query">
480
					<tokenizer class="solr.KeywordTokenizerFactory"/>
481
					<filter class="solr.LowerCaseFilterFactory"/>
482
				</analyzer>
483
			</fieldType>
484

  
485
			<!--
486
			  Example of using PathHierarchyTokenizerFactory at index time, so
487
			  queries for paths match documents at that path, or in descendent paths
488
			-->
489
			<!-- <dynamicField name="*_descendent_path" type="descendent_path"  indexed="true"  stored="true"/> -->
490
			<fieldType name="descendent_path" class="solr.TextField">
491
				<analyzer type="index">
492
					<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
493
				</analyzer>
494
				<analyzer type="query">
495
					<tokenizer class="solr.KeywordTokenizerFactory" />
496
				</analyzer>
497
			</fieldType>
498

  
499
			<!--
500
			  Example of using PathHierarchyTokenizerFactory at query time, so
501
			  queries for paths match documents at that path, or in ancestor paths
502
			-->
503
			<!-- <dynamicField name="*_ancestor_path" type="ancestor_path"  indexed="true"  stored="true"/> -->
504
			<fieldType name="ancestor_path" class="solr.TextField">
505
				<analyzer type="index">
506
					<tokenizer class="solr.KeywordTokenizerFactory" />
507
				</analyzer>
508
				<analyzer type="query">
509
					<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
510
				</analyzer>
511
			</fieldType>
512

  
513
			<!-- since fields of this type are by default not stored or indexed,
514
				 any data added to them will be ignored outright.  -->
515
			<fieldType name="ignored" stored="false" indexed="false" docValues="false" multiValued="true" class="solr.StrField" />
516

  
517
			<!-- This point type indexes the coordinates as separate fields (subFields)
518
			  If subFieldType is defined, it references a type, and a dynamic field
519
			  definition is created matching *___<typename>.  Alternately, if
520
			  subFieldSuffix is defined, that is used to create the subFields.
521
			  Example: if subFieldType="double", then the coordinates would be
522
				indexed in fields myloc_0___double,myloc_1___double.
523
			  Example: if subFieldSuffix="_d" then the coordinates would be indexed
524
				in fields myloc_0_d,myloc_1_d
525
			  The subFields are an implementation detail of the fieldType, and end
526
			  users normally should not need to know about them.
527
			 -->
528
			<!-- <dynamicField name="*_point" type="point"  indexed="true"  stored="true"/> -->
529
			<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
530

  
531
			<!-- A specialized field for geospatial search filters and distance sorting. -->
532
			<fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/>
533

  
534
			<!-- An alternative geospatial field type new to Solr 4.  It supports multiValued and polygon shapes.
535
			  For more information about this and other Spatial fields new to Solr 4, see:
536
			  http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
537
			-->
538
			<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
539
					   geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />
540

  
541
		</schema>
542
	</xsl:template>
543
</xsl:stylesheet>
modules/dnet-index-solr-service/tags/dnet-index-solr-service-3.0.0/src/main/resources/eu/dnetlib/functionality/index/conf/solrconfig.xml.st
1
<?xml version="1.0" encoding="UTF-8" ?>
2
<!--
3
 Licensed to the Apache Software Foundation (ASF) under one or more
4
 contributor license agreements.  See the NOTICE file distributed with
5
 this work for additional information regarding copyright ownership.
6
 The ASF licenses this file to You under the Apache License, Version 2.0
7
 (the "License"); you may not use this file except in compliance with
8
 the License.  You may obtain a copy of the License at
9

  
10
     http://www.apache.org/licenses/LICENSE-2.0
11

  
12
 Unless required by applicable law or agreed to in writing, software
13
 distributed under the License is distributed on an "AS IS" BASIS,
14
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 See the License for the specific language governing permissions and
16
 limitations under the License.
17
-->
18

  
19
<!--
20
     For more details about configurations options that may appear in
21
     this file, see http://wiki.apache.org/solr/SolrConfigXml.
22
-->
23
<config>
24
	<!-- In all configuration below, a prefix of "solr." for class names
25
		 is an alias that causes solr to search appropriate packages,
26
		 including org.apache.solr.(search|update|request|core|analysis)
27

  
28
		 You may also specify a fully qualified Java classname if you
29
		 have your own custom plugins.
30
	  -->
31

  
32
	<!-- Controls what version of Lucene various components of Solr
33
		 adhere to.  Generally, you want to use the latest version to
34
		 get all bug fixes and improvements. It is highly recommended
35
		 that you fully re-index after changing this setting as it can
36
		 affect both how text is indexed and queried.
37
	-->
38
	<luceneMatchVersion>$luceneMatchVersion$</luceneMatchVersion>
39

  
40
	<!-- <lib/> directives can be used to instruct Solr to load any Jars
41
		 identified and use them to resolve any "plugins" specified in
42
		 your solrconfig.xml or schema.xml (ie: Analyzers, Request
43
		 Handlers, etc...).
44

  
45
		 All directories and paths are resolved relative to the
46
		 instanceDir.
47

  
48
		 Please note that <lib/> directives are processed in the order
49
		 that they appear in your solrconfig.xml file, and are "stacked"
50
		 on top of each other when building a ClassLoader - so if you have
51
		 plugin jars with dependencies on other jars, the "lower level"
52
		 dependency jars should be loaded first.
53

  
54
		 If a "./lib" directory exists in your instanceDir, all files
55
		 found in it are included as if you had used the following
56
		 syntax...
57

  
58
				<lib dir="./lib" />
59
	  -->
60

  
61
	<!-- A 'dir' option by itself adds any files found in the directory
62
		 to the classpath, this is useful for including all jars in a
63
		 directory.
64

  
65
		 When a 'regex' is specified in addition to a 'dir', only the
66
		 files in that directory which completely match the regex
67
		 (anchored on both ends) will be included.
68

  
69
		 If a 'dir' option (with or without a regex) is used and nothing
70
		 is found that matches, a warning will be logged.
71

  
72
		 The examples below can be used to load some solr-contribs along
73
		 with their external dependencies.
74
	  -->
75
	<lib dir="\${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" />
76
	<lib dir="\${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" />
77

  
78
	<lib dir="\${solr.install.dir:../../../..}/contrib/clustering/lib/" regex=".*\.jar" />
79
	<lib dir="\${solr.install.dir:../../../..}/dist/" regex="solr-clustering-\d.*\.jar" />
80

  
81
	<lib dir="\${solr.install.dir:../../../..}/contrib/langid/lib/" regex=".*\.jar" />
82
	<lib dir="\${solr.install.dir:../../../..}/dist/" regex="solr-langid-\d.*\.jar" />
83

  
84
	<lib dir="\${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" />
85
	<lib dir="\${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" />
86

  
87
	<!-- an exact 'path' can be used instead of a 'dir' to specify a
88
		 specific jar file.  This will cause a serious error to be logged
89
		 if it can't be loaded.
90
	  -->
91
	<!--
92
	   <lib path="../a-jar-that-does-not-exist.jar" />
93
	-->
94

  
95
	<!-- Data Directory
96

  
97
		 Used to specify an alternate directory to hold all index data
98
		 other than the default ./data under the Solr home.  If
99
		 replication is in use, this should match the replication
100
		 configuration.
101
	  -->
102
	<dataDir>\${solr.data.dir:}</dataDir>
103

  
104

  
105
	<!-- The DirectoryFactory to use for indexes.
106

  
107
		 solr.StandardDirectoryFactory is filesystem
108
		 based and tries to pick the best implementation for the current
109
		 JVM and platform.  solr.NRTCachingDirectoryFactory, the default,
110
		 wraps solr.StandardDirectoryFactory and caches small files in memory
111
		 for better NRT performance.
112

  
113
		 One can force a particular implementation via solr.MMapDirectoryFactory,
114
		 solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
115

  
116
		 solr.RAMDirectoryFactory is memory based, not
117
		 persistent, and doesn't work with replication.
118
	  -->
119
	<directoryFactory name="DirectoryFactory"
120
	                  class="\${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
121

  
122
	<!-- The CodecFactory for defining the format of the inverted index.
123
		 The default implementation is SchemaCodecFactory, which is the official Lucene
124
		 index format, but hooks into the schema to provide per-field customization of
125
		 the postings lists and per-document values in the fieldType element
126
		 (postingsFormat/docValuesFormat). Note that most of the alternative implementations
127
		 are experimental, so if you choose to customize the index format, it's a good
128
		 idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
129
		 before upgrading to a newer version to avoid unnecessary reindexing.
130
		 A "compressionMode" string element can be added to <codecFactory> to choose
131
		 between the existing compression modes in the default codec: "BEST_SPEED" (default)
132
		 or "BEST_COMPRESSION".
133
	-->
134
	<codecFactory class="solr.SchemaCodecFactory"/>
135

  
136
	<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137
		 Index Config - These settings control low-level behavior of indexing
138
		 Most example settings here show the default value, but are commented
139
		 out, to more easily see where customizations have been made.
140

  
141
		 Note: This replaces <indexDefaults> and <mainIndex> from older versions
142
		 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
143
	<indexConfig>
144
		<!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
145
			 LimitTokenCountFilterFactory in your fieldType definition. E.g.
146
		 <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
147
		-->
148
		<!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
149
		<!-- <writeLockTimeout>1000</writeLockTimeout>  -->
150

  
151
		<!-- Expert: Enabling compound file will use fewer files for the index,
152
			 using fewer file descriptors at the expense of performance.
153
			 Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
154
		<!-- <useCompoundFile>false</useCompoundFile> -->
155

  
156
		<!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
157
			 indexing for buffering added documents and deletions before they are
158
			 flushed to the Directory.
159
			 maxBufferedDocs sets a limit on the number of documents buffered
160
			 before flushing.
161
			 If both ramBufferSizeMB and maxBufferedDocs are set, then
162
			 Lucene will flush based on whichever limit is hit first.  -->
163
		<!-- <ramBufferSizeMB>100</ramBufferSizeMB> -->
164
		<!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
165

  
166
		<!-- Expert: Merge Policy
167
			 The Merge Policy in Lucene controls how merging of segments is done.
168
			 The default since Solr/Lucene 3.3 is TieredMergePolicy.
169
			 The default since Lucene 2.3 was the LogByteSizeMergePolicy,
170
			 Even older versions of Lucene used LogDocMergePolicy.
171
		  -->
172
		<!--
173
			<mergePolicyFactory class="org.apache.solr.index.TieredMergePolicyFactory">
174
			  <int name="maxMergeAtOnce">10</int>
175
			  <int name="segmentsPerTier">10</int>
176
			  <double name="noCFSRatio">0.1</double>
177
			</mergePolicyFactory>
178
		  -->
179

  
180
		<!-- Expert: Merge Scheduler
181
			 The Merge Scheduler in Lucene controls how merges are
182
			 performed.  The ConcurrentMergeScheduler (Lucene 2.3 default)
183
			 can perform merges in the background using separate threads.
184
			 The SerialMergeScheduler (Lucene 2.2 default) does not.
185
		 -->
186
		<!--
187
		   <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
188
		   -->
189

  
190
		<!-- LockFactory
191

  
192
			 This option specifies which Lucene LockFactory implementation
193
			 to use.
194

  
195
			 single = SingleInstanceLockFactory - suggested for a
196
					  read-only index or when there is no possibility of
197
					  another process trying to modify the index.
198
			 native = NativeFSLockFactory - uses OS native file locking.
199
					  Do not use when multiple solr webapps in the same
200
					  JVM are attempting to share a single index.
201
			 simple = SimpleFSLockFactory  - uses a plain file for locking
202

  
203
			 Defaults: 'native' is default for Solr3.6 and later, otherwise
204
					   'simple' is the default
205

  
206
			 More details on the nuances of each LockFactory...
207
			 http://wiki.apache.org/lucene-java/AvailableLockFactories
208
		-->
209
		<lockType>\${solr.lock.type:native}</lockType>
210

  
211
		<!-- Commit Deletion Policy
212
			 Custom deletion policies can be specified here. The class must
213
			 implement org.apache.lucene.index.IndexDeletionPolicy.
214

  
215
			 The default Solr IndexDeletionPolicy implementation supports
216
			 deleting index commit points on number of commits, age of
217
			 commit point and optimized status.
218

  
219
			 The latest commit point should always be preserved regardless
220
			 of the criteria.
221
		-->
222
		<!--
223
		<deletionPolicy class="solr.SolrDeletionPolicy">
224
		-->
225
		<!-- The number of commit points to be kept -->
226
		<!-- <str name="maxCommitsToKeep">1</str> -->
227
		<!-- The number of optimized commit points to be kept -->
228
		<!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
229
		<!--
230
			Delete all commit points once they have reached the given age.
231
			Supports DateMathParser syntax e.g.
232
		  -->
233
		<!--
234
		   <str name="maxCommitAge">30MINUTES</str>
235
		   <str name="maxCommitAge">1DAY</str>
236
		-->
237
		<!--
238
		</deletionPolicy>
239
		-->
240

  
241
		<!-- Lucene Infostream
242

  
243
			 To aid in advanced debugging, Lucene provides an "InfoStream"
244
			 of detailed information when indexing.
245

  
246
			 Setting the value to true will instruct the underlying Lucene
247
			 IndexWriter to write its debugging info to the specified file
248
		  -->
249
		<!-- <infoStream file="INFOSTREAM.txt">false</infoStream> -->
250
	</indexConfig>
251

  
252

  
253
	<!-- JMX
254

  
255
		 This example enables JMX if and only if an existing MBeanServer
256
		 is found, use this if you want to configure JMX through JVM
257
		 parameters. Remove this to disable exposing Solr configuration
258
		 and statistics to JMX.
259

  
260
		 For more details see http://wiki.apache.org/solr/SolrJmx
261
	  -->
262
	<jmx />
263
	<!-- If you want to connect to a particular server, specify the
264
		 agentId
265
	  -->
266
	<!-- <jmx agentId="myAgent" /> -->
267
	<!-- If you want to start a new MBeanServer, specify the serviceUrl -->
268
	<!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
269
	  -->
270

  
271
	<!-- The default high-performance update handler -->
272
	<updateHandler class="solr.DirectUpdateHandler2">
273

  
274
		<!-- Enables a transaction log, used for real-time get, durability, and
275
			 SolrCloud replica recovery.  The log can grow as big as
276
			 uncommitted changes to the index, so use of a hard autoCommit
277
			 is recommended (see below).
278
			 "dir" - the target directory for transaction logs, defaults to the
279
					solr data directory.
280
			 "numVersionBuckets" - sets the number of buckets used to keep
281
					track of max version values when checking for re-ordered
282
					updates; increase this value to reduce the cost of
283
					synchronizing access to version buckets during high-volume
284
					indexing, this requires 8 bytes (long) * numVersionBuckets
285
					of heap space per Solr core.
286
		-->
287
		<updateLog>
288
			<str name="dir">\${solr.ulog.dir:}</str>
289
			<int name="numVersionBuckets">\${solr.ulog.numVersionBuckets:65536}</int>
290
		</updateLog>
291

  
292
		<!-- AutoCommit
293

  
294
			 Perform a hard commit automatically under certain conditions.
295
			 Instead of enabling autoCommit, consider using "commitWithin"
296
			 when adding documents.
297

  
298
			 http://wiki.apache.org/solr/UpdateXmlMessages
299

  
300
			 maxDocs - Maximum number of documents to add since the last
301
					   commit before automatically triggering a new commit.
302

  
303
			 maxTime - Maximum amount of time in ms that is allowed to pass
304
					   since a document was added before automatically
305
					   triggering a new commit.
306
			 openSearcher - if false, the commit causes recent index changes
307
			   to be flushed to stable storage, but does not cause a new
308
			   searcher to be opened to make those changes visible.
309

  
310
			 If the updateLog is enabled, then it's highly recommended to
311
			 have some sort of hard autoCommit to limit the log size.
312
		  -->
313
		<autoCommit>
314
			<maxTime>\${solr.autoCommit.maxTime:15000}</maxTime>
315
			<openSearcher>false</openSearcher>
316
		</autoCommit>
317

  
318
		<!-- softAutoCommit is like autoCommit except it causes a
319
			 'soft' commit which only ensures that changes are visible
320
			 but does not ensure that data is synced to disk.  This is
321
			 faster and more near-realtime friendly than a hard commit.
322
		  -->
323

  
324
		<autoSoftCommit>
325
			<maxTime>\${solr.autoSoftCommit.maxTime:-1}</maxTime>
326
		</autoSoftCommit>
327

  
328
		<!-- Update Related Event Listeners
329

  
330
			 Various IndexWriter related events can trigger Listeners to
331
			 take actions.
332

  
333
			 postCommit - fired after every commit or optimize command
334
			 postOptimize - fired after every optimize command
335
		  -->
336
		<!-- The RunExecutableListener executes an external command from a
337
			 hook such as postCommit or postOptimize.
338

  
339
			 exe - the name of the executable to run
340
			 dir - dir to use as the current working directory. (default=".")
341
			 wait - the calling thread waits until the executable returns.
342
					(default="true")
343
			 args - the arguments to pass to the program.  (default is none)
344
			 env - environment variables to set.  (default is none)
345
		  -->
346
		<!-- This example shows how RunExecutableListener could be used
347
			 with the script based replication...
348
			 http://wiki.apache.org/solr/CollectionDistribution
349
		  -->
350
		<!--
351
		   <listener event="postCommit" class="solr.RunExecutableListener">
352
			 <str name="exe">solr/bin/snapshooter</str>
353
			 <str name="dir">.</str>
354
			 <bool name="wait">true</bool>
355
			 <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
356
			 <arr name="env"> <str>MYVAR=val1</str> </arr>
357
		   </listener>
358
		  -->
359

  
360
	</updateHandler>
361

  
362
	<!-- IndexReaderFactory
363

  
364
		 Use the following format to specify a custom IndexReaderFactory,
365
		 which allows for alternate IndexReader implementations.
366

  
367
		 ** Experimental Feature **
368

  
369
		 Please note - Using a custom IndexReaderFactory may prevent
370
		 certain other features from working. The API to
371
		 IndexReaderFactory may change without warning or may even be
372
		 removed from future releases if the problems cannot be
373
		 resolved.
374

  
375

  
376
		 ** Features that may not work with custom IndexReaderFactory **
377

  
378
		 The ReplicationHandler assumes a disk-resident index. Using a
379
		 custom IndexReader implementation may cause incompatibility
380
		 with ReplicationHandler and may cause replication to not work
381
		 correctly. See SOLR-1366 for details.
382

  
383
	  -->
384
	<!--
385
	<indexReaderFactory name="IndexReaderFactory" class="package.class">
386
	  <str name="someArg">Some Value</str>
387
	</indexReaderFactory >
388
	-->
389

  
390
	<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
391
		 Query section - these settings control query time things like caches
392
		 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
393
	<query>
394
		<!-- Max Boolean Clauses
395

  
396
			 Maximum number of clauses in each BooleanQuery,  an exception
397
			 is thrown if exceeded.
398

  
399
			 ** WARNING **
400

  
401
			 This option actually modifies a global Lucene property that
402
			 will affect all SolrCores.  If multiple solrconfig.xml files
403
			 disagree on this property, the value at any given moment will
404
			 be based on the last SolrCore to be initialized.
405

  
406
		  -->
407
		<maxBooleanClauses>1024</maxBooleanClauses>
408

  
409

  
410
		<!-- Solr Internal Query Caches
411

  
412
			 There are two implementations of cache available for Solr,
413
			 LRUCache, based on a synchronized LinkedHashMap, and
414
			 FastLRUCache, based on a ConcurrentHashMap.
415

  
416
			 FastLRUCache has faster gets and slower puts in single
417
			 threaded operation and thus is generally faster than LRUCache
418
			 when the hit ratio of the cache is high (> 75%), and may be
419
			 faster under other scenarios on multi-cpu systems.
420
		-->
421

  
422
		<!-- Filter Cache
423

  
424
			 Cache used by SolrIndexSearcher for filters (DocSets),
425
			 unordered sets of *all* documents that match a query.  When a
426
			 new searcher is opened, its caches may be prepopulated or
427
			 "autowarmed" using data from caches in the old searcher.
428
			 autowarmCount is the number of items to prepopulate.  For
429
			 LRUCache, the autowarmed items will be the most recently
430
			 accessed items.
431

  
432
			 Parameters:
433
			   class - the SolrCache implementation to use
434
				   (LRUCache or FastLRUCache)
435
			   size - the maximum number of entries in the cache
436
			   initialSize - the initial capacity (number of entries) of
437
				   the cache.  (see java.util.HashMap)
438
			   autowarmCount - the number of entries to prepopulate from
439
				   an old cache.
440
			   maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed
441
						  to occupy. Note that when this option is specified, the size
442
						  and initialSize parameters are ignored.
443
		  -->
444
		<filterCache class="solr.FastLRUCache"
445
		             size="512"
446
		             initialSize="512"
447
		             autowarmCount="0"/>
448

  
449
		<!-- Query Result Cache
450

  
451
			 Caches results of searches - ordered lists of document ids
452
			 (DocList) based on a query, a sort, and the range of documents requested.
453
			 Additional supported parameter by LRUCache:
454
				maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed
455
						   to occupy
456
		  -->
457
		<queryResultCache class="solr.LRUCache"
458
		                  size="512"
459
		                  initialSize="512"
460
		                  autowarmCount="0"/>
461

  
462
		<!-- Document Cache
463

  
464
			 Caches Lucene Document objects (the stored fields for each
465
			 document).  Since Lucene internal document ids are transient,
466
			 this cache will not be autowarmed.
467
		  -->
468
		<documentCache class="solr.LRUCache"
469
		               size="512"
470
		               initialSize="512"
471
		               autowarmCount="0"/>
472

  
473
		<!-- custom cache currently used by block join -->
474
		<cache name="perSegFilter"
475
		       class="solr.search.LRUCache"
476
		       size="10"
477
		       initialSize="0"
478
		       autowarmCount="10"
479
		       regenerator="solr.NoOpRegenerator" />
480

  
481
		<!-- Field Value Cache
482

  
483
			 Cache used to hold field values that are quickly accessible
484
			 by document id.  The fieldValueCache is created by default
485
			 even if not configured here.
486
		  -->
487
		<!--
488
		   <fieldValueCache class="solr.FastLRUCache"
489
							size="512"
490
							autowarmCount="128"
491
							showItems="32" />
492
		  -->
493

  
494
		<!-- Custom Cache
495

  
496
			 Example of a generic cache.  These caches may be accessed by
497
			 name through SolrIndexSearcher.getCache(),cacheLookup(), and
498
			 cacheInsert().  The purpose is to enable easy caching of
499
			 user/application level data.  The regenerator argument should
500
			 be specified as an implementation of solr.CacheRegenerator
501
			 if autowarming is desired.
502
		  -->
503
		<!--
504
		   <cache name="myUserCache"
505
				  class="solr.LRUCache"
506
				  size="4096"
507
				  initialSize="1024"
508
				  autowarmCount="1024"
509
				  regenerator="com.mycompany.MyRegenerator"
510
				  />
511
		  -->
512

  
513

  
514
		<!-- Lazy Field Loading
515

  
516
			 If true, stored fields that are not requested will be loaded
517
			 lazily.  This can result in a significant speed improvement
518
			 if the usual case is to not load all stored fields,
519
			 especially if the skipped fields are large compressed text
520
			 fields.
521
		-->
522
		<enableLazyFieldLoading>true</enableLazyFieldLoading>
523

  
524
		<!-- Use Filter For Sorted Query
525

  
526
			 A possible optimization that attempts to use a filter to
527
			 satisfy a search.  If the requested sort does not include
528
			 score, then the filterCache will be checked for a filter
529
			 matching the query. If found, the filter will be used as the
530
			 source of document ids, and then the sort will be applied to
531
			 that.
532

  
533
			 For most situations, this will not be useful unless you
534
			 frequently get the same search repeatedly with different sort
535
			 options, and none of them ever use "score"
536
		  -->
537
		<!--
538
		   <useFilterForSortedQuery>true</useFilterForSortedQuery>
539
		  -->
540

  
541
		<!-- Result Window Size
542

  
543
			 An optimization for use with the queryResultCache.  When a search
544
			 is requested, a superset of the requested number of document ids
545
			 are collected.  For example, if a search for a particular query
546
			 requests matching documents 10 through 19, and queryResultWindowSize is 50,
547
			 then documents 0 through 49 will be collected and cached.  Any further
548
			 requests in that range can be satisfied via the cache.
549
		  -->
550
		<queryResultWindowSize>20</queryResultWindowSize>
551

  
552
		<!-- Maximum number of documents to cache for any entry in the
553
			 queryResultCache.
554
		  -->
555
		<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
556

  
557
		<!-- Query Related Event Listeners
558

  
559
			 Various IndexSearcher related events can trigger Listeners to
560
			 take actions.
561

  
562
			 newSearcher - fired whenever a new searcher is being prepared
563
			 and there is a current searcher handling requests (aka
564
			 registered).  It can be used to prime certain caches to
565
			 prevent long request times for certain requests.
566

  
567
			 firstSearcher - fired whenever a new searcher is being
568
			 prepared but there is no current registered searcher to handle
569
			 requests or to gain autowarming data from.
570

  
571

  
572
		  -->
573
		<!-- QuerySenderListener takes an array of NamedList and executes a
574
			 local query request for each NamedList in sequence.
575
		  -->
576
		<listener event="newSearcher" class="solr.QuerySenderListener">
577
			<arr name="queries">
578
				<!--
579
				   <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
580
				   <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
581
				  -->
582
			</arr>
583
		</listener>
584
		<listener event="firstSearcher" class="solr.QuerySenderListener">
585
			<arr name="queries">
586
				<!--
587
				<lst>
588
				  <str name="q">static firstSearcher warming in solrconfig.xml</str>
589
				</lst>
590
				-->
591
			</arr>
592
		</listener>
593

  
594
		<!-- Use Cold Searcher
595

  
596
			 If a search request comes in and there is no current
597
			 registered searcher, then immediately register the still
598
			 warming searcher and use it.  If "false" then all requests
599
			 will block until the first searcher is done warming.
600
		  -->
601
		<useColdSearcher>false</useColdSearcher>
602

  
603
	</query>
604

  
605

  
606
	<!-- Request Dispatcher
607

  
608
		 This section contains instructions for how the SolrDispatchFilter
609
		 should behave when processing requests for this SolrCore.
610

  
611
		 handleSelect is a legacy option that affects the behavior of requests
612
		 such as /select?qt=XXX
613

  
614
		 handleSelect="true" will cause the SolrDispatchFilter to process
615
		 the request and dispatch the query to a handler specified by the
616
		 "qt" param, assuming "/select" isn't already registered.
617

  
618
		 handleSelect="false" will cause the SolrDispatchFilter to
619
		 ignore "/select" requests, resulting in a 404 unless a handler
620
		 is explicitly registered with the name "/select"
621

  
622
		 handleSelect="true" is not recommended for new users, but is the default
623
		 for backwards compatibility
624
	  -->
625
	<requestDispatcher handleSelect="false" >
626
		<!-- Request Parsing
627

  
628
			 These settings indicate how Solr Requests may be parsed, and
629
			 what restrictions may be placed on the ContentStreams from
630
			 those requests
631

  
632
			 enableRemoteStreaming - enables use of the stream.file
633
			 and stream.url parameters for specifying remote streams.
634

  
635
			 multipartUploadLimitInKB - specifies the max size (in KiB) of
636
			 Multipart File Uploads that Solr will allow in a Request.
637

  
638
			 formdataUploadLimitInKB - specifies the max size (in KiB) of
639
			 form data (application/x-www-form-urlencoded) sent via
640
			 POST. You can use POST to pass request parameters not
641
			 fitting into the URL.
642

  
643
			 addHttpRequestToContext - if set to true, it will instruct
644
			 the requestParsers to include the original HttpServletRequest
645
			 object in the context map of the SolrQueryRequest under the
646
			 key "httpRequest". It will not be used by any of the existing
647
			 Solr components, but may be useful when developing custom
648
			 plugins.
649

  
650
			 *** WARNING ***
651
			 The settings below authorize Solr to fetch remote files. You
652
			 should make sure your system has some authentication before
653
			 using enableRemoteStreaming="true"
654

  
655
		  -->
656
		<requestParsers enableRemoteStreaming="true"
657
		                multipartUploadLimitInKB="2048000"
658
		                formdataUploadLimitInKB="2048"
659
		                addHttpRequestToContext="false"/>
660

  
661
		<!-- HTTP Caching
662

  
663
			 Set HTTP caching related parameters (for proxy caches and clients).
664

  
665
			 The options below instruct Solr not to output any HTTP Caching
666
			 related headers
667
		  -->
668
		<httpCaching never304="true" />
669
		<!-- If you include a <cacheControl> directive, it will be used to
670
			 generate a Cache-Control header (as well as an Expires header
671
			 if the value contains "max-age=")
672

  
673
			 By default, no Cache-Control header is generated.
674

  
675
			 You can use the <cacheControl> option even if you have set
676
			 never304="true"
677
		  -->
678
		<!--
679
		   <httpCaching never304="true" >
680
			 <cacheControl>max-age=30, public</cacheControl>
681
		   </httpCaching>
682
		  -->
683
		<!-- To enable Solr to respond with automatically generated HTTP
684
			 Caching headers, and to respond to Cache Validation requests
685
			 correctly, set the value of never304="false"
686

  
687
			 This will cause Solr to generate Last-Modified and ETag
688
			 headers based on the properties of the Index.
689

  
690
			 The following options can also be specified to affect the
691
			 values of these headers...
692

  
693
			 lastModifiedFrom - the default value is "openTime" which means the
694
			 Last-Modified value (and validation against If-Modified-Since
695
			 requests) will all be relative to when the current Searcher
696
			 was opened.  You can change it to lastModifiedFrom="dirLastMod" if
697
			 you want the value to exactly correspond to when the physical
698
			 index was last modified.
699

  
700
			 etagSeed="..." is an option you can change to force the ETag
701
			 header (and validation against If-None-Match requests) to be
702
			 different even if the index has not changed (ie: when making
703
			 significant changes to your config file)
704

  
705
			 (lastModifiedFrom and etagSeed are both ignored if you use
706
			 the never304="true" option)
707
		  -->
708
		<!--
709
		   <httpCaching lastModifiedFrom="openTime"
710
						etagSeed="Solr">
711
			 <cacheControl>max-age=30, public</cacheControl>
712
		   </httpCaching>
713
		  -->
714
	</requestDispatcher>
715

  
716
	<!-- Request Handlers
717

  
718
		 http://wiki.apache.org/solr/SolrRequestHandler
719

  
720
		 Incoming queries will be dispatched to a specific handler by name
721
		 based on the path specified in the request.
722

  
723
		 Legacy behavior: If the request path uses "/select" but no Request
724
		 Handler has that name, and if handleSelect="true" has been specified in
725
		 the requestDispatcher, then the Request Handler is dispatched based on
726
		 the qt parameter.  Handlers without a leading '/' are accessed this way
727
		 like so: http://host/app/[core/]select?qt=name  If no qt is
728
		 given, then the requestHandler that declares default="true" will be
729
		 used or the one named "standard".
730

  
731
		 If a Request Handler is declared with startup="lazy", then it will
732
		 not be initialized until the first request that uses it.
733

  
734
	  -->
735
	<!-- SearchHandler
736

  
737
		 http://wiki.apache.org/solr/SearchHandler
738

  
739
		 For processing Search Queries, the primary Request Handler
740
		 provided with Solr is "SearchHandler". It delegates to a sequence
741
		 of SearchComponents (see below) and supports distributed
742
		 queries across multiple shards
743
	  -->
744
	<requestHandler name="/select" class="solr.SearchHandler">
745
		<!-- default values for query parameters can be specified, these
746
			 will be overridden by parameters in the request
747
		  -->
748
		<lst name="defaults">
749
			<str name="echoParams">explicit</str>
750
			<str name="q.op">AND</str>
751
			<int name="rows">10</int>
752
			<!-- <str name="df">text</str> -->
			<!-- df is intentionally not set inline: the initParams element
				 whose path list includes "/select" supplies df=__all. -->
753
		</lst>
754
		<!-- In addition to defaults, "appends" params can be specified
755
			 to identify values which should be appended to the list of
756
			 multi-val params from the query (or the existing "defaults").
757
		  -->
758
		<!-- In this example, the param "fq=instock:true" would be appended to
759
			 any query time fq params the user may specify, as a mechanism for
760
			 partitioning the index, independent of any user selected filtering
761
			 that may also be desired (perhaps as a result of faceted searching).
762

  
763
			 NOTE: there is *absolutely* nothing a client can do to prevent these
764
			 "appends" values from being used, so don't use this mechanism
765
			 unless you are sure you always want it.
766
		  -->
767
		<!--
768
		   <lst name="appends">
769
			 <str name="fq">inStock:true</str>
770
		   </lst>
771
		  -->
772
		<!-- "invariants" are a way of letting the Solr maintainer lock down
773
			 the options available to Solr clients.  Any params values
774
			 specified here are used regardless of what values may be specified
775
			 in either the query, the "defaults", or the "appends" params.
776

  
777
			 In this example, the facet.field and facet.query params would
778
			 be fixed, limiting the facets clients can use.  Faceting is
779
			 not turned on by default - but if the client does specify
780
			 facet=true in the request, these are the only facets they
781
			 will be able to see counts for; regardless of what other
782
			 facet.field or facet.query params they may specify.
783

  
784
			 NOTE: there is *absolutely* nothing a client can do to prevent these
785
			 "invariants" values from being used, so don't use this mechanism
786
			 unless you are sure you always want it.
787
		  -->
788
		<!--
789
		   <lst name="invariants">
790
			 <str name="facet.field">cat</str>
791
			 <str name="facet.field">manu_exact</str>
792
			 <str name="facet.query">price:[* TO 500]</str>
793
			 <str name="facet.query">price:[500 TO *]</str>
794
		   </lst>
795
		  -->
796
		<!-- If the default list of SearchComponents is not desired, that
797
			 list can either be overridden completely, or components can be
798
			 prepended or appended to the default list.  (see below)
799
		  -->
800
		<!--
801
		   <arr name="components">
802
			 <str>nameOfCustomComponent1</str>
803
			 <str>nameOfCustomComponent2</str>
804
		   </arr>
805
		  -->
806
	</requestHandler>
807

  
808
    <!-- a query handler tailored for OpenAIRE-Explore keyword search -->
809
    <requestHandler name="/exploreSearch" class="solr.SearchHandler">
809
        <!-- default values for query parameters can be specified, these
810
             will be overridden by parameters in the request
811
          -->
        <!-- Note: "/exploreSearch" is not in the path list of the initParams
             element that supplies df=__all, so no df default is set for this
             handler; the fields to search are given by qf below. -->
812
        <lst name="defaults">
813
            <str name="echoParams">explicit</str>
814
            <str name="q.op">AND</str>
815
            <int name="rows">10</int>
816

  
817
            <!-- The eDisMax query parser is used -->
818
            <str name="defType">edismax</str>
819

  
820
            <!-- adjusts how much the final score is influenced by lower scoring fields -->
821
            <float name="tie">1.0</float>
822

  
823
            <!-- query fields and their boost factors, written as field^boost;
                 __all is listed last without an explicit boost -->
824
            <str name="qf">
825
                resultidentifier^100
826
                resultauthor^34
827
                resultacceptanceyear^21
828
                resultsubject^13
829
                resulttitle^8
830
                relprojectname^5
831
                resultdescription^3
832
                __all
833
            </str>
834
        </lst>
835
    </requestHandler>
837

  
838
	<!-- A request handler that returns indented JSON by default -->
839
	<requestHandler name="/query" class="solr.SearchHandler">
840
		<!-- Defaults only: wt=json together with indent=true is what makes
			 this handler return indented JSON.  df is not set here; the
			 initParams element whose path list includes "/query" supplies
			 df=__all. -->
		<lst name="defaults">
841
			<str name="echoParams">explicit</str>
842
			<str name="wt">json</str>
843
			<str name="indent">true</str>
844
		</lst>
845
	</requestHandler>
846

  
847

  
848
	<!-- A Robust Example
849

  
850
		 This example SearchHandler declaration shows off usage of the
851
		 SearchHandler with many defaults declared
852

  
853
		 Note that multiple instances of the same Request Handler
854
		 (SearchHandler) can be registered multiple times with different
855
		 names (and different init parameters)
856
	  -->
857
	<requestHandler name="/browse" class="solr.SearchHandler" useParams="query,facets,velocity,browse">
858
		<!-- NOTE(review): useParams presumably names parameter sets that are
			 defined outside this file; confirm they exist in the deployed
			 configuration.  Only echoParams is defaulted inline here; df=__all
			 is added by the initParams element whose path list includes
			 "/browse". -->
		<lst name="defaults">
859
			<str name="echoParams">explicit</str>
860
		</lst>
861
	</requestHandler>
862

  
863
	<initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse">
864
		<!-- Supplies df=__all as a default parameter for the handler paths
			 listed in the path attribute above.  "/exploreSearch" is not in
			 that list. -->
		<lst name="defaults">
865
			<str name="df">__all</str>
866
		</lst>
867
	</initParams>
868

  
869
	<!-- Uncomment the following block to enable schemaless mode
870
	<initParams path="/update/**">
871
	  <lst name="defaults">
872
		<str name="update.chain">add-unknown-fields-to-the-schema</str>
873
	  </lst>
874
	</initParams>
875
	-->
876

  
877
	<!-- Solr Cell Update Request Handler
878

  
879
		 http://wiki.apache.org/solr/ExtractingRequestHandler
880

  
881
	  -->
882
	<requestHandler name="/update/extract"
883
	                startup="lazy"
884
	                class="solr.extraction.ExtractingRequestHandler" >
885
		<!-- startup="lazy": this handler is not initialized until the first
			 request that uses it. -->
		<lst name="defaults">
886
			<str name="lowernames">true</str>
887
			<str name="fmap.meta">ignored_</str>
888
			<!-- NOTE(review): fmap.content presumably maps the extracted body
				 text onto the __all field, the same field that initParams sets
				 as df for /update/** paths; confirm against the schema. -->
			<str name="fmap.content">__all</str>
889
		</lst>
890
	</requestHandler>
891
	<!-- Search Components
892

  
893
		 Search components are registered to SolrCore and used by
894
		 instances of SearchHandler (which can access them by name)
895

  
896
		 By default, the following components are available:
897

  
898
		 <searchComponent name="query"     class="solr.QueryComponent" />
899
		 <searchComponent name="facet"     class="solr.FacetComponent" />
900
		 <searchComponent name="mlt"       class="solr.MoreLikeThisComponent" />
901
		 <searchComponent name="highlight" class="solr.HighlightComponent" />
902
		 <searchComponent name="stats"     class="solr.StatsComponent" />
903
		 <searchComponent name="debug"     class="solr.DebugComponent" />
904

  
905
		 Default configuration in a requestHandler would look like:
906

  
907
		 <arr name="components">
908
		   <str>query</str>
909
		   <str>facet</str>
910
		   <str>mlt</str>
911
		   <str>highlight</str>
912
		   <str>stats</str>
913
		   <str>debug</str>
914
		 </arr>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff