Project

General

Profile

1 33136 claudio.at
# Settings rooted at the `pace.conf` path, in four sections: clustering
# functions, match conditions, the field comparison model, and per-field
# blacklists.
# NOTE(review): the syntax looks like HOCON (Typesafe/Lightbend Config) — confirm against the loader.
pace.conf {
	# Clustering functions; each entry names the fields it reads and its own params.
	# NOTE(review): presumably these generate the keys used to group candidate duplicates — confirm.
	# NOTE(review): `acronyms` reads a `desc` field, but `model` below declares only
	# `title` and `authors` — verify that `desc` is resolvable.
	clustering {
		acronyms { fields = [title, desc], params = { max = 1, minLen = 2, maxLen = 4} },
		ngrampairs { fields = [title], params = { max = 1, ngramLen = 3} },
		suffixprefix { fields = [title], params = { max = 1, len = 3 } }
	},
	# Named conditions, each bound to the fields it inspects.
	# NOTE(review): presumably evaluated before the weighted comparison — confirm.
	conditions {
		titleVersionMatch { fields = [title] },
		sizeMatch { fields = [authors] }
	},
	# Field model: one entry per compared field, giving the comparison algorithm,
	# the value type, the weight, whether a missing value is ignored, and the path
	# the value is read from. The two weights sum to 1.0.
	model {
		title { algo = JaroWinkler, type = String, weight = 0.5, ignoreMissing = false, path = result/metadata/title/value },
		authors { algo = SortedLevel2JaroWinkler, type = List, weight = 0.5, ignoreMissing = true, path = result/author/metadata/fullname/value }
	},
	# Per-field lists of regular expressions; only `title` has entries.
	# Backslashes are doubled because a quoted config string consumes one level
	# of escaping, so "\\(" reaches the regex engine as \( (a literal parenthesis).
	# NOTE(review): presumably values matching any pattern are excluded from clustering — confirm.
	blacklists = {
		title = [
			"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
			"^(Kiri Karl Morgensternile).*$",
			# NOTE(review): every title matched by the next pattern (closing `]` required)
			# is also matched by the one after it, so this entry looks redundant.
			"^(\\[Eksliibris Aleksandr).*\\]$",
			"^(\\[Eksliibris Aleksandr).*$",
			"^(Eksliibris Aleksandr).*$",
			"^(Kiri A\\. de Vignolles).*$",
			"^(2 kirja Karl Morgensternile).*$",
			"^(Pirita kloostri idaosa arheoloogilised).*$",
			"^(Kiri tundmatule).*$",
			"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
			"^(Eksliibris Nikolai Birukovile).*$",
			"^(Eksliibris Nikolai Issakovile).*$",
			"^(WHP Cruise Summary Information of section).*$",
			"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
			# NOTE(review): unlike the patterns above, this one has no trailing `$` — confirm whether intentional.
			"^(Measurement of the spin\\-dependent structure function).*"
		] }
}