Project

General

Profile

# Profile configuration in HOCON syntax. Everything lives under the single
# top-level key `pace.conf`, which has four sections: clustering, conditions,
# model and blacklists.
pace.conf {
	# Three clustering entries, each naming the record fields it applies to and
	# its own parameters. Every entry sets max = 1.
	clustering {
		acronyms { fields = [title, desc], params = { max = 1, minLen = 2, maxLen = 4 } },
		ngrampairs { fields = [title], params = { max = 1, ngramLen = 3 } },
		suffixprefix { fields = [title], params = { max = 1, len = 3 } }
	},
	# Two condition entries, each bound to one field declared in `model` below.
	conditions {
		titleVersionMatch { fields = [title] },
		sizeMatch { fields = [authors] }
	},
	# Field definitions: the algorithm name, value type, weight and the path the
	# value is read from. The two weights (0.5 + 0.5) add up to 1.0.
	# `title` sets ignoreMissing = false, `authors` sets ignoreMissing = true.
	model {
		title { algo = JaroWinkler, type = String, weight = 0.5, ignoreMissing = false, path = result/metadata/title/value },
		authors { algo = SortedLevel2JaroWinkler, type = List, weight = 0.5, ignoreMissing = true, path = result/author/metadata/fullname/value }
	},
	# Regular expressions keyed by model field name (only `title` here).
	# NOTE(review): presumably records whose title matches one of these patterns
	# are excluded from processing -- confirm against the code that reads this key.
	# Inside a quoted HOCON string `\\` is an escaped backslash, so the regex
	# engine receives a single `\` (e.g. `\\(` reaches it as `\(`).
	blacklists = {
		title = [
			"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
			"^(Kiri Karl Morgensternile).*$",
			# NOTE(review): this pattern looks subsumed by the next one, which
			# accepts the same prefix with any ending -- confirm before removing.
			"^(\\[Eksliibris Aleksandr).*\\]$",
			"^(\\[Eksliibris Aleksandr).*$",
			"^(Eksliibris Aleksandr).*$",
			"^(Kiri A\\. de Vignolles).*$",
			"^(2 kirja Karl Morgensternile).*$",
			"^(Pirita kloostri idaosa arheoloogilised).*$",
			"^(Kiri tundmatule).*$",
			"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
			"^(Eksliibris Nikolai Birukovile).*$",
			"^(Eksliibris Nikolai Issakovile).*$",
			"^(WHP Cruise Summary Information of section).*$",
			"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
			# NOTE(review): unlike the patterns above, this one has no trailing `$`.
			# Kept exactly as found; confirm whether the omission is intentional.
			"^(Measurement of the spin\\-dependent structure function).*"
		]
	}
}
(2-2/5)