Project

General

Profile

# Deduplication profile: clustering functions, match conditions, the field
# comparison model and per-field blacklists, all nested under pace.conf.
# NOTE(review): the exact runtime semantics of each function/algorithm name are
# defined by the consuming framework, not by this file; confirm there.
pace.conf {
	# Clustering functions, each applied to the listed model fields with its
	# own parameters.
	clustering {
		acronyms { fields = [title, desc], params = { max = 1, minLen = 2, maxLen = 4 } },
		ngrampairs { fields = [title], params = { max = 1, ngramLen = 3 } },
		suffixprefix { fields = [title], params = { max = 1, len = 3 } }
	},
	# Conditions evaluated on the pid field.
	# NOTE(review): presumably a strict condition decides the match on its own
	# (see overrideMatch on the pid model entry below); verify.
	strictconditions {
		doiExactMatch { fields = [pid] }
	},
	# Conditions evaluated on date, title and authors.
	conditions {
		yearMatch { fields = [dateofacceptance] },
		titleVersionMatch { fields = [title] },
		sizeMatch { fields = [authors] }
	},
	# Field model: comparison algorithm, value type, weight and extraction path
	# for each field. Only title (0.75) and authors (0.25) carry a non-zero
	# weight; pid and dateofacceptance use algo = Null with weight 0.0 and are
	# referenced by the condition blocks above.
	# NOTE(review): clustering.acronyms references a field named desc that has
	# no entry in this model; confirm whether that is intentional.
	model {
		pid { algo = Null, type = String, weight = 0.0, ignoreMissing = true, path = pid/value, overrideMatch = true },
		title { algo = JaroWinkler, type = String, weight = 0.75, ignoreMissing = false, path = result/metadata/title/value },
		dateofacceptance { algo = Null, type = String, weight = 0.0, ignoreMissing = true, path = result/metadata/dateofacceptance/value },
		authors { algo = SortedLevel2JaroWinkler, type = List, weight = 0.25, ignoreMissing = true, path = result/author/metadata/fullname/value }
	},
	# Blacklists keyed by model field name; each entry is a regular expression
	# written as a HOCON quoted string (hence the doubled backslashes).
	# NOTE(review): presumably titles matching any pattern are excluded from
	# duplicate detection; verify against the consumer.
	# NOTE(review): the last pattern has no trailing "$", unlike the others.
	# It is left unchanged because adding the anchor could alter matching for
	# multi-line input depending on how the pattern is applied.
	blacklists = {
		title = [
			"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
			"^(Kiri Karl Morgensternile).*$",
			"^(\\[Eksliibris Aleksandr).*\\]$",
			"^(\\[Eksliibris Aleksandr).*$",
			"^(Eksliibris Aleksandr).*$",
			"^(Kiri A\\. de Vignolles).*$",
			"^(2 kirja Karl Morgensternile).*$",
			"^(Pirita kloostri idaosa arheoloogilised).*$",
			"^(Kiri tundmatule).*$",
			"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
			"^(Eksliibris Nikolai Birukovile).*$",
			"^(Eksliibris Nikolai Issakovile).*$",
			"^(WHP Cruise Summary Information of section).*$",
			"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
			"^(Measurement of the spin\\-dependent structure function).*"
		] }
}
(3-3/5)