Project

General

Profile

1 33136 claudio.at
# Settings under the `pace.conf` key: clustering functions, strict conditions,
# conditions, the per-field comparison model, and title blacklists.
# NOTE(review): syntax looks like HOCON (Typesafe Config) — confirm with the loader.
pace.conf {
	# Clustering functions; each entry lists the fields it reads and its params.
	# NOTE(review): `acronyms` reads `desc`, which has no entry under `model`
	# below — confirm that field is resolvable.
	clustering {
		acronyms { fields = [title, desc], params = { max = 1, minLen = 2, maxLen = 4} },
		ngrampairs { fields = [title], params = { max = 1, ngramLen = 3} },
		suffixprefix { fields = [title], params = { max = 1, len = 3 } }
	},
	# Strict conditions, keyed by condition name, applied to the listed fields.
	strictconditions {
		doiExactMatch { fields = [pid] }
	},
	# Conditions, keyed by condition name, applied to the listed fields.
	conditions {
		yearMatch { fields = [dateofacceptance] },
		titleVersionMatch { fields = [title] },
		sizeMatch { fields = [authors] }
	},
	# Field model: one entry per field with its comparison algo, value type,
	# weight, ignoreMissing flag and extraction path.
	# Only `title` (0.75) and `authors` (0.25) carry non-zero weight; `pid` and
	# `dateofacceptance` use algo = Null with weight 0.0.
	model {
		pid { algo = Null, type = String, weight = 0.0, ignoreMissing = true, path = pid/value, overrideMatch = true },
		title { algo = JaroWinkler, type = String, weight = 0.75, ignoreMissing = false, path = result/metadata/title/value },
		dateofacceptance { algo = Null, type = String, weight = 0.0, ignoreMissing = true, path = result/metadata/dateofacceptance/value },
		authors { algo = SortedLevel2JaroWinkler, type = List, weight = 0.25, ignoreMissing = true, path = result/author/metadata/fullname/value }
	},
	# Blacklists: regular expressions listed per field (only `title` here).
	# Backslashes are doubled because the patterns sit inside quoted strings.
	# NOTE(review): the last pattern has no trailing `$`, unlike the others —
	# confirm whether that is intentional before normalizing it.
	blacklists = {
		title = [
			"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
			"^(Kiri Karl Morgensternile).*$",
			"^(\\[Eksliibris Aleksandr).*\\]$",
			"^(\\[Eksliibris Aleksandr).*$",
			"^(Eksliibris Aleksandr).*$",
			"^(Kiri A\\. de Vignolles).*$",
			"^(2 kirja Karl Morgensternile).*$",
			"^(Pirita kloostri idaosa arheoloogilised).*$",
			"^(Kiri tundmatule).*$",
			"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
			"^(Eksliibris Nikolai Birukovile).*$",
			"^(Eksliibris Nikolai Issakovile).*$",
			"^(WHP Cruise Summary Information of section).*$",
			"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
			"^(Measurement of the spin\\-dependent structure function).*"
		]
	}
}