# Configuration rooted at the key path `pace.conf`.
# Sections: clustering, strictconditions, conditions, model, blacklists.
# NOTE(review): the semantics of each section are defined by the consuming
# application, which is not visible from this file; comments below describe
# only what the configuration itself declares.
pace.conf {

  # Three named clustering entries; each lists the model fields it reads
  # and a set of numeric parameters.
  clustering {
    acronyms {
      fields = [title, desc]
      params = { max = 1, minLen = 2, maxLen = 4 }
    }
    ngrampairs {
      fields = [title]
      params = { max = 1, ngramLen = 3 }
    }
    suffixprefix {
      fields = [title]
      params = { max = 1, len = 3 }
    }
  }

  # A single strict condition, applied to the `pid` field.
  strictconditions {
    doiExactMatch {
      fields = [pid]
    }
  }

  # Three further conditions, each bound to one model field.
  conditions {
    yearMatch {
      fields = [dateofacceptance]
    }
    titleVersionMatch {
      fields = [title]
    }
    sizeMatch {
      fields = [authors]
    }
  }

  # Field model: one entry per field with algo / type / weight /
  # ignoreMissing / path. The non-zero weights (title 0.75, authors 0.25)
  # sum to 1.0; pid and dateofacceptance carry weight 0.0 with algo = Null.
  model {
    # Only entry that sets overrideMatch.
    pid {
      algo = Null
      type = String
      weight = 0.0
      ignoreMissing = true
      path = pid/value
      overrideMatch = true
    }
    # Only entry with ignoreMissing = false.
    title {
      algo = JaroWinkler
      type = String
      weight = 0.75
      ignoreMissing = false
      path = result/metadata/title/value
    }
    dateofacceptance {
      algo = Null
      type = String
      weight = 0.0
      ignoreMissing = true
      path = result/metadata/dateofacceptance/value
    }
    # Only entry declared with type = List.
    authors {
      algo = SortedLevel2JaroWinkler
      type = List
      weight = 0.25
      ignoreMissing = true
      path = result/author/metadata/fullname/value
    }
  }

  # Regular-expression blacklist keyed by field name; only `title` has
  # entries. Patterns are kept in their original order.
  blacklists = {
    title = [
      "^(Corpus Oral Dialectal \\(COD\\)\\.).*$"
      "^(Kiri Karl Morgensternile).*$"
      # NOTE(review): anything matched by the next pattern is also matched
      # by the broader one right after it (same prefix, no closing-bracket
      # requirement) - confirm whether both are intended.
      "^(\\[Eksliibris Aleksandr).*\\]$"
      "^(\\[Eksliibris Aleksandr).*$"
      "^(Eksliibris Aleksandr).*$"
      "^(Kiri A\\. de Vignolles).*$"
      "^(2 kirja Karl Morgensternile).*$"
      "^(Pirita kloostri idaosa arheoloogilised).*$"
      "^(Kiri tundmatule).*$"
      "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$"
      "^(Eksliibris Nikolai Birukovile).*$"
      "^(Eksliibris Nikolai Issakovile).*$"
      "^(WHP Cruise Summary Information of section).*$"
      "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$"
      # NOTE(review): unlike every other entry, the last pattern has no
      # trailing `$`; left unchanged - verify against how patterns are applied.
      "^(Measurement of the spin\\-dependent structure function).*"
    ]
  }
}