# Configuration rooted at the `pace.conf` path.
# Sections, in order: clustering, strictconditions, conditions, model, blacklists.
pace.conf {

  # Clustering entries. Each entry is keyed by name and carries the list of
  # `fields` it applies to plus a `params` object with its settings.
  clustering {
    # NOTE(review): `desc` is not declared in the `model` section below —
    # confirm the consumer tolerates a field with no model definition.
    acronyms { fields = [title, desc], params = { max = 1, minLen = 2, maxLen = 4} },
    ngrampairs { fields = [title], params = { max = 1, ngramLen = 3} },
    suffixprefix { fields = [title], params = { max = 1, len = 3 } }
  },

  # Strict conditions: a single entry, applied to the `pid` field.
  strictconditions {
    doiExactMatch { fields = [pid] }
  },

  # Regular conditions, each bound to one model field.
  conditions {
    yearMatch { fields = [dateofacceptance] },
    titleVersionMatch { fields = [title] },
    sizeMatch { fields = [authors] }
  },

  # Field model. Every entry declares an algorithm name (`algo`), a `type`,
  # a `weight`, an `ignoreMissing` flag and the `path` the value is read from.
  # Only `title` (0.75) and `authors` (0.25) have a non-zero weight; together
  # they sum to 1.0. `pid` and `dateofacceptance` use `algo = Null` with
  # weight 0.0, and `pid` additionally sets `overrideMatch = true`.
  model {
    pid { algo = Null, type = String, weight = 0.0, ignoreMissing = true, path = pid/value, overrideMatch = true },
    title { algo = JaroWinkler, type = String, weight = 0.75, ignoreMissing = false, path = result/metadata/title/value },
    dateofacceptance { algo = Null, type = String, weight = 0.0, ignoreMissing = true, path = result/metadata/dateofacceptance/value },
    authors { algo = SortedLevel2JaroWinkler, type = List, weight = 0.25, ignoreMissing = true, path = result/author/metadata/fullname/value }
  },

  # Blacklists keyed by field name; each value is a list of regular-expression
  # patterns. Backslashes are doubled because these are quoted strings, so
  # `\\(` in this file is the regex escape `\(`.
  blacklists = {
    title = [
      "^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
      "^(Kiri Karl Morgensternile).*$",
      "^(\\[Eksliibris Aleksandr).*\\]$",
      "^(\\[Eksliibris Aleksandr).*$",
      "^(Eksliibris Aleksandr).*$",
      "^(Kiri A\\. de Vignolles).*$",
      "^(2 kirja Karl Morgensternile).*$",
      "^(Pirita kloostri idaosa arheoloogilised).*$",
      "^(Kiri tundmatule).*$",
      "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
      "^(Eksliibris Nikolai Birukovile).*$",
      "^(Eksliibris Nikolai Issakovile).*$",
      "^(WHP Cruise Summary Information of section).*$",
      "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
      "^(Measurement of the spin\\-dependent structure function).*"
    ]
  }
}