{
  "wf" : {
    "threshold" : "0.99",
    "run" : "001",
    "entityType" : "result",
    "orderField" : "title",
    "queueMaxSize" : "2000",
    "groupMaxSize" : "10",
    "slidingWindowSize" : "200",
    "rootBuilder" : [ "result" ],
    "includeChildren" : "true"
  },
  "pace" : {
    "clustering" : [
      { "name" : "acronyms", "fields" : [ "title" ], "params" : { "max" : "1", "minLen" : "2", "maxLen" : "4"} },
      { "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
      { "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } }
    ],
    "strictConditions" : [
      { "name" : "exactMatch", "fields" : [ "pid" ] }
    ],
    "conditions" : [
      { "name" : "yearMatch", "fields" : [ "dateofacceptance" ] },
      { "name" : "titleVersionMatch", "fields" : [ "title" ] },
      { "name" : "sizeMatch", "fields" : [ "authors" ] }
    ],
    "model" : [
      { "name" : "pid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {doi}]/value", "overrideMatch" : "true" },
      { "name" : "title", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" },
      { "name" : "dateofacceptance", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/metadata/dateofacceptance/value" },
      { "name" : "authors", "algo" : "Null", "type" : "List", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/author/metadata/fullname/value" },
      {
        "name": "anchors",
        "algo": "PersonCoAnchorsDistance",
        "type": "JSON",
        "weight": "0.0",
        "ignoreMissing": "true",
        "path": "person",
        "params": {
          "common.anchors": "1"
        }
      },
      {
        "name": "coauthor",
        "algo": "PersonCoAuthorSurnamesDistance",
        "type": "JSON",
        "weight": "0.0",
        "ignoreMissing": "true",
        "path": "person",
        "params": {
          "common.surnames": "2"
        }
      },
      {
        "name": "person",
        "algo": "PersonDistance",
        "type": "JSON",
        "weight": "0.0",
        "ignoreMissing": "true",
        "path": "person",
        "params": {
          "common.surnames": "2"
        }
      }
    ],
    "blacklists" : {
      "title" : [
        "^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
        "^(Kiri Karl Morgensternile).*$",
        "^(\\[Eksliibris Aleksandr).*\\]$",
        "^(\\[Eksliibris Aleksandr).*$",
        "^(Eksliibris Aleksandr).*$",
        "^(Kiri A\\. de Vignolles).*$",
        "^(2 kirja Karl Morgensternile).*$",
        "^(Pirita kloostri idaosa arheoloogilised).*$",
        "^(Kiri tundmatule).*$",
        "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
        "^(Eksliibris Nikolai Birukovile).*$",
        "^(Eksliibris Nikolai Issakovile).*$",
        "^(WHP Cruise Summary Information of section).*$",
        "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
        "^(Measurement of the spin\\-dependent structure function).*"
      ]
    }
  }
}