Revision 50230
Added by Claudio Atzori almost 6 years ago
result.prod.pace.conf | ||
---|---|---|
1 |
{ |
|
2 |
"wf" : { |
|
3 |
"threshold" : "0.99", |
|
4 |
"dedupRun" : "001", |
|
5 |
"entityType" : "result", |
|
6 |
"orderField" : "title", |
|
7 |
"queueMaxSize" : "2000", |
|
8 |
"groupMaxSize" : "10", |
|
9 |
"slidingWindowSize" : "200", |
|
10 |
"rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments" ], |
|
11 |
"includeChildren" : "true" |
|
12 |
}, |
|
13 |
"pace" : { |
|
1 |
{ |
|
2 |
"wf" : { |
|
3 |
"threshold" : "0.99", |
|
4 |
"dedupRun" : "001", |
|
5 |
"entityType" : "result", |
|
6 |
"orderField" : "title", |
|
7 |
"queueMaxSize" : "4000", |
|
8 |
"groupMaxSize" : "40", |
|
9 |
"slidingWindowSize" : "200", |
|
10 |
"rootBuilder" : [ "result", "personResult_authorship_hasAuthor", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments" ], |
|
11 |
"includeChildren" : "true", |
|
12 |
"maxChildren" : "40" |
|
13 |
}, |
|
14 |
"pace" : { |
|
14 | 15 |
"clustering" : [ |
15 | 16 |
{ "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} }, |
16 |
{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } } |
|
17 |
{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } }, |
|
18 |
{ "name" : "lowercase", "fields" : [ "doi" ], "params" : { } } |
|
17 | 19 |
], |
18 | 20 |
"strictConditions" : [ |
19 |
{ "name" : "exactMatch", "fields" : [ "pid" ] }
|
|
20 |
],
|
|
21 |
"conditions" : [
|
|
22 |
{ "name" : "titleVersionMatch", "fields" : [ "title" ] },
|
|
23 |
{ "name" : "sizeMatch", "fields" : [ "authors" ] }
|
|
24 |
],
|
|
21 |
{ "name" : "pidMatch", "fields" : [ "pid" ] }
|
|
22 |
], |
|
23 |
"conditions" : [
|
|
24 |
{ "name" : "titleVersionMatch", "fields" : [ "title" ] }, |
|
25 |
{ "name" : "sizeMatch", "fields" : [ "authors" ] } |
|
26 |
],
|
|
25 | 27 |
"model" : [ |
26 |
{ "name" : "pid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {doi}]/value", "overrideMatch" : "true" }, |
|
28 |
{ "name" : "doi", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {doi}]/value" }, |
|
29 |
{ "name" : "pid", "algo" : "Null", "type" : "JSON", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid", "overrideMatch" : "true" }, |
|
27 | 30 |
{ "name" : "title", "algo" : "LevensteinTitle", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" }, |
28 |
{ "name" : "authors", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/metadata/author/fullname" }
|
|
31 |
{ "name" : "authors", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/author/metadata/fullname/value" }
|
|
29 | 32 |
], |
30 |
"blacklists" : {
|
|
33 |
"blacklists" : { |
|
31 | 34 |
"title" : [ |
32 | 35 |
"^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$", |
33 | 36 |
"^Problems with perinatal pathology\.?$", |
... | ... | |
38 | 41 |
"^Cartas? ao editor Letters? to the Editor$", |
39 | 42 |
"^Note from the Editor$", |
40 | 43 |
"^Anesthesia Abstract$", |
41 |
|
|
44 |
|
|
42 | 45 |
"^Annual report$", |
43 | 46 |
"(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\.?”?$", |
44 | 47 |
"(?i)^Graph and Table of Infectious Diseases?$", |
... | ... | |
58 | 61 |
"^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\..*\.$", |
59 | 62 |
"(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\.?$", |
60 | 63 |
"^Gushi hakubutsugaku$", |
61 |
|
|
62 |
"^Starobosanski nadpisi u Bosni i Hercegovini \(.*\)$",
|
|
64 |
|
|
65 |
"^Starobosanski nadpisi u Bosni i Hercegovini \(.*\)$", |
|
63 | 66 |
"^Intestinal spirocha?etosis$", |
64 | 67 |
"^Treatment of Rodent Ulcer$", |
65 | 68 |
"(?i)^\W*Cloud Computing\W*$", |
66 |
"^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
|
|
69 |
"^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$", |
|
67 | 70 |
"^Free Communications, Poster Presentations: Session [A-F]$", |
68 |
|
|
71 |
|
|
69 | 72 |
"^“The Historical Aspects? of Quackery\.?”$", |
70 | 73 |
"^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$", |
71 | 74 |
"^P(er|re)-Mile Premiums for Auto Insurance\\.?$", |
72 |
"(?i)^Case Report$",
|
|
75 |
"(?i)^Case Report$", |
|
73 | 76 |
"^Boletín Informativo$", |
74 | 77 |
"(?i)^Glioblastoma Multiforme$", |
75 | 78 |
"(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$", |
76 | 79 |
"^Zaměstnanecké výhody$", |
77 | 80 |
"(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$", |
78 |
"(?i)^Carotid body tumours?\\.?$",
|
|
81 |
"(?i)^Carotid body tumours?\\.?$", |
|
79 | 82 |
"(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$", |
80 | 83 |
"^Avant-propos$", |
81 | 84 |
"(?i)^St\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$", |
82 | 85 |
"(?i)^St\. Patrick's Cathedral, Dublin, County Dublin - Bases?$", |
83 |
"(?i)^PUBLIC HEALTH VERSUS THE STATE$",
|
|
86 |
"(?i)^PUBLIC HEALTH VERSUS THE STATE$", |
|
84 | 87 |
"^Viñetas de Cortázar$", |
85 | 88 |
"(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\.)?$", |
86 |
"(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\.?)$",
|
|
89 |
"(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\.?)$", |
|
87 | 90 |
"(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$", |
88 | 91 |
"(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$", |
89 |
|
|
92 |
|
|
90 | 93 |
"(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$", |
91 |
"^Aus der AGMB$",
|
|
92 |
|
|
94 |
"^Aus der AGMB$", |
|
95 |
|
|
93 | 96 |
"^Znanstveno-stručni prilozi$", |
94 |
"^Zhodnocení finanční situace podniku a návrhy na zlepšení$", |
|
95 |
"^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$", |
|
97 |
"(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$", |
|
98 |
"(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$", |
|
99 |
"(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$", |
|
96 | 100 |
"^Finanční analýza podniku$", |
97 | 101 |
"^Financial analysis( of business)?$", |
98 | 102 |
"(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$", |
99 | 103 |
"^Jikken nihon shūshinsho$", |
100 | 104 |
"(?i)^CORONER('|s)(s|') INQUESTS$", |
101 |
"(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
|
|
105 |
"(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$", |
|
102 | 106 |
"(?i)^Consultants' contract(s)?$", |
103 | 107 |
"(?i)^Upute autorima$", |
104 | 108 |
"(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$", |
... | ... | |
114 | 118 |
"(?i)^cardiac rehabilitation$", |
115 | 119 |
"(?i)^Analytical summary$", |
116 | 120 |
"^Thesaurus resolutionum Sacrae Congregationis Concilii$", |
117 |
"(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
|
|
121 |
"(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$", |
|
118 | 122 |
"^Prikazi i osvrti$", |
119 | 123 |
"^Rodinný dům s provozovnou$", |
120 | 124 |
"^Family house with an establishment$", |
... | ... | |
125 | 129 |
"(?i)^RUBRIKA UREDNIKA$", |
126 | 130 |
"^A Matching Model of the Academic Publication Market$", |
127 | 131 |
"^Yōgaku kōyō$", |
128 |
|
|
132 |
|
|
129 | 133 |
"^Internetový marketing$", |
130 | 134 |
"^Internet marketing$", |
131 | 135 |
"^Chūtō kokugo dokuhon$", |
132 | 136 |
"^Kokugo dokuhon$", |
133 | 137 |
"^Antibiotic Cover for Dental Extraction(s?)$", |
134 |
"^Strategie podniku$",
|
|
138 |
"^Strategie podniku$", |
|
135 | 139 |
"^Strategy of an Enterprise$", |
136 | 140 |
"(?i)^respiratory disease(s?)(\.?)$", |
137 | 141 |
"^Award(s?) for Gallantry in Civil Defence$", |
... | ... | |
158 | 162 |
"^Information System Assessment and Proposal for ICT Modification$", |
159 | 163 |
"^Stresové zatížení pracovníků ve vybrané profesi$", |
160 | 164 |
"^Stress load in a specific job$", |
161 |
|
|
165 |
|
|
162 | 166 |
"^Sunday: Poster Sessions, Pt.*$", |
163 | 167 |
"^Monday: Poster Sessions, Pt.*$", |
164 | 168 |
"^Wednesday: Poster Sessions, Pt.*", |
165 | 169 |
"^Tuesday: Poster Sessions, Pt.*$", |
166 |
|
|
170 |
|
|
167 | 171 |
"^Analýza reklamy$", |
168 | 172 |
"^Analysis of advertising$", |
169 |
|
|
173 |
|
|
170 | 174 |
"^Shōgaku shūshinsho$", |
171 | 175 |
"^Shōgaku sansū$", |
172 | 176 |
"^Shintei joshi kokubun$", |
173 | 177 |
"^Taishō joshi kokubun dokuhon$", |
174 |
"^Joshi kokubun$",
|
|
175 |
|
|
178 |
"^Joshi kokubun$", |
|
179 |
|
|
176 | 180 |
"^Účetní uzávěrka a účetní závěrka v ČR$", |
177 | 181 |
"(?i)^The \"?Causes\"? of Cancer$", |
178 | 182 |
"^Normas para la publicación de artículos$", |
179 | 183 |
"^Editor('|s)(s|') [Rr]eply$", |
180 | 184 |
"^Editor(’|s)(s|’) letter$", |
181 |
"^Redaktoriaus žodis$",
|
|
185 |
"^Redaktoriaus žodis$", |
|
182 | 186 |
"^DISCUSSION ON THE PRECEDING PAPER$", |
183 | 187 |
"^Kōtō shōgaku shūshinsho jidōyō$", |
184 | 188 |
"^Shōgaku nihon rekishi$", |
... | ... | |
190 | 194 |
"^Financial statements in selected company$", |
191 | 195 |
"^Abdominal [Aa]ortic [Aa]neurysms.*$", |
192 | 196 |
"^Pseudomyxoma peritonei$", |
193 |
"^Kazalo autora$",
|
|
194 |
|
|
197 |
"^Kazalo autora$", |
|
198 |
|
|
195 | 199 |
"(?i)^uvodna riječ$", |
196 | 200 |
"^Motivace jako způsob vedení lidí$", |
197 | 201 |
"^Motivation as a leadership$", |
198 | 202 |
"^Polyfunkční dům$", |
199 | 203 |
"^Multi\\-funkcional building$", |
200 | 204 |
"^Podnikatelský plán$", |
201 |
"^Business Plan$", |
|
205 |
"(?i)^Podnikatelský záměr$", |
|
206 |
"(?i)^Business Plan$", |
|
202 | 207 |
"^Oceňování nemovitostí$", |
203 | 208 |
"^Marketingová komunikace$", |
204 | 209 |
"^Marketing communication$", |
... | ... | |
244 | 249 |
"^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*", |
245 | 250 |
"^Editorial( Board)?$", |
246 | 251 |
"(?i)^Editorial \\(English\\)$", |
247 |
"^Editörden$",
|
|
252 |
"^Editörden$", |
|
248 | 253 |
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$", |
249 | 254 |
"^(Kiri Karl Morgensternile).*$", |
250 | 255 |
"^(\\[Eksliibris Aleksandr).*\\]$", |
... | ... | |
259 | 264 |
"^(Eksliibris Nikolai Issakovile).*$", |
260 | 265 |
"^(WHP Cruise Summary Information of section).*$", |
261 | 266 |
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$", |
262 |
"^(Measurement of the spin\\-dependent structure function).*" |
|
267 |
"^(Measurement of the spin\\-dependent structure function).*", |
|
268 |
"(?i)^.*authors['’′]? reply\.?$", |
|
269 |
"(?i)^.*authors['’′]? response\.?$" |
|
263 | 270 |
] |
264 |
}
|
|
271 |
} |
|
265 | 272 |
} |
266 | 273 |
} |
Also available in: Unified diff
aligned with the configuration on production