Project

General

Profile

1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="c611ec67-eefc-4ffe-a5d4-cb3fc40a8bag_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="DedupConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
	        <DESCRIPTION>1 - Publication: Match against the title, whose numbers must match</DESCRIPTION>
12
            <DEDUPLICATION>
13
    {
14
        "wf" : {
15
            "threshold" : "0.99",
16
		    "dedupRun" : "001",
17
		    "entityType" : "result",
18
		    "orderField" : "title",
19
		    "queueMaxSize" : "4000",
20
		    "groupMaxSize" : "40",
21
		    "slidingWindowSize" : "200",
22
		    "rootBuilder" : [ "result", "personResult_authorship_hasAuthor", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments" ],
23
		    "includeChildren" : "true",
24
		    "maxChildren" : "40"
25
        },
26
        "pace" : {
27
        "clustering" : [
28
            { "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
29
            { "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } },
30
            { "name" : "lowercase", "fields" : [ "doi" ], "params" : { } }
31
        ],
32
        "strictConditions" : [
33
            { "name" : "pidMatch", "fields" : [ "pid" ] }
34
        ],
35
        "conditions" : [
36
		    { "name" : "titleVersionMatch", "fields" : [ "title" ] },
37
		    { "name" : "sizeMatch", "fields" : [ "authors" ] }
38
	    ],
39
	    "model" : [
40
            { "name" : "doi", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {doi}]/value" },
41
            { "name" : "pid", "algo" : "Null", "type" : "JSON", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid", "overrideMatch" : "true" },
42
		    { "name" : "title", "algo" : "LevensteinTitle", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" },
43
		    { "name" : "authors", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/author/metadata/fullname/value" }
44
	    ],
45
	    "blacklists" : {
46
		    "title" : [
47
			    "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
48
			    "^Problems with perinatal pathology\.?$",
49
			    "(?i)^Cases? of Puerperal Convulsions$",
50
			    "(?i)^Operative Gyna?ecology$",
51
			    "(?i)^Mind the gap\!?\:?$",
52
			    "^Chronic fatigue syndrome\.?$",
53
			    "^Cartas? ao editor Letters? to the Editor$",
54
			    "^Note from the Editor$",
55
			    "^Anesthesia Abstract$",
56
			    "^Annual report$",
57
			    "(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\.?”?$",
58
			    "(?i)^Graph and Table of Infectious Diseases?$",
59
			    "^Presentation$",
60
			    "(?i)^Reviews and Information on Publications$",
61
			    "(?i)^PUBLIC HEALTH SERVICES?$",
62
			    "(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
63
			    "(?i)^Adrese autora$",
64
			    "(?i)^Systematic Part .*\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
65
			    "(?i)^Acknowledgement to Referees$",
66
			    "(?i)^Behçet's disease\.?$",
67
			    "(?i)^Isolation and identification of restriction endonuclease.*$",
68
			    "(?i)^CEREBROVASCULAR DISEASES?.?$",
69
			    "(?i)^Screening for abdominal aortic aneurysms?\.?$",
70
			    "^Event management$",
71
			    "(?i)^Breakfast and Crohn's disease.*\.?$",
72
			    "^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\..*\.$",
73
			    "(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\.?$",
74
			    "^Gushi hakubutsugaku$",
75
			    "^Starobosanski nadpisi u Bosni i Hercegovini \(.*\)$",
76
			    "^Intestinal spirocha?etosis$",
77
			    "^Treatment of Rodent Ulcer$",
78
			    "(?i)^\W*Cloud Computing\W*$",
79
			    "^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
80
			    "^Free Communications, Poster Presentations: Session [A-F]$",
81
			    "^“The Historical Aspects? of Quackery\.?”$",
82
			    "^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
83
			    "^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
84
			    "(?i)^Case Report$",
85
			    "^Boletín Informativo$",
86
			    "(?i)^Glioblastoma Multiforme$",
87
			    "(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
88
			    "^Zaměstnanecké výhody$",
89
			    "(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
90
			    "(?i)^Carotid body tumours?\\.?$",
91
			    "(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
92
			    "^Avant-propos$",
93
			    "(?i)^St\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
94
			    "(?i)^St\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
95
			    "(?i)^PUBLIC HEALTH VERSUS THE STATE$",
96
			    "^Viñetas de Cortázar$",
97
			    "(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\.)?$",
98
			    "(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\.?)$",
99
			    "(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
100
			    "(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
101
			    "(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
102
			    "^Aus der AGMB$",
103
			    "^Znanstveno-stručni prilozi$",
104
			    "(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
105
			    "(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
106
			    "(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
107
			    "^Finanční analýza podniku$",
108
			    "^Financial analysis( of business)?$",
109
			    "(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
110
			    "^Jikken nihon shūshinsho$",
111
			    "(?i)^CORONER('|s)(s|') INQUESTS$",
112
			    "(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
113
			    "(?i)^Consultants' contract(s)?$",
114
			    "(?i)^Upute autorima$",
115
			    "(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
116
			    "^Joshi shin kokubun$",
117
			    "^Kōtō shōgaku dokuhon nōson'yō$",
118
			    "^Jinjō shōgaku shōka$",
119
			    "^Shōgaku shūjichō$",
120
			    "^Nihon joshi dokuhon$",
121
			    "^Joshi shin dokuhon$",
122
			    "^Chūtō kanbun dokuhon$",
123
			    "^Wabun dokuhon$",
124
			    "(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
125
			    "(?i)^cardiac rehabilitation$",
126
			    "(?i)^Analytical summary$",
127
			    "^Thesaurus resolutionum Sacrae Congregationis Concilii$",
128
			    "(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
129
			    "^Prikazi i osvrti$",
130
			    "^Rodinný dům s provozovnou$",
131
			    "^Family house with an establishment$",
132
			    "^Shinsei chūtō shin kokugun$",
133
			    "^Pulmonary alveolar proteinosis(\\.?)$",
134
			    "^Shinshū kanbun$",
135
			    "^Viñeta(s?) de Rodríguez$",
136
			    "(?i)^RUBRIKA UREDNIKA$",
137
			    "^A Matching Model of the Academic Publication Market$",
138
			    "^Yōgaku kōyō$",
139
			    "^Internetový marketing$",
140
			    "^Internet marketing$",
141
			    "^Chūtō kokugo dokuhon$",
142
			    "^Kokugo dokuhon$",
143
			    "^Antibiotic Cover for Dental Extraction(s?)$",
144
			    "^Strategie podniku$",
145
			    "^Strategy of an Enterprise$",
146
			    "(?i)^respiratory disease(s?)(\.?)$",
147
			    "^Award(s?) for Gallantry in Civil Defence$",
148
			    "^Podniková kultura$",
149
			    "^Corporate Culture$",
150
			    "^Severe hyponatraemia in hospital inpatient(s?)(\.?)$",
151
			    "^Pracovní motivace$",
152
			    "^Work Motivation$",
153
			    "^Kaitei kōtō jogaku dokuhon$",
154
			    "^Konsolidovaná účetní závěrka$",
155
			    "^Consolidated Financial Statements$",
156
			    "(?i)^intracranial tumour(s?)$",
157
			    "^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
158
			    "^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
159
			    "^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
160
			    "^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
161
			    "^Úroveň motivačního procesu jako způsobu vedení lidí$",
162
			    "^The level of motivation process as a leadership$",
163
			    "^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
164
			    "(?i)^news and events$",
165
			    "(?i)^NOVOSTI I DOGAĐAJI$",
166
			    "^Sansū no gakushū$",
167
			    "^Posouzení informačního systému firmy a návrh změn$",
168
			    "^Information System Assessment and Proposal for ICT Modification$",
169
			    "^Stresové zatížení pracovníků ve vybrané profesi$",
170
			    "^Stress load in a specific job$",
171
			    "^Sunday: Poster Sessions, Pt.*$",
172
			    "^Monday: Poster Sessions, Pt.*$",
173
			    "^Wednesday: Poster Sessions, Pt.*",
174
			    "^Tuesday: Poster Sessions, Pt.*$",
175
			    "^Analýza reklamy$",
176
			    "^Analysis of advertising$",
177
			    "^Shōgaku shūshinsho$",
178
			    "^Shōgaku sansū$",
179
			    "^Shintei joshi kokubun$",
180
			    "^Taishō joshi kokubun dokuhon$",
181
			    "^Joshi kokubun$",
182
			    "^Účetní uzávěrka a účetní závěrka v ČR$",
183
			    "(?i)^The \"?Causes\"? of Cancer$",
184
			    "^Normas para la publicación de artículos$",
185
			    "^Editor('|s)(s|') [Rr]eply$",
186
			    "^Editor(’|s)(s|’) letter$",
187
			    "^Redaktoriaus žodis$",
188
			    "^DISCUSSION ON THE PRECEDING PAPER$",
189
			    "^Kōtō shōgaku shūshinsho jidōyō$",
190
			    "^Shōgaku nihon rekishi$",
191
			    "^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
192
			    "^Préface$",
193
			    "^Occupational [Hh]ealth [Ss]ervices.$",
194
			    "^In Memoriam Professor Toshiyuki TAKESHIMA$",
195
			    "^Účetní závěrka ve vybraném podniku.*$",
196
			    "^Financial statements in selected company$",
197
			    "^Abdominal [Aa]ortic [Aa]neurysms.*$",
198
			    "^Pseudomyxoma peritonei$",
199
			    "^Kazalo autora$",
200
			    "(?i)^uvodna riječ$",
201
			    "^Motivace jako způsob vedení lidí$",
202
			    "^Motivation as a leadership$",
203
			    "^Polyfunkční dům$",
204
			    "^Multi\\-funkcional building$",
205
			    "^Podnikatelský plán$",
206
			    "(?i)^Podnikatelský záměr$",
207
			    "(?i)^Business Plan$",
208
			    "^Oceňování nemovitostí$",
209
			    "^Marketingová komunikace$",
210
			    "^Marketing communication$",
211
			    "^Sumario Analítico$",
212
			    "^Riječ uredništva$",
213
			    "^Savjetovanja i priredbe$",
214
			    "^Índice$",
215
			    "^(Starobosanski nadpisi).*$",
216
			    "^Vzdělávání pracovníků v organizaci$",
217
			    "^Staff training in organization$",
218
			    "^(Life Histories of North American Geometridae).*$",
219
			    "^Strategická analýza podniku$",
220
			    "^Strategic Analysis of an Enterprise$",
221
			    "^Sadržaj$",
222
			    "^Upute suradnicima$",
223
			    "^Rodinný dům$",
224
			    "(?i)^Fami(l)?ly house$",
225
			    "^Upute autorima$",
226
			    "^Strategic Analysis$",
227
			    "^Finanční analýza vybraného podniku$",
228
			    "^Finanční analýza$",
229
			    "^Riječ urednika$",
230
			    "(?i)^Content(s?)$",
231
			    "(?i)^Inhalt$",
232
			    "^Jinjō shōgaku shūshinsho jidōyō$",
233
			    "(?i)^Index$",
234
			    "^Chūgaku kokubun kyōkasho$",
235
			    "^Retrato de una mujer$",
236
			    "^Retrato de un hombre$",
237
			    "^Kōtō shōgaku dokuhon$",
238
			    "^Shotōka kokugo$",
239
			    "^Shōgaku dokuhon$",
240
			    "^Jinjō shōgaku kokugo dokuhon$",
241
			    "^Shinsei kokugo dokuhon$",
242
			    "^Teikoku dokuhon$",
243
			    "^Instructions to Authors$",
244
			    "^KİTAP TAHLİLİ$",
245
			    "^PRZEGLĄD PIŚMIENNICTWA$",
246
			    "(?i)^Presentación$",
247
			    "^İçindekiler$",
248
			    "(?i)^Tabl?e of contents$",
249
			    "^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
250
			    "^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
251
			    "^Editorial( Board)?$",
252
			    "(?i)^Editorial \\(English\\)$",
253
			    "^Editörden$",
254
			    "^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
255
			    "^(Kiri Karl Morgensternile).*$",
256
			    "^(\\[Eksliibris Aleksandr).*\\]$",
257
			    "^(\\[Eksliibris Aleksandr).*$",
258
			    "^(Eksliibris Aleksandr).*$",
259
			    "^(Kiri A\\. de Vignolles).*$",
260
			    "^(2 kirja Karl Morgensternile).*$",
261
			    "^(Pirita kloostri idaosa arheoloogilised).*$",
262
			    "^(Kiri tundmatule).*$",
263
			    "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
264
			    "^(Eksliibris Nikolai Birukovile).*$",
265
			    "^(Eksliibris Nikolai Issakovile).*$",
266
			    "^(WHP Cruise Summary Information of section).*$",
267
			    "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
268
			    "^(Measurement of the spin\\-dependent structure function).*",
269
			    "(?i)^.*authors['’′]? reply\.?$",
270
			    "(?i)^.*authors['’′]? response\.?$"
271
            ]
272
	    }
273
        }
274
    }
275
            </DEDUPLICATION>
276
        </CONFIGURATION>
277
        <STATUS>
278
            <LAST_UPDATE value="2001-12-31T12:00:00"/>
279
        </STATUS>
280
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
281
    </BODY>
282
</RESOURCE_PROFILE>
(4-4/4)