1 |
37017
|
claudio.at
|
<RESOURCE_PROFILE>
|
2 |
|
|
<HEADER>
|
3 |
42409
|
claudio.at
|
<RESOURCE_IDENTIFIER value="82b1c7fb-c36c-4291-8863-0393c7c588ee_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
|
4 |
37017
|
claudio.at
|
<RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
|
5 |
|
|
<RESOURCE_KIND value="DedupConfigurationDSResources"/>
|
6 |
|
|
<RESOURCE_URI value=""/>
|
7 |
|
|
<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
|
8 |
|
|
</HEADER>
|
9 |
|
|
<BODY>
|
10 |
|
|
<CONFIGURATION>
|
11 |
42409
|
claudio.at
|
<DESCRIPTION>1 - Person: Decision tree</DESCRIPTION>
|
12 |
37017
|
claudio.at
|
<DEDUPLICATION>
|
13 |
42409
|
claudio.at
|
{
|
14 |
|
|
"wf" : {
|
15 |
|
|
"threshold" : "1.0",
|
16 |
|
|
"dedupRun" : "001",
|
17 |
|
|
"entityType" : "person",
|
18 |
|
|
"orderField" : "fullname",
|
19 |
37017
|
claudio.at
|
"queueMaxSize" : "2000",
|
20 |
|
|
"groupMaxSize" : "10",
|
21 |
|
|
"slidingWindowSize" : "200",
|
22 |
38367
|
claudio.at
|
"rootBuilder" : [ "person" ],
|
23 |
42409
|
claudio.at
|
"includeChildren" : "true"
|
24 |
37017
|
claudio.at
|
},
|
25 |
42409
|
claudio.at
|
"pace" : {
|
26 |
37017
|
claudio.at
|
"clustering" : [
|
27 |
38367
|
claudio.at
|
{ "name" : "personclustering", "fields" : [ "person" ], "params" : { } }
|
28 |
42409
|
claudio.at
|
],
|
29 |
37017
|
claudio.at
|
"model" : [
|
30 |
42409
|
claudio.at
|
{ "name" : "fullname", "algo" : "JaroWinkler", "type" : "String", "weight" : "0.3", "ignoreMissing" : "false", "path" : "person/metadata/fullname/value", "params" : { } },
|
31 |
|
|
{ "name" : "person", "algo" : "PersonDistance", "type" : "JSON", "weight" : "0.7", "ignoreMissing" : "false", "path" : "person", "params" : { "common.anchors" : "1", "common.surnames" : "3" } },
|
32 |
|
|
{ "name" : "lastname", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "person/metadata/secondnames/value", "params" : { } }
|
33 |
37017
|
claudio.at
|
],
|
34 |
42409
|
claudio.at
|
"blacklists" : {
|
35 |
|
|
"lastname" : [
|
36 |
|
|
"(?i)^wang$",
|
37 |
|
|
"(?i)^~wang$",
|
38 |
|
|
"(?i)^zhang$",
|
39 |
|
|
"(?i)^zhou$",
|
40 |
|
|
"(?i)^zhao$",
|
41 |
|
|
"(?i)^li$",
|
42 |
|
|
"(?i)^~li$",
|
43 |
|
|
"(?i)^liu$",
|
44 |
|
|
"(?i)^chen$",
|
45 |
|
|
"(?i)^yang$",
|
46 |
|
|
"(?i)^kim$",
|
47 |
|
|
"(?i)^xu$",
|
48 |
|
|
"(?i)^huang$",
|
49 |
|
|
"(?i)^sun$",
|
50 |
|
|
"(?i)^lee$",
|
51 |
|
|
"(?i)^ma$",
|
53 |
|
|
"(?i)^hu$",
|
54 |
|
|
"(?i)^wu$",
|
55 |
|
|
"(?i)^zhu$",
|
56 |
|
|
"(?i)^lu$"
|
57 |
|
|
]
|
58 |
|
|
}
|
59 |
37017
|
claudio.at
|
}
|
60 |
42409
|
claudio.at
|
}
|
61 |
37017
|
claudio.at
|
</DEDUPLICATION>
|
62 |
|
|
</CONFIGURATION>
|
63 |
|
|
<STATUS>
|
64 |
|
|
<LAST_UPDATE value="2001-12-31T12:00:00"/>
|
65 |
|
|
</STATUS>
|
66 |
|
|
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
|
67 |
|
|
</BODY>
|
68 |
|
|
</RESOURCE_PROFILE>
|