<?xml version="1.0" encoding="UTF-8"?>
<!--
  Resource profile holding a deduplication configuration
  (RESOURCE_KIND "DedupConfigurationDSResources",
  RESOURCE_TYPE "DedupConfigurationDSResourceType").
  The configuration itself is the JSON text inside
  BODY/CONFIGURATION/DEDUPLICATION.
-->
<RESOURCE_PROFILE>
  <HEADER>
    <RESOURCE_IDENTIFIER value="82b1c7fb-c36c-4291-8863-0393c7c588ee_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
    <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
    <RESOURCE_KIND value="DedupConfigurationDSResources"/>
    <RESOURCE_URI value=""/>
    <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
  </HEADER>
  <BODY>
    <CONFIGURATION>
      <DESCRIPTION>1 - Person: Decision tree</DESCRIPTION>
      <!--
        The DEDUPLICATION text is a JSON document; JSON has no comment
        syntax, so the notes are kept here.

        "wf":   run settings for entity type "person" (threshold, dedupRun,
                orderField, queue/group/sliding-window sizes, rootBuilder,
                includeChildren). All values are given as JSON strings.
        "pace": "clustering" lists one entry ("personclustering" over the
                "person" field); "model" lists three fields: "fullname"
                (JaroWinkler, weight 0.3), "person" (PersonDistance,
                weight 0.7) and "lastname" (algo "Null", weight 0).
                "blacklists" maps the field name "lastname" to a list of
                case-insensitive, fully anchored regular expressions.

        NOTE(review): the "lastname" model entry has no "params" key,
        unlike the other two entries; confirm the consumer accepts that.
        NOTE(review): presumably the model weights (0.3 + 0.7) are combined
        and compared against "threshold"; verify against the consumer.
      -->
      <DEDUPLICATION>
        {
          "wf" : {
            "threshold" : "1.0",
            "dedupRun" : "001",
            "entityType" : "person",
            "orderField" : "fullname",
            "queueMaxSize" : "2000",
            "groupMaxSize" : "10",
            "slidingWindowSize" : "200",
            "rootBuilder" : [ "person" ],
            "includeChildren" : "true"
          },
          "pace" : {
            "clustering" : [
              { "name" : "personclustering", "fields" : [ "person" ], "params" : { } }
            ],
            "model" : [
              { "name" : "fullname", "algo" : "JaroWinkler", "type" : "String", "weight" : "0.3", "ignoreMissing" : "false", "path" : "person/metadata/fullname/value", "params" : { } },
              { "name" : "person", "algo" : "PersonDistance", "type" : "JSON", "weight" : "0.7", "ignoreMissing" : "false", "path" : "person", "params" : { "common.anchors" : "1", "common.surnames" : "3" } },
              { "name" : "lastname", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "person/metadata/secondnames/value" }
            ],
            "blacklists" : {
              "lastname" : [
                "(?i)^wang$",
                "(?i)^~wang$",
                "(?i)^zhang$",
                "(?i)^zhou$",
                "(?i)^zhao$",
                "(?i)^li$",
                "(?i)^~li$",
                "(?i)^liu$",
                "(?i)^chen$",
                "(?i)^yang$",
                "(?i)^kim$",
                "(?i)^xu$",
                "(?i)^huang$",
                "(?i)^sun$",
                "(?i)^lee$",
                "(?i)^ma$",
                "(?i)^hu$",
                "(?i)^wu$",
                "(?i)^zhu$",
                "(?i)^lu$"
              ]
            }
          }
        }
      </DEDUPLICATION>
    </CONFIGURATION>
    <STATUS>
      <LAST_UPDATE value="2001-12-31T12:00:00"/>
    </STATUS>
    <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
  </BODY>
</RESOURCE_PROFILE>