Project

General

Profile

<RESOURCE_PROFILE>
    <!--
      Resource profile of type DedupConfigurationDSResourceType.
      HEADER carries the identification metadata of the profile.
      BODY/CONFIGURATION carries a human-readable description and the
      deduplication settings, stored as a JSON document in the text of the
      DEDUPLICATION element.
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="82b1c7fb-c36c-4291-8863-0393c7c588ee_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
        <RESOURCE_KIND value="DedupConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <DESCRIPTION>1 - Person: Decision tree</DESCRIPTION>
            <!--
              The DEDUPLICATION text is JSON, not XML: keep it valid JSON and do
              not place XML comments inside it. It has two top-level objects:

                "wf"   : run settings (threshold, dedupRun, entityType,
                         orderField, queueMaxSize, groupMaxSize,
                         slidingWindowSize, rootBuilder, includeChildren).
                         Scalar values are written as quoted strings.
                "pace" : "clustering" definitions, the comparison "model" (one
                         entry per field: name, algo, type, weight,
                         ignoreMissing, path, optional params) and
                         "blacklists", keyed by a model field name and holding
                         case-insensitive regular expressions anchored with
                         ^ and $.

              The weights of "fullname" (0.3) and "person" (0.7) sum to 1.0.
              "lastname" uses algo "Null" with weight 0.
              NOTE(review): presumably the "lastname" entry exists only so the
              "lastname" blacklist has a field to refer to; confirm against
              the consumer of this profile.
              NOTE(review): the "lastname" entry has no "params" key, unlike
              the other two model entries; confirm that the key is optional.
            -->
            <DEDUPLICATION>
            {
                "wf" : {
                    "threshold" : "1.0",
                    "dedupRun" : "001",
                    "entityType" : "person",
                    "orderField" : "fullname",
                    "queueMaxSize" : "2000",
                    "groupMaxSize" : "10",
                    "slidingWindowSize" : "200",
                    "rootBuilder" : [ "person" ],
                    "includeChildren" : "true"
                },
                "pace" : {
                    "clustering" : [
                        { "name" : "personclustering", "fields" : [ "person" ], "params" : { } }
                    ],
                    "model" : [
                        { "name" : "fullname", "algo" : "JaroWinkler", "type" : "String", "weight" : "0.3", "ignoreMissing" : "false", "path" : "person/metadata/fullname/value", "params" : { } },
                        { "name" : "person", "algo" : "PersonDistance", "type" : "JSON", "weight" : "0.7", "ignoreMissing" : "false", "path" : "person", "params" : { "common.anchors" : "1", "common.surnames" : "3" } },
                        { "name" : "lastname", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "person/metadata/secondnames/value" }
                    ],
                    "blacklists" : {
                        "lastname" : [
                            "(?i)^wang$",
                            "(?i)^~wang$",
                            "(?i)^zhang$",
                            "(?i)^zhou$",
                            "(?i)^zhao$",
                            "(?i)^li$",
                            "(?i)^~li$",
                            "(?i)^liu$",
                            "(?i)^chen$",
                            "(?i)^yang$",
                            "(?i)^kim$",
                            "(?i)^xu$",
                            "(?i)^huang$",
                            "(?i)^sun$",
                            "(?i)^lee$",
                            "(?i)^ma$",
                            "(?i)^hu$",
                            "(?i)^wu$",
                            "(?i)^zhu$",
                            "(?i)^lu$"
                        ]
                    }
                }
            }
            </DEDUPLICATION>
        </CONFIGURATION>
        <STATUS>
            <LAST_UPDATE value="2001-12-31T12:00:00"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
(3-3/7)