Project

General

Profile

1 37017 claudio.at
<RESOURCE_PROFILE>
    <!--
        Resource profile of type DedupConfigurationDSResourceType.
        HEADER identifies the resource; BODY/CONFIGURATION holds a human-readable
        DESCRIPTION plus a DEDUPLICATION element whose text content is a JSON document.
    -->
    <HEADER>
        <!--
            The part of RESOURCE_IDENTIFIER after the underscore is the Base64 encoding of
            "DedupConfigurationDSResources/DedupConfigurationDSResourceType",
            i.e. RESOURCE_KIND "/" RESOURCE_TYPE as declared below. Keep the three in sync.
        -->
        <RESOURCE_IDENTIFIER value="82b1c7fb-c36c-4291-8863-0393c7c588ee_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
        <RESOURCE_KIND value="DedupConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <!-- NOTE(review): no timezone designator; the value looks like a placeholder timestamp. Confirm. -->
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <DESCRIPTION>1 - Person: Decision tree</DESCRIPTION>
            <!--
                The DEDUPLICATION text is a JSON object with two top-level members:
                  "wf"   : scalar settings (threshold, dedupRun, entityType, orderField,
                           queueMaxSize, groupMaxSize, slidingWindowSize, includeChildren)
                           and the "rootBuilder" list. Numeric and boolean settings are
                           written as JSON strings.
                  "pace" : "clustering" (list), "model" (list of field definitions, each with
                           name / algo / type / weight / ignoreMissing / path) and
                           "blacklists" (per-field lists of patterns).
                The "lastname" blacklist entries are written as case-insensitive, fully
                anchored regular expressions: (?i)^...$ .
                NOTE(review): the "lastname" model entry has no "params" member, unlike the
                other two model entries. Confirm the consumer treats a missing value as empty.
                Do not put XML comments or other markup inside DEDUPLICATION: its text must
                remain a single valid JSON document.
            -->
            <DEDUPLICATION>
                {
                    "wf" : {
                        "threshold" : "1.0",
                        "dedupRun" : "001",
                        "entityType" : "person",
                        "orderField" : "fullname",
                        "queueMaxSize" : "2000",
                        "groupMaxSize" : "10",
                        "slidingWindowSize" : "200",
                        "rootBuilder" : [ "person" ],
                        "includeChildren" : "true"
                    },
                    "pace" : {
                        "clustering" : [
                            { "name" : "personclustering", "fields" : [ "person" ], "params" : { } }
                        ],
                        "model" : [
                            { "name" : "fullname", "algo" : "JaroWinkler", "type" : "String", "weight" : "0.3", "ignoreMissing" : "false", "path" : "person/metadata/fullname/value", "params" : { } },
                            { "name" : "person", "algo" : "PersonDistance", "type" : "JSON", "weight" : "0.7", "ignoreMissing" : "false", "path" : "person", "params" : { "common.anchors" : "1", "common.surnames" : "3" } },
                            { "name" : "lastname", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "person/metadata/secondnames/value" }
                        ],
                        "blacklists" : {
                            "lastname" : [
                                "(?i)^wang$",
                                "(?i)^~wang$",
                                "(?i)^zhang$",
                                "(?i)^zhou$",
                                "(?i)^zhao$",
                                "(?i)^li$",
                                "(?i)^~li$",
                                "(?i)^liu$",
                                "(?i)^chen$",
                                "(?i)^yang$",
                                "(?i)^kim$",
                                "(?i)^xu$",
                                "(?i)^huang$",
                                "(?i)^sun$",
                                "(?i)^lee$",
                                "(?i)^ma$",
                                "(?i)^hu$",
                                "(?i)^wu$",
                                "(?i)^zhu$",
                                "(?i)^lu$"
                            ]
                        }
                    }
                }
            </DEDUPLICATION>
        </CONFIGURATION>
        <STATUS>
            <!-- NOTE(review): same value as DATE_OF_CREATION in the header; presumably a placeholder. Confirm. -->
            <LAST_UPDATE value="2001-12-31T12:00:00"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>