Project

General

Profile

1
<RESOURCE_PROFILE>
    <!-- Resource profile of type DedupConfigurationDSResourceType (kind DedupConfigurationDSResources). -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="2a81faf4-3412-48e2-a0a5-045636dedfb1_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
        <RESOURCE_KIND value="DedupConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <DESCRIPTION>1 - Person: Merge by fullname</DESCRIPTION>
            <!--
                The DEDUPLICATION element carries a JSON document as its text content.
                JSON has no comment syntax, so the notes for it are kept here.

                wf               : run settings for entityType "person" (threshold, dedupRun,
                                   orderField, queue / group / sliding window sizes, rootBuilder).
                pace.clustering  : a single entry, "personclustering", over the field "person".
                pace.model       : five fields. "fullname", "coanchors" and "coauthorsurnames"
                                   each have weight 0.3; "person" and "lastname" have weight 0.
                pace.blacklists  : keyed by the model field name "lastname"; every pattern uses
                                   the (?i) flag and is anchored with ^ and $.

                NOTE(review): the non-zero weights sum to 0.9 while wf.threshold is "1.0".
                Confirm how the consumer normalises the score before relying on this threshold.
                NOTE(review): all numbers and booleans are written as JSON strings ("1.0", "true");
                presumably the consumer expects that, so they are left as they are.
            -->
            <DEDUPLICATION>
            {
                "wf" : {
                    "threshold" : "1.0",
                    "dedupRun" : "001",
                    "entityType" : "person",
                    "orderField" : "person",
                    "queueMaxSize" : "2000",
                    "groupMaxSize" : "10",
                    "slidingWindowSize" : "200",
                    "rootBuilder" : [ "person" ],
                    "includeChildren" : "true"
                },
                "pace" : {
                    "clustering" : [
                        { "name" : "personclustering", "fields" : [ "person" ], "params" : { } }
                    ],
                    "model" : [
                        { "name" : "person", "algo" : "PersonDistance", "type" : "JSON", "weight" : "0", "ignoreMissing" : "false", "path" : "person" },
                        { "name" : "fullname", "algo" : "JaroWinkler", "type" : "String", "weight" : "0.3", "ignoreMissing" : "false", "path" : "person/metadata/fullname/value", "params" : { } },
                        { "name" : "coanchors", "algo" : "PersonCoAnchorsDistance", "type" : "JSON", "weight" : "0.3", "ignoreMissing" : "false", "path" : "person", "params" : { "common.anchors" : "1" } },
                        { "name" : "coauthorsurnames", "algo" : "PersonCoAuthorSurnamesDistance", "type" : "JSON", "weight" : "0.3", "ignoreMissing" : "false", "path" : "person", "params" : { "common.surnames" : "4" } },
                        { "name" : "lastname", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "person/metadata/secondnames/value" }
                    ],
                    "blacklists" : {
                        "lastname" : [
                            "(?i)^wang$",
                            "(?i)^~wang$",
                            "(?i)^zhang$",
                            "(?i)^zhou$",
                            "(?i)^zhao$",
                            "(?i)^li$",
                            "(?i)^~li$",
                            "(?i)^liu$",
                            "(?i)^chen$",
                            "(?i)^yang$",
                            "(?i)^kim$",
                            "(?i)^xu$",
                            "(?i)^huang$",
                            "(?i)^sun$",
                            "(?i)^lee$",
                            "(?i)^ma$",
                            "(?i)^hu$",
                            "(?i)^wu$",
                            "(?i)^zhu$",
                            "(?i)^lu$"
                        ]
                    }
                }
            }
            </DEDUPLICATION>
        </CONFIGURATION>
        <STATUS>
            <LAST_UPDATE value="2001-12-31T12:00:00"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
(3-3/8)