Project

General

Profile

1
<RESOURCE_PROFILE>
    <!--
        Resource profile holding one deduplication configuration.
        HEADER identifies the resource; BODY/CONFIGURATION carries a
        human-readable DESCRIPTION and the configuration itself, stored as a
        JSON document in the text content of DEDUPLICATION.
    -->
    <HEADER>
        <!-- NOTE(review): the part after "_" looks like Base64 of
             "<RESOURCE_KIND>/<RESOURCE_TYPE>"; confirm before editing by hand. -->
        <RESOURCE_IDENTIFIER value="20d08926-ce57-443c-adef-c879c561d30d_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
        <RESOURCE_KIND value="DedupConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <!-- NOTE(review): same timestamp as STATUS/LAST_UPDATE and it carries no
             time zone; presumably a template placeholder. Confirm. -->
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <DESCRIPTION>1 - Organization: Match against the legalname and legalshortname</DESCRIPTION>
            <!--
                The text content of DEDUPLICATION must be a single valid JSON
                object. JSON has no comment syntax, so all notes live here.

                "wf"   : run settings. Every value is a quoted string, including
                         the numeric-looking ones ("0.98", "2000", "true"); keep
                         them quoted unless the consumer is known to accept
                         native JSON numbers and booleans.
                "pace" : matching settings.
                  "clustering" : three entries, two over "legalname" and one
                                 over "legalshortname".
                  "conditions" : one "exactMatch" entry on "country".
                  "model"      : one entry per compared field. The weights are
                                 0.7 (legalname) + 0.3 (legalshortname) +
                                 0 (country) = 1.0. "country" uses algo "Null"
                                 with weight 0, so in this file it appears only
                                 in "conditions" and adds nothing to the
                                 weighted total.
                  "blacklists" : empty object.

                NOTE(review): the meaning of "threshold", "queueMaxSize",
                "groupMaxSize" and "slidingWindowSize" is defined by the
                consuming deduplication engine and is not visible here.
            -->
            <DEDUPLICATION>
            {
                "wf" : {
                    "threshold" : "0.98",
                    "dedupRun" : "001",
                    "entityType" : "organization",
                    "orderField" : "legalname",
                    "queueMaxSize" : "2000",
                    "groupMaxSize" : "10",
                    "slidingWindowSize" : "200",
                    "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy", "resultOrganization_affiliation_isAffiliatedWith" ],
                    "includeChildren" : "true"
                },
                "pace" : {
                    "clustering" : [
                        { "name" : "ngrampairs", "fields" : [ "legalname" ], "params" : { "max" : "1", "ngramLen" : "3"} },
                        { "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : "1", "len" : "3" } },
                        { "name" : "spacetrimmingfieldvalue", "fields" : [ "legalshortname" ], "params" : { "randomLength" : "5" } }
                    ],
                    "conditions" : [
                        { "name" : "exactMatch", "fields" : [ "country" ] }
                    ],
                    "model" : [
                        { "name" : "legalname", "algo" : "JaroWinkler", "type" : "String", "weight" : "0.7", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value" },
                        { "name" : "legalshortname", "algo" : "JaroWinkler", "type" : "String", "weight" : "0.3", "ignoreMissing" : "true", "path" : "organization/metadata/legalshortname/value" },
                        { "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/country/classid" }
                    ],
                    "blacklists" : { }
                }
            }
            </DEDUPLICATION>
        </CONFIGURATION>
        <STATUS>
            <LAST_UPDATE value="2001-12-31T12:00:00"/>
        </STATUS>
        <!-- NOTE(review): the content equals the element name; presumably a
             placeholder to be filled in at registration time. Confirm. -->
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
(1-1/6)