<RESOURCE_PROFILE>
    <!-- Deduplication configuration profile for the "organization" entity type.
         Records are clustered and compared on the legalname field only
         (see the JSON carried by the DEDUPLICATION element below). -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="1d52dba7-1902-4c25-bf5b-3598f29ef11c_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
        <RESOURCE_KIND value="DedupConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <!-- NOTE(review): this timestamp and LAST_UPDATE below share the same value and
             look like template placeholders; confirm they are set on registration. -->
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <DESCRIPTION>2 - Organization: Match against the legalname</DESCRIPTION>
            <!-- The text content of DEDUPLICATION is a JSON document with two sections:
                   "wf"   : run parameters (threshold, entityType, orderField, queue,
                            group and sliding window sizes, rootBuilder, includeChildren);
                   "pace" : clustering functions (ngrampairs, suffixprefix) plus a model
                            with a single JaroWinkler comparison on legalname, weight 1.0.
                 All scalar values are written as JSON strings; keep them that way unless
                 the consumer is known to accept native numbers and booleans.
                 Do not place XML comments or any other non-JSON text inside the element. -->
            <DEDUPLICATION>
                {
                    "wf" : {
                        "threshold" : "0.95",
                        "dedupRun" : "001",
                        "entityType" : "organization",
                        "orderField" : "legalname",
                        "queueMaxSize" : "2000",
                        "groupMaxSize" : "10",
                        "slidingWindowSize" : "200",
                        "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy", "resultOrganization_affiliation_isAffiliatedWith" ],
                        "includeChildren" : "true"
                    },
                    "pace" : {
                        "clustering" : [
                            { "name" : "ngrampairs", "fields" : [ "legalname" ], "params" : { "max" : "1", "ngramLen" : "3" } },
                            { "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : "1", "len" : "3" } }
                        ],
                        "model" : [
                            { "name" : "legalname", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value" }
                        ],
                        "blacklists" : { }
                    }
                }
            </DEDUPLICATION>
        </CONFIGURATION>
        <STATUS>
            <LAST_UPDATE value="2001-12-31T12:00:00"/>
        </STATUS>
        <!-- NOTE(review): the text below repeats the element name and looks like a
             placeholder; confirm whether real security parameters are expected here. -->
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>