Project

General

Profile

{
  "wf" : {
    "threshold" : "0.9",
    "dedupRun" : "001",
    "entityType" : "organization",
    "orderField" : "legalname",
    "queueMaxSize" : "2000",
    "groupMaxSize" : "50",
    "slidingWindowSize" : "200",
    "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
    "includeChildren" : "true"
  },
  "pace" : {
    "clustering" : [
      { "name" : "sortedngrampairs", "fields" : [ "legalname" ], "params" : { "max" : 2, "ngramLen" : "3"} },
      { "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : 1, "len" : "3" } },
      { "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } }
    ],
    "strictConditions" : [
      { "name" : "exactMatch", "fields" : [ "gridid" ] }
    ],
    "conditions" : [
      { "name" : "exactMatch", "fields" : [ "country" ] },
      { "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] }
    ],
    "model" : [
      { "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/country/classid" },
      { "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "false", "path" : "organization/metadata/legalshortname/value" },
      { "name" : "legalname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.9", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value", "params" : {"windowSize" : 4, "threshold" : 0.5} },
      { "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } },
      { "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" }
    ],
    "blacklists" : { }
  }
}
    (1-1/1)