Revision 49437
Added by Claudio Atzori over 6 years ago
modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/DedupConfigurationDSResources/DedupConfigurationDSResourceType/organization.step.01.xml | ||
---|---|---|
<RESOURCE_PROFILE>
    <!-- Deduplication configuration profile, step 1 for "organization" entities.
         Per DESCRIPTION and the JSON below, it compares the legalshortname field
         with the JaroWinkler algorithm. -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="98494a63-f5d1-46f7-9afd-e026c1dda913_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
        <RESOURCE_KIND value="DedupConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <!-- NOTE(review): 2001-12-31T12:00:00 looks like a placeholder timestamp (LAST_UPDATE carries the same value); confirm. -->
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <DESCRIPTION>1 - Organization: Merge by legalshortname</DESCRIPTION>
            <!-- The text of DEDUPLICATION is a JSON document. JSON has no comment syntax,
                 so all notes are kept outside the element.
                 "wf" holds threshold, dedupRun, entityType, orderField, the queue/group/window
                 sizes, rootBuilder and includeChildren.
                 "pace" holds the clustering functions, the comparison model and the blacklists.
                 Every scalar, including numbers and booleans, is written as a JSON string.
                 NOTE(review): "threshold" is presumably the minimum similarity score for a match; verify against the consumer. -->
            <DEDUPLICATION>
{
  "wf" : {
    "threshold" : "1.0",
    "dedupRun" : "001",
    "entityType" : "organization",
    "orderField" : "legalshortname",
    "queueMaxSize" : "2000",
    "groupMaxSize" : "10",
    "slidingWindowSize" : "200",
    "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy", "resultOrganization_affiliation_isAffiliatedWith" ],
    "includeChildren" : "true"
  },
  "pace" : {
    "clustering" : [
      { "name" : "spacetrimmingfieldvalue", "fields" : [ "legalshortname" ], "params" : { "randomLength" : "5" } }
    ],
    "model" : [
      { "name" : "legalshortname", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "true", "path" : "organization/metadata/legalshortname/value" }
    ],
    "blacklists" : { }
  }
}
            </DEDUPLICATION>
        </CONFIGURATION>
        <STATUS>
            <LAST_UPDATE value="2001-12-31T12:00:00"/>
        </STATUS>
        <!-- NOTE(review): the element text repeats the element name; looks like a template placeholder. Confirm. -->
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/DedupConfigurationDSResources/DedupConfigurationDSResourceType/organization.step.02.xml | ||
---|---|---|
<RESOURCE_PROFILE>
    <!-- Deduplication configuration profile, step 2 for "organization" entities.
         Per DESCRIPTION and the JSON below, it compares the legalname field
         with the JaroWinkler algorithm. -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="1d52dba7-1902-4c25-bf5b-3598f29ef11c_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
        <RESOURCE_KIND value="DedupConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <!-- NOTE(review): 2001-12-31T12:00:00 looks like a placeholder timestamp (LAST_UPDATE carries the same value); confirm. -->
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <DESCRIPTION>2 - Organization: Match against the legalname</DESCRIPTION>
            <!-- The text of DEDUPLICATION is a JSON document. JSON has no comment syntax,
                 so all notes are kept outside the element.
                 "wf" holds threshold, dedupRun, entityType, orderField, the queue/group/window
                 sizes, rootBuilder and includeChildren.
                 "pace" holds two clustering functions (ngrampairs, suffixprefix), both over
                 legalname, plus the comparison model and the blacklists.
                 Every scalar, including numbers and booleans, is written as a JSON string.
                 NOTE(review): "threshold" is presumably the minimum similarity score for a match; verify against the consumer. -->
            <DEDUPLICATION>
{
  "wf" : {
    "threshold" : "0.95",
    "dedupRun" : "001",
    "entityType" : "organization",
    "orderField" : "legalname",
    "queueMaxSize" : "2000",
    "groupMaxSize" : "10",
    "slidingWindowSize" : "200",
    "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy", "resultOrganization_affiliation_isAffiliatedWith" ],
    "includeChildren" : "true"
  },
  "pace" : {
    "clustering" : [
      { "name" : "ngrampairs", "fields" : [ "legalname" ], "params" : { "max" : "1", "ngramLen" : "3"} },
      { "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : "1", "len" : "3" } }
    ],
    "model" : [
      { "name" : "legalname", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value" }
    ],
    "blacklists" : { }
  }
}
            </DEDUPLICATION>
        </CONFIGURATION>
        <STATUS>
            <LAST_UPDATE value="2001-12-31T12:00:00"/>
        </STATUS>
        <!-- NOTE(review): the element text repeats the element name; looks like a template placeholder. Confirm. -->
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
Also available in: Unified diff
cleanup