Project

General

Profile

« Previous | Next » 

Revision 49437

cleanup

View differences:

modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/DedupConfigurationDSResources/DedupConfigurationDSResourceType/organization.step.01.xml
1
<!--
  Dedup configuration profile for organization entities, step 01 (per the
  file name organization.step.01.xml). The JSON held in DEDUPLICATION uses
  "legalshortname" as its orderField, as its only clustering field and as
  its only model field (algo "JaroWinkler"), with a "wf" threshold of "1.0".
  NOTE(review): the bare numbers on their own lines (2, 3, 4, ...) look like
  a line-number gutter copied from a diff viewer rather than profile
  content; confirm and strip them before loading this as XML.
-->
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <!-- The part after "_" is Base64 for "DedupConfigurationDSResources/DedupConfigurationDSResourceType", i.e. the RESOURCE_KIND and RESOURCE_TYPE values below joined by "/". -->
        <RESOURCE_IDENTIFIER value="98494a63-f5d1-46f7-9afd-e026c1dda913_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="DedupConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <!-- NOTE(review): the same timestamp is used for LAST_UPDATE further down; presumably a placeholder, confirm. -->
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
           	<DESCRIPTION>1 - Organization: Merge by legalshortname</DESCRIPTION>
12
            <!--
              DEDUPLICATION carries a JSON document as plain text content.
              Every value in it, numeric and boolean ones included, is written
              as a JSON string. It has two top-level objects: "wf" (presumably
              the workflow settings, confirm) and "pace" with the "clustering",
              "model" and "blacklists" sections.
              NOTE(review): presumably parsed as JSON by the consumer; keep the
              text valid JSON and XML-escape any "<" or "&" added later.
            -->
            <DEDUPLICATION>
13
			{ 
14
				"wf" : { 
15
			        "threshold" : "1.0", 
16
			        "dedupRun" : "001", 
17
			        "entityType" : "organization", 
18
			        "orderField" : "legalshortname", 
19
			        "queueMaxSize" : "2000",
20
			        "groupMaxSize" : "10",
21
			        "slidingWindowSize" : "200",
22
			        "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy", "resultOrganization_affiliation_isAffiliatedWith" ],
23
			        "includeChildren" : "true" 
24
			    },
25
				"pace" : {		
26
					"clustering" : [
27
						{ "name" : "spacetrimmingfieldvalue", "fields" : [ "legalshortname" ], "params" : { "randomLength" : "5" } }
28
					],		
29
					"model" : [
30
						{ "name" : "legalshortname", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "true", "path" : "organization/metadata/legalshortname/value" }
31
					],
32
					"blacklists" : { } 		
33
				}
34
			}            
35
            </DEDUPLICATION>
36
        </CONFIGURATION>
37
        <STATUS>
38
            <LAST_UPDATE value="2001-12-31T12:00:00"/>
39
        </STATUS>
40
        <!-- NOTE(review): the value repeats the element name; looks like a placeholder, confirm. -->
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
41
    </BODY>
42
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/DedupConfigurationDSResources/DedupConfigurationDSResourceType/organization.step.02.xml
1
<!--
  Dedup configuration profile for organization entities, step 02 (per the
  file name organization.step.02.xml). The JSON held in DEDUPLICATION uses
  "legalname" as its orderField, as the field of both clustering entries
  ("ngrampairs" and "suffixprefix") and as its only model field (algo
  "JaroWinkler"), with a "wf" threshold of "0.95".
  NOTE(review): the bare numbers on their own lines (2, 3, 4, ...) look like
  a line-number gutter copied from a diff viewer rather than profile
  content; confirm and strip them before loading this as XML.
-->
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <!-- The part after "_" is Base64 for "DedupConfigurationDSResources/DedupConfigurationDSResourceType", i.e. the RESOURCE_KIND and RESOURCE_TYPE values below joined by "/". -->
        <RESOURCE_IDENTIFIER value="1d52dba7-1902-4c25-bf5b-3598f29ef11c_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="DedupConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <!-- NOTE(review): the same timestamp is used for LAST_UPDATE further down; presumably a placeholder, confirm. -->
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
        	<DESCRIPTION>2 - Organization: Match against the legalname</DESCRIPTION>
12
            <!--
              DEDUPLICATION carries a JSON document as plain text content.
              Every value in it, numeric and boolean ones included, is written
              as a JSON string. It has two top-level objects: "wf" (presumably
              the workflow settings, confirm) and "pace" with the "clustering",
              "model" and "blacklists" sections. The single model entry sets
              "ignoreMissing" to "false".
              NOTE(review): presumably parsed as JSON by the consumer; keep the
              text valid JSON and XML-escape any "<" or "&" added later.
            -->
            <DEDUPLICATION>
13
			{ 
14
				"wf" : { 
15
			        "threshold" : "0.95", 
16
			        "dedupRun" : "001", 
17
			        "entityType" : "organization", 
18
			        "orderField" : "legalname", 
19
			        "queueMaxSize" : "2000",
20
			        "groupMaxSize" : "10",
21
			        "slidingWindowSize" : "200",
22
			        "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy", "resultOrganization_affiliation_isAffiliatedWith" ],
23
			        "includeChildren" : "true" 
24
			    },
25
				"pace" : {		
26
					"clustering" : [
27
						{ "name" : "ngrampairs", "fields" : [ "legalname" ], "params" : { "max" : "1", "ngramLen" : "3"} },
28
						{ "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : "1", "len" : "3" } } 
29
					],		
30
					"model" : [
31
						{ "name" : "legalname", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value" }
32
					],
33
					"blacklists" : { } 		
34
				}
35
			} 	
36
            </DEDUPLICATION>
37
        </CONFIGURATION>
38
        <STATUS>
39
            <LAST_UPDATE value="2001-12-31T12:00:00"/>
40
        </STATUS>
41
        <!-- NOTE(review): the value repeats the element name; looks like a placeholder, confirm. -->
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
42
    </BODY>
43
</RESOURCE_PROFILE>

Also available in: Unified diff