Project

General

Profile

1
<RESOURCE_PROFILE>
	<!-- Profile for a single resource of type DedupConfigurationDSResourceType.
	     HEADER holds the resource identity and creation date; BODY holds the
	     CONFIGURATION, a STATUS block and SECURITY_PARAMETERS. -->
2
	<HEADER>
		<!-- RESOURCE_TYPE / RESOURCE_KIND classify the profile; RESOURCE_URI is left empty here. -->
3
		<RESOURCE_IDENTIFIER value="c611ec67-eefc-4ffe-a5d4-cb3fc40a8bab_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
4
		<RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="DedupConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2018-07-25T15:04:07+00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<CONFIGURATION>
			<!-- DESCRIPTION is free text. DEDUPLICATION carries a JSON document as its element
			     text, with two top-level sections:
			       "wf"   : run settings. Selects entityType "result" with subEntityType "resulttype"
			                and subEntityValue "other"; sets threshold "0.99", orderField "title",
			                the queue / group / sliding-window sizes and the rootBuilder list.
			       "pace" : three clustering entries (ngrampairs and suffixprefix over "title",
			                lowercase over "doi"), strictConditions, conditions, the field model
			                and an empty "blacklists" object.
			     In the model only "title" has a non-zero weight ("1.0", algo "LevensteinTitle");
			     every other field has weight "0.0" and algo "Null".
			     NOTE(review): numbers and booleans are written as JSON strings ("0.99", "true");
			     presumably this is what the consumer expects. Confirm before changing any types.
			     Comments are deliberately kept outside DEDUPLICATION so that its text content
			     is not altered. -->
11
			<DESCRIPTION>3 - Other research product: group by DOI and match against the title, when PIDs are not available</DESCRIPTION>
12
			<DEDUPLICATION>
13
{
14
	"wf" : {
15
		"threshold" : "0.99",
16
		"dedupRun" : "001",
17
		"entityType" : "result",
18
		"subEntityType" : "resulttype",
19
		"subEntityValue" : "other",
20
		"orderField" : "title",
21
		"queueMaxSize" : "2000",
22
		"groupMaxSize" : "10",
23
		"slidingWindowSize" : "200",
24
		"rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_isAffiliatedWith" ],
25
		"includeChildren" : "true"
26
	},
27
	"pace" : {
28
		"clustering" : [
29
			{ "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
30
			{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } },
31
			{ "name" : "lowercase", "fields" : [ "doi" ], "params" : { } }
32
		],
33
		"strictConditions" : [
34
			{ "name" : "pidMatch", "fields" : [ "pid" ] },
35
			{ "name" : "exactMatch", "fields" : [ "resulttype" ] }
36
		],
37
		"conditions" : [
38
			{ "name" : "titleVersionMatch", "fields" : [ "title" ] },
39
			{ "name" : "sizeMatch", "fields" : [ "authors" ] }
40
		],
41
		"model" : [
42
			{ "name" : "doi", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {doi}]/value" },
43
			{ "name" : "pid", "algo" : "Null", "type" : "JSON", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid", "overrideMatch" : "true" },
44
			{ "name" : "title", "algo" : "LevensteinTitle", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" },
45
			{ "name" : "authors", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/metadata/author/fullname" },
46
			{ "name" : "resulttype", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "false", "path" : "result/metadata/resulttype/classid" }
47
		],
48
		"blacklists" : {
49

    
50
		}
51
	}
52
}
53
			</DEDUPLICATION>
54
		</CONFIGURATION>
55
		<STATUS>
			<!-- NOTE(review): LAST_UPDATE (2001-12-31T12:00:00) predates DATE_OF_CREATION
			     (2018-07-25T15:04:07+00:00) and, unlike it, carries no UTC offset.
			     Looks like a template placeholder; confirm whether it should be refreshed. -->
56
			<LAST_UPDATE value="2001-12-31T12:00:00"/>
57
		</STATUS>
		<!-- NOTE(review): the text of SECURITY_PARAMETERS equals its own element name;
		     presumably a placeholder value. Confirm. -->
58
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
	</BODY>
60
</RESOURCE_PROFILE>
(5-5/7)