<RESOURCE_PROFILE>
  <!--
    Deduplication configuration profile
    (RESOURCE_TYPE "DedupConfigurationDSResourceType",
     RESOURCE_KIND "DedupConfigurationDSResources").

    Layout:
      HEADER                            : identity and creation date of this resource.
      BODY/CONFIGURATION/DESCRIPTION    : human-readable label of the configuration.
      BODY/CONFIGURATION/DEDUPLICATION  : the settings themselves, stored as a JSON
                                          document in the element text.
      BODY/STATUS, SECURITY_PARAMETERS  : bookkeeping fields.

    The JSON inside DEDUPLICATION is element text, not XML structure: it has to stay
    valid JSON, and any literal "less-than" or ampersand character added to it later
    must be escaped as an XML entity. Keep comments out of that element so that the
    text handed to the JSON parser contains nothing but JSON.
  -->
  <HEADER>
    <RESOURCE_IDENTIFIER value="c611ec67-eefc-4ffe-a5d4-cb3fc40a8bac_RGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZXMvRGVkdXBDb25maWd1cmF0aW9uRFNSZXNvdXJjZVR5cGU="/>
    <RESOURCE_TYPE value="DedupConfigurationDSResourceType"/>
    <RESOURCE_KIND value="DedupConfigurationDSResources"/>
    <RESOURCE_URI value=""/>
    <DATE_OF_CREATION value="2018-07-25T15:04:07+00:00"/>
  </HEADER>
  <BODY>
    <CONFIGURATION>
      <DESCRIPTION>4 - Software: Match against the title, whose numbers must match</DESCRIPTION>
      <!--
        JSON payload with two top-level keys:
          "wf"   : run settings (threshold, entity type/sub-type filter, queue, group
                   and sliding-window sizes, rootBuilder list, includeChildren).
          "pace" : "clustering", "strictConditions" and "conditions" rule lists that
                   refer by name to the fields declared in "model", plus "blacklists"
                   (empty here).
        All scalar values are written as JSON strings (for example "0.99", "true");
        NOTE(review): presumably the consumer parses them itself - keep them quoted
        unless the consumer is confirmed to accept native numbers and booleans.
      -->
      <DEDUPLICATION>
{
  "wf" : {
    "threshold" : "0.99",
    "dedupRun" : "001",
    "entityType" : "result",
    "subEntityType" : "resulttype",
    "subEntityValue" : "software",
    "orderField" : "title",
    "queueMaxSize" : "2000",
    "groupMaxSize" : "10",
    "slidingWindowSize" : "200",
    "rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_isAffiliatedWith" ],
    "includeChildren" : "true"
  },
  "pace" : {
    "clustering" : [
      { "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
      { "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } },
      { "name" : "lowercase", "fields" : [ "doi", "url" ], "params" : { } }
    ],
    "strictConditions" : [
      { "name" : "pidMatch", "fields" : [ "doi" ] },
      { "name" : "exactMatch", "fields" : [ "resulttype", "url" ] }
    ],
    "conditions" : [
      { "name" : "titleVersionMatch", "fields" : [ "title" ] }
    ],
    "model" : [
      { "name" : "doi", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {doi}]/value" },
      { "name" : "title", "algo" : "LevensteinTitle", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" },
      { "name" : "url", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/instance/url" },
      { "name" : "resulttype", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "false", "path" : "result/metadata/resulttype/classid" }
    ],
    "blacklists" : { }
  }
}
      </DEDUPLICATION>
    </CONFIGURATION>
    <STATUS>
      <!-- NOTE(review): this timestamp is earlier than DATE_OF_CREATION and carries no
           timezone offset; it looks like a placeholder - confirm before relying on it. -->
      <LAST_UPDATE value="2001-12-31T12:00:00"/>
    </STATUS>
    <!-- NOTE(review): the text below merely repeats the element name; presumably a
         placeholder value - confirm with the consumer of this profile. -->
    <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
  </BODY>
</RESOURCE_PROFILE>