1
|
<RESOURCE_PROFILE>
|
2
|
<HEADER>
|
3
|
<RESOURCE_IDENTIFIER value="50cbbb8f-b9d4-486d-9895-a80ca7b963b9_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
|
4
|
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
|
5
|
<RESOURCE_KIND value="TransformationRuleDSResources"/>
|
6
|
<RESOURCE_URI value=""/>
|
7
|
<DATE_OF_CREATION value="2018-04-10T15:35:05+00:00"/>
|
8
|
</HEADER>
|
9
|
<BODY>
|
10
|
<CONFIGURATION>
|
11
|
<IMPORTED/>
|
12
|
<SCRIPT>
|
13
|
<TITLE>dc_cleaning_OPENAIREplus_compliant_dcidentifier_last</TITLE>
|
14
|
<CODE>declare_script "dc_cleaning_OpenAIREplus_compliant_dcidentifier_last";
|
15
|
declare_ns oaf = "http://namespace.openaire.eu/oaf";
|
16
|
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
|
17
|
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
|
18
|
declare_ns dc = "http://purl.org/dc/elements/1.1/";
|
19
|
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
|
20
|
declare_ns xs = "http://www.w3.org/2001/XMLSchema";
|
21
|
// Scratch variable: assigned in do-nothing if/else branches and passed as the second argument of RegExpr in the embargo-date rule.
$var0 = "''";
|
22
|
|
23
|
//$var1 = "'corda_______::'";
|
24
|
//$var2 = "'corda__h2020::'";
|
25
|
// Funder namespace prefixes: the oaf:projectid rules below concat() one of these with the grant number extracted from dc:relation.
// NOTE(review): the inner single quotes presumably make each value an XPath string literal when the variable is used inside an xpath expression - confirm.
// In the visible script no rule references $varAKA, $varMIUR, $varNSF, $varNWO, $varRCUK, $varSFI, $varSGOV or $varTARA.
$varFP7 = "'corda_______::'";
|
26
|
$varH2020 = "'corda__h2020::'";
|
27
|
$varAKA = "'aka_________::'"; // tbd, no statements yet
|
28
|
$varAFF = "'aff_________::'";
|
29
|
$varARC = "'arc_________::'";
|
30
|
$varCONICYT = "'conicytf____::'";
|
31
|
$varDFG = "'dfgf________::'";
|
32
|
$varFCT="'fct_________::'";
|
33
|
$varFWF = "'fwf_________::'";
|
34
|
// HRZZ uses the same prefix value as $varMZOS below.
$varHRZZ = "'irb_hr______::'";
|
35
|
$varMESTD = "'mestd_______::'";
|
36
|
$varMIUR = "'miur________::'"; // tbd, no statements yet
|
37
|
// MZOS uses the same prefix value as $varHRZZ above.
$varMZOS = "'irb_hr______::'";
|
38
|
$varNHMRC = "'nhmrc_______::'";
|
39
|
$varNIH = "'nih_________::'";
|
40
|
$varNSF = "'nsf_________::'";
|
41
|
$varNWO = "'nwo_________::'";
|
42
|
$varRCUK = "'rcuk________::'";
|
43
|
$varSFI ="'sfi_________::'";
|
44
|
$varSGOV = "'sgov________::'";
|
45
|
$varSNSF = "'snsf________::'";
|
46
|
$varTARA = "'taraexp_____::'";
|
47
|
$varTUBITAK = "'tubitakf____::'";
|
48
|
$varWT = "'wt__________::'";
|
49
|
|
50
|
// Throw-away variable: assigning to it serves as the do-nothing branch of the if/else rules below.
$varDummy = "''";
|
51
|
// Datasource id: the OpenAireDataSourceId field of the repository profile whose NamespacePrefix field equals this record's oaf:datasourceprefix.
// NOTE(review): getValue(PROFILEFIELD, ...) is an engine function; presumably the first xpath builds the profile query and the second selects the value inside the result - confirm.
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
|
52
|
// Repository id read from the incoming record.
static $varRepoid = xpath:"//dri:repositoryId";
|
53
|
// Same profile query as $varDatasourceid, but reading CONFIGURATION/OFFICIAL_NAME.
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]);
|
54
|
// Record-level identifiers: objIdentifier and recordIdentifier are read from the input record, repositoryId from $varRepoid.
dri:objIdentifier = xpath:"//dri:objIdentifier";
|
55
|
dri:repositoryId = $varRepoid;
|
56
|
// this can be made easier
|
57
|
dri:recordIdentifier = xpath:"//dri:recordIdentifier";
|
58
|
// Creator, subject, publisher and source: a value with non-zero string-length is emitted whitespace-normalized; otherwise the else branch only assigns the throw-away $varDummy.
// NOTE(review): apply presumably evaluates the rule once per node selected by its xpath - confirm.
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
|
59
|
// Title guard: when no dc:title with non-zero string-length exists, the else branch assigns skipRecord() to dc:coverage.
// NOTE(review): skipRecord() presumably discards the whole record - confirm. dc:coverage is only ever assigned this way in the visible script.
if xpath:"//dc:title[string-length(.)> 0]" $varDummy = "''"; else dc:coverage = skipRecord();
|
60
|
dc:title = xpath:"//dc:title[string-length(.) > 0]/normalize-space(.)";
|
61
|
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
|
62
|
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
|
63
|
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
|
64
|
// Contributor, description and format are selected as-is, without the non-empty test or normalize-space().
dc:contributor = xpath:"//dc:contributor";
|
65
|
dc:description = xpath:"//dc:description";
|
66
|
dc:format = xpath:"//dc:format";
|
67
|
// Guard: when no dc:identifier starts with 'http', the else branch assigns skipRecord() to dc:coverage (presumably dropping the record - confirm).
// NOTE(review): $varHttpTest is only written here; nothing in the visible script reads it.
$varHttpTest = "''";
|
68
|
if xpath:"//dc:identifier[starts-with(., 'http')][last()]" $varHttpTest = "true"; else dc:coverage = skipRecord();
|
69
|
// The identifier equal to the last http identifier becomes dc:identifier; every other identifier becomes dr:CobjIdentifier.
// Both operands of compare() are whitespace-normalized: previously only the left one was, so a last http identifier carrying trailing or inner whitespace never compared equal to itself and the record got no dc:identifier at all.
apply xpath:"//dc:identifier" if xpath:"compare(normalize-space(.), normalize-space(//dc:identifier[starts-with(., 'http')][last()])) = 0" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
|
70
|
// The collection date is read from the input record; the transformation date is the value of current-dateTime().
dr:dateOfCollection = xpath:"//dri:dateOfCollection";
|
71
|
static dr:dateOfTransformation = xpath:"current-dateTime()";
|
72
|
// dc:type = xpath:"//dc:type";
|
73
|
// dc:language values are passed through Convert with the Languages argument (presumably a vocabulary name - confirm).
dc:language = Convert(xpath:"//dc:language", Languages);
|
74
|
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
|
75
|
|
76
|
// $varDateAccepted: dc:date values passed through Convert(DateISO8601, "yyyy-MM-dd", "min()"); the FP7 and H2020 rules below read its year.
// NOTE(review): "min()" presumably picks the earliest of several dates - confirm against the engine.
$varDateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
|
77
|
// The raw dc:date values are also kept.
dc:date = xpath:"//dc:date";
|
78
|
// Same Convert call as for $varDateAccepted, emitted as oaf:dateAccepted.
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
|
79
|
// dc:date values starting with info:eu-repo/date are passed to RegExpr with a substitution that removes everything up to and including info:eu-repo/date/embargoEnd/; the result is emitted as oaf:embargoenddate.
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
|
80
|
|
81
|
//apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6 or string-length(substring-before(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/'), '/')) = 6" oaf:projectid = RegExpr(xpath:"substring(normalize-space(.),1,41)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else $varDummy = "''";
|
82
|
//dc:relation = xpath:"normalize-space(.)";
|
83
|
//apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/H2020/')) = 6 or string-length(substring-before(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/H2020/'), '/')) = 6" oaf:projectid = RegExpr(xpath:"substring(normalize-space(.),1,43)", $var2, "s/^(.*info:eu-repo\/grantAgreement\/EC\/H2020\/)//gmi"); else $varDummy = "''";
|
84
|
|
85
|
// FP7
|
86
|
// FP7: six-digit grant number taken from relations of the form info:eu-repo/grantAgreement/EC/FP7(-xxx)/NNNNNN (case-insensitive), prefixed with $varFP7.
// Only applied when the year of max($varDateAccepted, '0001-01-01') is greater than 2006; distinct-values() removes repeated ids.
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7(-[^/]*)?/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2006][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7(-[^/]*)?/)(\d\d\d\d\d\d)(.*)', '$4', 'i')))";
|
87
|
// ERC (provided by OAPEN)
|
88
|
// Spelled-out funder names (European Research Council, European Union ... Seventh Framework Programme) followed by /-/NNNNNN are also mapped to the $varFP7 prefix; these two rules have no date condition.
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
|
89
|
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
|
90
|
// H2020
|
91
|
// Same shape as the FP7 rule, with the $varH2020 prefix and a year threshold of greater than 2013.
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020(-[^/]*)?/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2013][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020(-[^/]*)?/)(\d\d\d\d\d\d)(.*)', '$4', 'i')))";
|
92
|
// FCT
|
93
|
//oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', '$3', 'i')))";
|
94
|
// MESTD
|
95
|
// Disabled: this rule is character-for-character identical to the MESTD rule in the alphabetical funder list further down, so both were active at once.
// NOTE(review): two identical oaf:projectid rules presumably emit every MESTD project id twice - confirm. The FCT and WT rules next to this one were already commented out when they were re-added below.
//oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', '$3', 'i'))";
|
96
|
// WT
|
97
|
//oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
|
98
|
// AFF
|
99
|
// Per-funder rules, all of the same shape: matches() filters dc:relation values for one funder, replace() rewrites the value to the captured grant id, and concat() puts the funder prefix in front.
// Where the replace() pattern does not end in .* (ARC, CONICYT, DFG, NHMRC), any text after the matched id stays in the result.
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varAFF, replace(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3}).*', '$3', 'i'))";
|
100
|
// ARC ([A-Z]+[\d/]*|\d+)
|
101
|
// NOTE(review): matches() uses a greedy (.*) for the middle segment while replace() uses the reluctant (.*?) - confirm the difference is intended.
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*)/([A-Z]+[\d/]*|\d+)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varARC, replace(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*?)/([A-Z]+[\d/]*|\d+)', '$3', 'i'))";
|
102
|
// CONICYT \d{7,8}
|
103
|
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', 'i')]/concat($varCONICYT, replace(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', '$1', 'i'))";
|
104
|
// DFG \d{7,9}
|
105
|
// The matches() pattern has four groups and the replace() pattern three; $3 in replace() is the 7-9 digit id.
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*)/(.*?)(\d{7,9})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varDFG, replace(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*?)/.*?(\d{7,9})', '$3', 'i'))";
|
106
|
// FCT
|
107
|
// NOTE(review): filter and rewrite use different patterns here: matches() accepts any digits after the second path segment, replace() keeps the whole digit-containing third segment and is anchored with $ - confirm both accept the same inputs.
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fct/.*/.*?(\d+).*', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), 'info:eu-repo/grantagreement/fct/[^/]*/([^/]*\d+[^/]*)(/.*)*$', '$1', 'i'))";
|
108
|
// FWF [A-Z]{1,3} \d*
|
109
|
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fwf/.*/.*?([A-Z]{1,3} \d*).*', 'i')]/concat($varFWF, replace(normalize-space(.), 'info:eu-repo/grantagreement/fwf/.*/.*?([A-Z]{1,3} \d*).*', '$1', 'i'))";
|
110
|
// HRZZ info:eu-repo/grantagreement/HRZZ/[^/]*/([^/]*|[^/]*/\d*)(/.*)?
|
111
|
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', 'i')]/concat($varHRZZ, replace(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', '$1', 'i'))";
|
112
|
// MESTD
|
113
|
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', '$3', 'i'))";
|
114
|
// MZOS \d{3}-\d{7}-\d{4}
|
115
|
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/mzos/.*/.*?(\d{3}-\d{7}-\d{4}).*', 'i')]/concat($varMZOS, replace(normalize-space(.), 'info:eu-repo/grantagreement/mzos/.*/.*?(\d{3}-\d{7}-\d{4}).*', '$1', 'i'))";
|
116
|
// NHMRC \d{3,6}
|
117
|
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/.*/.*?(\d{3,6})', 'i')]/concat($varNHMRC, replace(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/.*/.*?(\d{3,6})', '$1', 'i'))";
|
118
|
// NIH ([A-Z\d]*-[A-Z\d]*|ALM 1200300-300-0-1|CIT S&?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1)
|
119
|
// NIH grant ids. The matches() filter now uses exactly the same alternation as replace(), with the hyphen required as in the pattern documented in the comment above this rule.
// Previously the filter had an optional hyphen, so a relation without a hyphenated id passed the filter, was left untouched by replace(), and the whole relation string was emitted behind the prefix as a project id.
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nih/.*/.*?([A-Z\d]*-[A-Z\d]*|ALM 1200300-300-0-1|CIT S.?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1)', 'i')]/concat($varNIH, replace(normalize-space(.), 'info:eu-repo/grantagreement/nih/.*/.*?([A-Z\d]*-[A-Z\d]*|ALM 1200300-300-0-1|CIT S.?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1)', '$1', 'i'))";
|
120
|
// NSF (\d{7}|\d{2}[A-Z]\d{4})
|
121
|
//
|
122
|
// SNSF
|
123
|
// SNSF: the third path segment after info:eu-repo/grantagreement/snsf/ is taken as the grant id; anything from the next slash on is dropped.
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/[^/]+', 'i')]/concat($varSNSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
|
124
|
// TUBITAK
|
125
|
// Id shape: three digits, one letter, two or three digits.
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/\d{3}[A-Z]\d{2,3}', 'i')]/concat($varTUBITAK, replace(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/(\d{3}[A-Z]\d{2,3})(/.*)?', '$1', 'i'))";
|
126
|
// WT
|
127
|
// Six-digit id following info:eu-repo/grantagreement/wellcome trust/-/.
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i'))";
|
128
|
|
129
|
// Relations that do not contain info:eu-repo/grantAgreement/ are kept as dc:relation. This contains() test is case-sensitive, unlike the 'i'-flagged patterns above.
apply xpath:"//dc:relation" if xpath:"not(contains(., 'info:eu-repo/grantAgreement/'))" dc:relation = xpath:"normalize-space(.)"; else $varDummy = "''";
|
130
|
|
131
|
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
|
132
|
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
|
133
|
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
|
134
|
//
|
135
|
//
|
136
|
// Emits the value of $varDatasourceid (the profile lookup defined near the top of the script).
oaf:collectedDatasourceid = xpath:"$varDatasourceid";
|
137
|
//
|
138
|
|
139
|
//apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
|
140
|
// dc:type is passed through Convert with TextTypologies; that result is passed through Convert again with SuperTypes and attached as the type attribute of dr:CobjCategory.
// NOTE(review): TextTypologies and SuperTypes are presumably vocabulary names known to the engine - confirm.
$varCobjCategory = Convert(xpath:"//dc:type", TextTypologies);
|
141
|
$varSuperType = Convert(xpath:"normalize-space($varCobjCategory)", SuperTypes);
|
142
|
dr:CobjCategory = set($varCobjCategory, @type = $varSuperType;);
|
143
|
|
144
|
// dc:rights values starting with info:eu-repo/semantics are passed through Convert with AccessRights and emitted as oaf:accessrights; all other values are kept as dc:rights.
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
|
145
|
// Default: when no dc:rights value starts with info:eu-repo/semantics, oaf:accessrights is set to the literal OPEN.
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
|
146
|
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
|
147
|
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
|
148
|
//
|
149
|
// collectedFrom and hostedBy get the same content: an empty text value with name = $varOfficialname and id = $varDatasourceid attributes.
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
|
150
|
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
|
151
|
//
|
152
|
// Collects DOI-shaped strings (10. followed by at least four digits, a slash and a suffix) from dc:identifier and dc:relation; the result is emitted below as oaf:identifier with identifierType doi.
// NOTE(review): identifierExtract is an engine function; the meaning of its three arguments is inferred from their values - confirm.
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
|
153
|
// ISBN identifiers (values starting with 978- or 979-) are taken only for datasource od______4039.
// The two starts-with() tests are parenthesized because the XPath and operator binds tighter than or; without the parentheses every 979- identifier was selected regardless of the datasource prefix.
$varIsbn = xpath:"//dc:identifier[//oaf:datasourceprefix[.='od______4039'] and (starts-with(., '978-') or starts-with(., '979-'))]";
|
154
|
// The value nodes of $varId are emitted with identifierType doi, the contents of $varIsbn with identifierType isbn.
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
|
155
|
oaf:identifier = set(xpath:"$varIsbn", @identifierType = "isbn";);
|
156
|
// The datasource prefix is copied through from the input record.
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
|
157
|
|
158
|
// ISSN: any dc:relation whose normalized text is exactly NNNN-NNN followed by a digit or X. This test is not restricted to a datasource prefix.
$varISSN = xpath:"//dc:relation[matches(normalize-space(.), '^\d{4}-\d{3}[\dX]$')]/normalize-space(.)";
|
159
|
// Journal title, only for datasource od______4039 and only when some dc:relation contains an ISSN-shaped string: the first selected dc:relation, cut at its first comma, semicolon or full stop.
// NOTE(review): the [1] picks the first dc:relation that satisfies the record-wide conditions, not necessarily one related to the ISSN - confirm this is intended.
$varJournalTitle = xpath:"//dc:relation[//oaf:datasourceprefix[.='od______4039'] and //dc:relation[matches(normalize-space(.), '\d{4}-\d{3}[\dX]')]][1]/replace(., '([^,;.]*)[,;.].*', '$1')";
|
160
|
// Start page, under the same conditions: the text between 'S. ' and the following hyphen of a dc:format value containing an S. start-end page range.
$varJournalSp = xpath:"//dc:format[//oaf:datasourceprefix[.='od______4039'] and //dc:relation[matches(normalize-space(.), '\d{4}-\d{3}[\dX]')] and matches(., 'S\. \d*-\d*')]/substring-before(substring-after(., 'S. '), '-')";
|
161
|
// End page: the text after the hyphen of the S. start-end page range in dc:format, under the same datasource and ISSN conditions as the start page.
// substring-after(., 'S. ') is applied first, mirroring $varJournalSp, so a hyphen that occurs earlier in the dc:format value is no longer mistaken for the page-range separator.
$varJournalEp = xpath:"//dc:format[//oaf:datasourceprefix[.='od______4039'] and //dc:relation[matches(normalize-space(.), '\d{4}-\d{3}[\dX]')] and matches(., 'S\. \d*-\d*')]/substring-after(substring-after(., 'S. '), '-')";
|
162
|
// oaf:journal carries $varJournalTitle as its value and $varISSN, $varJournalSp and $varJournalEp as the issn, sp and ep attributes.
oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";, @sp = xpath:"$varJournalSp";, @ep = xpath:"$varJournalEp";);
|
163
|
|
164
|
end</CODE>
|
165
|
</SCRIPT>
|
166
|
</CONFIGURATION>
|
167
|
<STATUS/>
|
168
|
<SECURITY_PARAMETERS/>
|
169
|
</BODY>
|
170
|
</RESOURCE_PROFILE>
|