Revision 59905
Added by Miriam Baglioni over 3 years ago
modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc_cleaning_OPENAIREplus_compliant_BASE.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="8683d932-8ce9-4f30-a2a5-e332a682f1b4_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2020-07-17T19:30:25+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<IMPORTED/> |
|
12 |
<SCRIPT> |
|
13 |
<TITLE>dc_cleaning_OPENAIREplus_compliant_BASE</TITLE> |
|
14 |
<CODE>declare_script "dc_cleaning_OpenAIREplus_compliant_BASE"; |
|
15 |
declare_ns oaf = "http://namespace.openaire.eu/oaf"; |
|
16 |
declare_ns dri = "http://www.driver-repository.eu/namespace/dri"; |
|
17 |
declare_ns dr = "http://www.driver-repository.eu/namespace/dr"; |
|
18 |
declare_ns dc = "http://purl.org/dc/elements/1.1/"; |
|
19 |
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance"; |
|
20 |
declare_ns xs = "http://www.w3.org/2001/XMLSchema"; // |
|
21 |
$var0 = "''"; |
|
22 |
$varFP7 = "'corda_______::'"; |
|
23 |
$varH2020 = "'corda__h2020::'"; |
|
24 |
$varAKA = "'aka_________::'"; |
|
25 |
$varARC = "'arc_________::'"; |
|
26 |
$varCONICYT = "'conicytf____::'"; |
|
27 |
$varDFG = "'dfgf________::'"; |
|
28 |
$varFCT="'fct_________::'"; |
|
29 |
$varFWF = "'fwf_________::'"; |
|
30 |
$varHRZZ = "'irb_hr______::'"; // HRZZ not found within BASE |
|
31 |
$varMESTD = "'mestd_______::'"; |
|
32 |
$varMZOS = "'irb_hr______::'"; |
|
33 |
$varNHMRC = "'nhmrc_______::'"; |
|
34 |
$varNIH = "'nih_________::'"; |
|
35 |
$varNSF = "'nsf_________::'"; |
|
36 |
$varNWO = "'nwo_________::'"; |
|
37 |
$varRCUK = "'rcuk________::'"; |
|
38 |
$varSFI ="'sfi_________::'"; |
|
39 |
$varSGOV = "'sgov________::'"; // to be added, awaiting DOI from Pilar, found project ids not in CSV list? |
|
40 |
$varSNSF = "'snsf________::'"; |
|
41 |
$varTARA = "'taraexp_____::'"; // to be added, awaiting DOI from André |
|
42 |
$varTUBITAK = "'tubitakf____::'"; |
|
43 |
$varWT = "'wt__________::'"; |
|
44 |
|
|
45 |
$varDummy = "''"; |
|
46 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
47 |
static $varRepoid = xpath:"//dri:repositoryId"; |
|
48 |
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]); |
|
49 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
|
50 |
dri:repositoryId = $varRepoid; |
|
51 |
dri:recordIdentifier = xpath:"//dri:recordIdentifier"; |
|
52 |
|
|
53 |
// skipping records |
|
54 |
// type AMA (Ask Me Anything), type journal, type dataset (which are not datasets) |
|
55 |
if xpath:"//*[local-name()='family'][. = 'r/Science']" dc:type = skipRecord(); else $varDummy = "''"; |
|
56 |
if xpath:"//*[local-name()='items']/*[local-name()='type'][. = 'journal' or . = 'dataset']" dc:type = skipRecord(); else $varDummy = "''"; |
|
57 |
// empty or missing title |
|
58 |
if xpath:"not(//*[local-name()='title']) or not(//*[local-name()='title'][string-length(.) > 0])" dc:title = skipRecord(); else $varDummy = "''"; |
|
59 |
// empty or missing creator |
|
60 |
if xpath:"not(//*[local-name()='author'][string-length(./*[local-name()='family']) + string-length(./*[local-name()='given']) > 0])" dc:creator = skipRecord(); else $varDummy = "''"; |
|
61 |
|
|
62 |
// creator |
|
63 |
//apply xpath:"//*[local-name()='author']" if xpath:"string-length(./*[local-name()='family']) + string-length(./*[local-name()='given']) > 0 and not(./*[local-name()='ORCID'])" dc:creator = xpath:"concat(normalize-space(./*[local-name()='family']), ./*[local-name()='suffix'][not(starts-with(lower-case(.), 'prof') or starts-with(lower-case(.), 'dr') or starts-with(lower-case(.), 'phd') or starts-with(lower-case(.), 'md'))]/concat(' ', normalize-space(.)), ', ', normalize-space(./*[local-name()='given']))"; else $varDummy = "''"; |
|
64 |
//apply xpath:"//*[local-name()='author']" if xpath:"string-length(./*[local-name()='family']) + string-length(./*[local-name()='given']) > 0 and ./*[local-name()='ORCID']" dc:creator = set(xpath:"concat(normalize-space(./*[local-name()='family']), ./*[local-name()='suffix'][not(starts-with(lower-case(.), 'prof') or starts-with(lower-case(.), 'dr') or starts-with(lower-case(.), 'phd') or starts-with(lower-case(.), 'md'))]/concat(' ', normalize-space(.)), ', ', normalize-space(./*[local-name()='given']))", @nameIdentifierScheme = xpath:"orcid";, @nameIdentifier = xpath:"./*[local-name()='ORCID']/substring-after(., 'http://orcid.org/')";); else $varDummy = "''"; |
|
65 |
$varOrcidName = xpath:"//*[local-name()='author'][string-length(./*[local-name()='family']) + string-length(./*[local-name()='given']) > 0]/concat(normalize-space(./*[local-name()='family']), ./*[local-name()='suffix'][not(starts-with(lower-case(.), 'prof') or starts-with(lower-case(.), 'dr') or starts-with(lower-case(.), 'phd') or starts-with(lower-case(.), 'md'))]/concat(' ', normalize-space(.)), ', ', normalize-space(./*[local-name()='given']))"; |
|
66 |
$varOrcidOrcid = xpath:"//*[local-name()='author'][string-length(./*[local-name()='family']) + string-length(./*[local-name()='given']) > 0]/substring-after(./*[local-name()='ORCID'], 'http://orcid.org/')"; // one entry per author selected by the same predicate as $varOrcidName, so positions line up; substring-after() yields '' when the ORCID child is absent or does not contain 'http://orcid.org/' |
|
67 |
dc:creator = set(xpath:"$varOrcidName", @nameIdentifier = xpath:"subsequence($varOrcidOrcid,position(),1)";, @nameIdentifierScheme=xpath:"replace(subsequence($varOrcidOrcid,position(),1),'^.+$','ORCID')";, @schemeUri=xpath:"replace(subsequence($varOrcidOrcid,position(),1),'^.+$','http://orcid.org/')";); // each attribute reads the $varOrcidOrcid entry at the current position; the pattern '^.+$' only matches a non-empty value, so scheme and schemeUri stay empty when there is no ORCID |
|
68 |
|
|
69 |
// title |
|
70 |
apply xpath:"//*[local-name()='title']" if xpath:"string-length(normalize-space(.)) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
71 |
|
|
72 |
// subjects here seem not to refer to vocabularies |
|
73 |
apply xpath:"//*[local-name()='subject']" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
74 |
|
|
75 |
// publisher |
|
76 |
apply xpath:"//*[local-name()='publisher']" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
77 |
|
|
78 |
$varHttpTest = "''"; |
|
79 |
// identifier |
|
80 |
apply xpath:"//*[local-name()='URL'][starts-with(normalize-space(.), 'http://dx.doi.org')]" if xpath:"." dc:identifier = xpath:"normalize-space(.)"; else $var0 = "''"; |
|
81 |
|
|
82 |
dr:dateOfCollection = xpath:"//dri:dateOfCollection"; |
|
83 |
static dr:dateOfTransformation = xpath:"current-dateTime()"; |
|
84 |
//###BASE type |
|
85 |
dc:type = xpath:"//*[local-name()='items']/*[local-name()='type']"; |
|
86 |
|
|
87 |
// date |
|
88 |
dc:date = xpath:"//*[local-name()='published-print' or local-name()='published-online']/*[local-name()='date-parts']/string-join(*[local-name()='array'], '-')"; |
|
89 |
// first check whether //published-print/date-parts (or published-online) contains at most 3 (array) fields, with apt lengths; then alternate the values with '-', giving month/day standard length or dummy values before. |
|
90 |
apply xpath:"(//*[local-name()='published-print' or local-name()='published-online'], //*[local-name()='issued'][not(//*[local-name()='array' and contains(., 'null')])][not(//*[local-name()='published-print' or local-name()='published-online'])])/*[local-name()='date-parts'][max((count(*),3))=3 and string-length(*[1])=4 and max((subsequence(*/string-length(),2,2),2))=2]" if xpath:"." oaf:dateAccepted = xpath:"string-join((*[1], substring(substring(concat('0',*[2],'1'),string-length(*[2])),1,2), substring(substring(concat('0',*[3],'1'),string-length(*[3])),1,2)), '-')"; else $varDummy = "''"; |
|
91 |
|
|
92 |
// FP7 |
|
93 |
//oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', '$3', 'i'))"; |
|
94 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and contains(., 'Seventh Framework Program')], ./*[local-name()='DOI' and .='10.13039/100011102']]/*[local-name()='award' and matches(., '^\d{6}$')]/concat($varFP7, .)"; |
|
95 |
// H2020 |
|
96 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and (contains(., 'Horizon 2020') or contains(., 'H2020'))], ./*[local-name()='DOI' and .='10.13039/100010661']]/*[local-name()='award' and matches(., '^\d{6}$')]/concat($varH2020, .)"; // funder matched by name (either spelling, grouped so both apply to the name element only) or by funder DOI; only six-digit awards are emitted |
|
97 |
// AKA |
|
98 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and (contains(., 'Suomen Akatemia') or contains(., 'Academy of Finland'))], ./*[local-name()='DOI' and .='10.13039/501100002341']]/*[local-name()='award']/concat($varAKA, .)"; |
|
99 |
// ARC |
|
100 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and contains(., 'Australian Research Council')], ./*[local-name()='DOI' and .='10.13039/501100000923']]/*[local-name()='award' and matches(., '^\d{6}$')]/concat($varARC, .)"; |
|
101 |
// CONICYT |
|
102 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and (contains(., 'Comisión Nacional de Investigación Científica y Tecnológica') or contains(., 'CONICYT'))], ./*[local-name()='DOI' and .='10.13039/501100002848']]/*[local-name()='award']/concat($varCONICYT, .)"; |
|
103 |
// DFG |
|
104 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and (contains(., 'Deutsche Forschungsgemeinschaft') or contains(., 'DFG'))], ./*[local-name()='DOI' and .='10.13039/501100001659']]/*[local-name()='award']/concat($varDFG, .)"; |
|
105 |
// FCT |
|
106 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and contains(., 'Fundação para a Ciência e a Tecnologia')], ./*[local-name()='DOI' and .='10.13039/501100001871']]/*[local-name()='award']/concat($varFCT, .)"; |
|
107 |
// FWF |
|
108 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and (contains(., 'Fonds zur Förderung der Wissenschaftlichen Forschung') or contains(., 'Austrian Science Fund'))], ./*[local-name()='DOI' and .='10.13039/501100002428']]/*[local-name()='award']/concat($varFWF, .)"; // funder matched by name (German or English, grouped so both apply to the name element only) or by funder DOI |
|
109 |
// MESTD |
|
110 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and ((contains(., 'Ministarstvo Prosvete, Nauke i Tehnolo') and contains(., 'kog Razvoja')) or contains(., 'MESTD'))], ./*[local-name()='DOI' and .='10.13039/501100004564']]/*[local-name()='award']/concat($varMESTD, .)"; // funder matched by name (split around the diacritic in the Serbian name) or by MESTD's own funder DOI; 10.13039/501100001871 is the FCT DOI and must not be used here |
|
111 |
// MZOS |
|
112 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and (contains(., 'Ministarstvo Znanosti, Obrazovanja i Sporta') or contains(., 'Ministry of Science, Education and Sports'))], ./*[local-name()='DOI' and .='10.13039/501100006588']]/*[local-name()='award']/concat($varMZOS, .)"; |
|
113 |
// NHMRC |
|
114 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and (contains(., 'National Health and Medical Research Council') or contains(., 'NHMRC'))], ./*[local-name()='DOI' and .='10.13039/501100000925']]/*[local-name()='award']/concat($varNHMRC, .)"; |
|
115 |
// NIH |
|
116 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and contains(., 'National Institutes of Health')], ./*[local-name()='DOI' and .='10.13039/100000002']]/*[local-name()='award']/concat($varNIH, .)"; |
|
117 |
// NSF |
|
118 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and contains(., 'National Science Foundation')], ./*[local-name()='DOI' and .='10.13039/100000001']]/*[local-name()='award']/concat($varNSF, .)"; |
|
119 |
// NWO |
|
120 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and (contains(., 'Netherlands Organisation for Scientific Research') or contains(., 'Nederlandse Organisatie voor Wetenschappelijk Onderzoek'))], ./*[local-name()='DOI' and .='10.13039/501100003246']]/*[local-name()='award']/concat($varNWO, .)"; // funder matched by name (English or Dutch, grouped so both apply to the name element only) or by funder DOI |
|
121 |
// RCUK |
|
122 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and (contains(., 'Research Councils UK') or contains(., 'RCUK'))], ./*[local-name()='DOI' and .='10.13039/501100000690']]/*[local-name()='award']/concat($varRCUK, .)"; |
|
123 |
// SFI |
|
124 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and contains(., 'Science Foundation Ireland')], ./*[local-name()='DOI' and .='10.13039/501100001602']]/*[local-name()='award']/concat($varSFI, .)"; |
|
125 |
// SNSF |
|
126 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and (contains(., 'Swiss National Science Foundation') or contains(., 'Schweizerischer Nationalfonds zur Förderung der Wissenschaftlichen Forschung'))], ./*[local-name()='DOI' and .='10.13039/501100001711']]/*[local-name()='award']/concat($varSNSF, .)"; // funder matched by name (English or German, grouped so both apply to the name element only) or by funder DOI; awards go into the SNSF namespace prefix |
|
127 |
// TUBITAK |
|
128 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and (contains(., 'Turkish National Science and Research Council') or (contains(., 'Türkiye Bilimsel ve Teknolojik Ara') and contains(., 'rma Kurumu')))], ./*[local-name()='DOI' and .='10.13039/501100004410']]/*[local-name()='award']/concat($varTUBITAK, .)"; |
|
129 |
// WT |
|
130 |
oaf:projectid = xpath:"//*[local-name()='funder'][./*[local-name()='name' and contains(., 'Wellcome Trust')], ./*[local-name()='DOI' and .='10.13039/100004440']]/*[local-name()='award']/concat($varWT, .)"; |
|
131 |
|
|
132 |
// dr:CobjCategory |
|
133 |
// records declared as dataset are not datasets, on the landing page they are declared as paper, letter, ... AMAs are also declared as datasets |
|
134 |
//dr:CobjCategory = Convert(xpath:"//*[local-name()='items']/*[local-name()='type' and not(. = 'dataset')]", TextTypologies); |
|
135 |
//dr:CobjCategory = Convert(xpath:"//*[local-name()='items']/*[local-name()='type']", TextTypologies); |
|
136 |
$varCobjCategory = Convert(xpath:"//*[local-name()='items']/*[local-name()='type']", TextTypologies); |
|
137 |
$varSuperType = Convert(xpath:"normalize-space($varCobjCategory)", SuperTypes); |
|
138 |
dr:CobjCategory = set($varCobjCategory, @type = $varSuperType;); |
|
139 |
|
|
140 |
// review status |
|
141 |
// initially (June/July 2020) no review indications found |
|
142 |
|
|
143 |
// there seem to be records with several licenses indicating OA, so for $varEmbargoEnd min is chosen to avoid a sequence |
|
144 |
$varEmbargoEnd = xpath:"min(//*[local-name()='license'][./*[local-name()='URL' and (contains(., 'http://creativecommons.org/licenses/by') or contains(., 'http://www.elsevier.com/open-access/userlicense/'))]]/*[local-name()='start']/*[local-name()='date-time']/substring-before(., 'T'))"; |
|
145 |
if xpath:"(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = "EMBARGO"; else $var0 = "''"; |
|
146 |
if xpath:"$varEmbargoEnd and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
147 |
if xpath:"not($varEmbargoEnd)" oaf:accessrights = "UNKNOWN"; else $var0 = "''"; |
|
148 |
dc:rights = xpath:"//*[local-name()='license']/*[local-name()='URL']"; |
|
149 |
if xpath:"$varEmbargoEnd" dc:date = xpath:"concat('info:eu-repo/date/embargoEnd/', $varEmbargoEnd)"; else $var0 = "''"; |
|
150 |
if xpath:"$varEmbargoEnd" oaf:license = xpath:"//*[local-name()='license']/*[local-name()='URL'][contains (., 'http://creativecommons.org/licenses/by') or contains(., 'http://www.elsevier.com/open-access/userlicense/')]"; else $var0 = "''"; |
|
151 |
|
|
152 |
// |
|
153 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
154 |
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
155 |
// |
|
156 |
|
|
157 |
apply xpath:"//*[local-name()='article-number']" if xpath:"true()" dr:CobjIdentifier = xpath:"."; else $var0 = "''"; |
|
158 |
|
|
159 |
//###BASE oaf:identifier |
|
160 |
$varId = identifierExtract('["//DOI"]' , xpath:"./*[local-name()='record']/*[local-name()='metadata']/*[local-name()='items']/*[local-name()='DOI']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)'); |
|
161 |
// 1st param: list of xpath expressions to be applied on the metadata in json syntax; 2nd param: xpath expression for the metadata record; 3rd param reg expr that matches with a negative lookahead for the first group and extracts digits of the second group |
|
162 |
//$varPmId = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/pmid/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/pmid/)(\d+)'); |
|
163 |
// $varUrn = xpath:"substring-after(//dc:relation[starts-with(normalize-space(.), 'info:eu-repo/semantics/altIdentifier/urn/')], 'info:eu-repo/semantics/altIdentifier/urn/')"; |
|
164 |
//$varUrn = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/urn/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/urn/)(urn:nbn:.*)'); |
|
165 |
//$varIsbn = identifierExtract('["//ISBN[starts-with(normalize-space(.), \"http://id.crossref.org/isbn/\")]"]' , xpath:"./*[local-name()='record']" , '(?!http://id.crossref.org/isbn/)((\d*[-\s]){3,4}[\dX])'); |
|
166 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
|
167 |
//oaf:identifier = set(xpath:"$varPmId//value", @identifierType = "pmid";); |
|
168 |
//oaf:identifier = set(xpath:"$varUrn//value", @identifierType = "urn";); |
|
169 |
//oaf:identifier = set(xpath:"$varIsbn//value", @identifierType = "isbn";); |
|
170 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
|
171 |
// |
|
172 |
$varISSN = xpath:"//*[local-name()='ISSN']"; |
|
173 |
$varJournalTitle = xpath:"//*[local-name()='container-title']"; |
|
174 |
$varJournalVol = xpath:"//*[local-name()='volume']"; |
|
175 |
$varJournalIss = xpath:"//*[local-name()='issue']"; |
|
176 |
$varJournalSp = xpath:"//*[local-name()='page']/substring-before(., '-')"; |
|
177 |
$varJournalEp = xpath:"//*[local-name()='page']/substring-after(., '-')"; |
|
178 |
oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";, @vol = xpath:"$varJournalVol";, @iss = xpath:"$varJournalIss";, @sp = xpath:"$varJournalSp";, @ep = xpath:"$varJournalEp";); |
|
179 |
|
|
180 |
end</CODE> |
|
181 |
</SCRIPT> |
|
182 |
</CONFIGURATION> |
|
183 |
<STATUS/> |
|
184 |
<SECURITY_PARAMETERS/> |
|
185 |
</BODY> |
|
186 |
</RESOURCE_PROFILE> |
Also available in: Unified diff
TR for BASE