Revision 53911
Added by Aenne Loehden over 5 years ago
modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc_cleaning_OPENAIREplus_compliant.xml | ||
---|---|---|
4 | 4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
5 | 5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
6 | 6 |
<RESOURCE_URI value=""/> |
7 |
<DATE_OF_CREATION value="2017-03-01T16:28:28+00:00"/>
|
|
7 |
<DATE_OF_CREATION value="2018-10-11T16:43:14+00:00"/>
|
|
8 | 8 |
</HEADER> |
9 | 9 |
<BODY> |
10 | 10 |
<CONFIGURATION> |
... | ... | |
22 | 22 |
$var0 = "''"; |
23 | 23 |
$varFP7 = "'corda_______::'"; |
24 | 24 |
$varH2020 = "'corda__h2020::'"; |
25 |
$varAFF = "'aff_________::'"; |
|
26 |
$varARC = "'arc_________::'"; |
|
27 |
$varCONICYT = "'conicytf____::'"; |
|
28 |
$varDFG = "'dfgf________::'"; |
|
25 | 29 |
$varFCT="'fct_________::'"; |
30 |
$varFWF = "'fwf_________::'"; |
|
31 |
$varHRZZ = "'irb_hr______::'"; |
|
32 |
$varMESTD = "'mestd_______::'"; |
|
33 |
$varMZOS = "'irb_hr______::'"; |
|
34 |
$varNHMRC = "'nhmrc_______::'"; |
|
35 |
$varNIH = "'nih_________::'"; |
|
36 |
$varNSF = "'nsf_________::'"; |
|
37 |
$varNWO = "'nwo_________::'"; |
|
38 |
$varRCUK = "'rcuk________::'"; |
|
39 |
$varSFI ="'sfi_________::'"; |
|
40 |
$varSGOV = "'sgov________::'"; |
|
41 |
$varSNSF = "'snsf________::'"; |
|
42 |
$varTARA = "'taraexp_____::'"; |
|
43 |
$varTUBITAK = "'tubitakf____::'"; |
|
26 | 44 |
$varWT = "'wt__________::'"; |
27 |
$varMESTD = "'mestd_______::'"; |
|
28 | 45 |
$varDummy = "''"; |
29 | 46 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
30 | 47 |
static $varRepoid = xpath:"//dri:repositoryId"; |
... | ... | |
32 | 49 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
33 | 50 |
dri:repositoryId = $varRepoid; |
34 | 51 |
dri:recordIdentifier = xpath:"//dri:recordIdentifier"; |
52 |
|
|
53 |
// skip test records |
|
54 |
if xpath:"//oaf:datasourceprefix[.='od______2659'] and //dc:title[lower-case(.) = 'popper test archive'] and //dc:creator[lower-case(.) = 'author, test'] and //dc:description[starts-with(lower-case(.), 'a short description of the article')]" dc:title = skipRecord(); else $varDummy= "''"; |
|
55 |
if xpath:"//oaf:datasourceprefix[.='od______2659'] and //dc:title[lower-case(.) = ('test doc', 'test_publish', 'test html', 'final_test')] and //dc:description = //dc:title" dc:title = skipRecord(); else $varDummy= "''"; |
|
56 |
if xpath:"count(//*[matches(., '^test(test|[\s\d,])*$', 'i')]) >= 2" dc:title = skipRecord(); else $varDummy= "''"; |
|
57 |
|
|
35 | 58 |
if xpath:"//oai:setSpec[.='col_data_1694'] or //dc:creator[starts-with(., 'test')]" dc:coverage = skipRecord(); else $varDummy = "''"; |
36 | 59 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0 and not(contains(., 'US National Cancer Institute'))" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; |
37 | 60 |
if xpath:"//dc:title[string-length(.)> 0] and not(//dc:creator[.='Test'])" $varDummy = "''"; else dc:coverage = skipRecord(); |
... | ... | |
72 | 95 |
dc:contributor = xpath:"//dc:contributor"; |
73 | 96 |
dc:description = xpath:"string-join(//dc:description[concat(normalize-space(.), '')], codepoints-to-string(10))"; |
74 | 97 |
$varHttpTest = "''"; |
75 |
if xpath:"//dc:identifier[starts-with(., 'http')][not(starts-with(., 'http://hdl.handle.net/123456789') or starts-with(., 'https://hdl.handle.net/123456789'))]" $varHttpTest = "true"; else dc:identifier = skipRecord();
|
|
98 |
if xpath:"//dc:identifier[starts-with(normalize-space(.), 'http')][not(starts-with(., 'http://hdl.handle.net/123456789') or starts-with(., 'https://hdl.handle.net/123456789'))]" $varHttpTest = "true"; else dc:identifier = skipRecord();
|
|
76 | 99 |
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; |
77 | 100 |
dr:dateOfCollection = xpath:"//dri:dateOfCollection"; |
78 | 101 |
static dr:dateOfTransformation = xpath:"current-dateTime()"; |
... | ... | |
98 | 121 |
// H2020 |
99 | 122 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2013][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
100 | 123 |
// FCT |
101 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', '$3', 'i')))"; |
|
124 |
//oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', '$3', 'i')))";
|
|
102 | 125 |
// MESTD |
103 | 126 |
// disabled: identical to the MESTD rule in the alphabetical funder list further down; keeping both active emits every MESTD project id twice (same treatment as the superseded FCT and WT rules)
//oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', '$3', 'i'))"; |
104 | 127 |
// WT |
105 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
128 |
//oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
129 |
// AFF |
|
130 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varAFF, replace(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3})', '$3', 'i'))"; |
|
131 |
// ARC ([A-Z]+[\d/]*|\d+) |
|
132 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*)/([A-Z]+[\d/]*|\d+)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varARC, replace(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*?)/([A-Z]+[\d/]*|\d+)', '$3', 'i'))"; |
|
133 |
// CONICYT \d{7,8} |
|
134 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', 'i')]/concat($varCONICYT, replace(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', '$1', 'i'))"; |
|
135 |
// DFG \d{7,9} |
|
136 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*)/(.*?)(\d{7,9})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varDFG, replace(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*?)/.*?(\d{7,9})', '$3', 'i'))"; |
|
137 |
// FCT |
|
138 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fct/.*/.*?(\d+).*', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), 'info:eu-repo/grantagreement/fct/[^/]*/([^/]*\d+[^/]*)(/.*)*$', '$1', 'i'))"; |
|
139 |
// FWF [A-Z]{1,3} \d* |
|
140 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fwf/.*/.*?([A-Z]{1,3} \d*).*', 'i')]/concat($varFWF, replace(normalize-space(.), 'info:eu-repo/grantagreement/fwf/.*/.*?([A-Z]{1,3} \d*).*', '$1', 'i'))"; |
|
141 |
// HRZZ info:eu-repo/grantagreement/HRZZ/[^/]*/([^/]*|[^/]*/\d*)(/.*)? |
|
142 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', 'i')]/concat($varHRZZ, replace(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', '$1', 'i'))"; |
|
143 |
// MESTD |
|
144 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', '$3', 'i'))"; |
|
145 |
// MZOS \d{3}-\d{7}-\d{4} |
|
146 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/mzos/.*/.*?(\d{3}-\d{7}-\d{4}).*', 'i')]/concat($varMZOS, replace(normalize-space(.), 'info:eu-repo/grantagreement/mzos/.*/.*?(\d{3}-\d{7}-\d{4}).*', '$1', 'i'))"; |
|
147 |
// NHMRC \d{3,6} |
|
148 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/.*/.*?(\d{3,6})', 'i')]/concat($varNHMRC, replace(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/.*/.*?(\d{3,6})', '$1', 'i'))"; |
|
149 |
// NIH ([A-Z\d]*-[A-Z\d]*|ALM 1200300-300-0-1|CIT S&?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1) |
|
150 |
// matches() and replace() must use the same pattern: with the hyphen mandatory only in replace(), ids without a hyphen passed the filter but were not rewritten, so the whole relation URI was appended to the NIH prefix
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nih/.*/.*?([A-Z\d]*-?[A-Z\d]*|ALM 1200300-300-0-1|CIT S.?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1)', 'i')]/concat($varNIH, replace(normalize-space(.), 'info:eu-repo/grantagreement/nih/.*/.*?([A-Z\d]*-?[A-Z\d]*|ALM 1200300-300-0-1|CIT S.?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1)', '$1', 'i'))"; |
|
151 |
// NSF (\d{7}|\d{2}[A-Z]\d{4}) |
|
152 |
// |
|
153 |
// SNSF |
|
154 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/[^/]+', 'i')]/concat($varSNSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
155 |
// TUBITAK |
|
156 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/\d{3}[A-Z]\d{2,3}', 'i')]/concat($varTUBITAK, replace(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/(\d{3}[A-Z]\d{2,3})(/.*)?', '$1', 'i'))"; |
|
157 |
// WT |
|
158 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i'))"; |
|
159 |
|
|
106 | 160 |
dc:relation = xpath:"//dc:relation"; |
107 | 161 |
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)"; |
108 | 162 |
// |
... | ... | |
111 | 165 |
// |
112 | 166 |
// dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies); |
113 | 167 |
// if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies); |
114 |
if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type) | //oai:setSpec", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies);
|
|
168 |
if xpath:"//dc:type[1]/lower-case(.) = 'text' or //oaf:datasourceprefix/lower-case(.) = 'openedition_'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type | //oai:setSpec)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies);
|
|
115 | 169 |
// |
116 | 170 |
if xpath:"(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = "EMBARGO"; else $var0 = "''"; |
117 | 171 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo')) and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))]" oaf:accessrights = Convert(xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]", AccessRights); else $var0 = "''"; |
... | ... | |
126 | 180 |
// oaf:accessrights = xpath:"//dc:rights[not(contains(normalize-space(.), 'info:eu-repo/semantics'))]/normalize-space('OPEN')"; |
127 | 181 |
// oaf:accessrights = xpath:"not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())"; |
128 | 182 |
// |
183 |
oaf:license = xpath:"//dc:rights[contains (., 'http://creativecommons.org/licenses/') or contains(., 'http://opensource.org/licenses/')]"; |
|
184 |
|
|
129 | 185 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
130 | 186 |
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
131 | 187 |
// |
... | ... | |
134 | 190 |
$varPmId = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/pmid/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/pmid/)(\d+)'); |
135 | 191 |
// $varUrn = xpath:"substring-after(//dc:relation[starts-with(normalize-space(.), 'info:eu-repo/semantics/altIdentifier/urn/')], 'info:eu-repo/semantics/altIdentifier/urn/')"; |
136 | 192 |
$varUrn = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/urn/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/urn/)(urn:nbn:.*)'); |
193 |
// dc:source values starting with 978 or 979, restricted to datasource od______2097; the two prefix tests are parenthesised because 'and' binds tighter than 'or' in XPath, which otherwise lets the 979 test bypass the datasource restriction
$varIsbn = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2097'] and (starts-with(., '978') or starts-with(., '979'))]"; |
|
194 |
$varHandle = xpath:"//dc:identifier[//oaf:datasourceprefix[.='od______2097'] and starts-with(., 'http://hdl.handle.net/')]/substring-after(., 'http://hdl.handle.net/')"; |
|
137 | 195 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
138 | 196 |
oaf:identifier = set(xpath:"$varPmId//value", @identifierType = "pmid";); |
139 | 197 |
oaf:identifier = set(xpath:"$varUrn//value", @identifierType = "urn";); |
198 |
oaf:identifier = set(xpath:"$varIsbn", @identifierType = "isbn";); |
|
199 |
oaf:identifier = set(xpath:"$varHandle", @identifierType = "handle";); |
|
140 | 200 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
141 | 201 |
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '('))"; |
142 |
$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1'))"; |
|
202 |
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1'))"; |
|
203 |
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1')), //dc:source[//oaf:datasourceprefix[.='od______2097'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1]"; |
|
204 |
$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1')), //dc:source[//oaf:datasourceprefix[.='od______2097'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1], //dc:source[1][//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and //dc:source[not(starts-with(., 'ISSN '))]]/replace(., '^(.*?)\.\s*\d{4}.*$', '$1'), //dc:source[//oaf:datasourceprefix[.='issn22953671'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1]/substring-before(., ';')"; |
|
205 |
|
|
143 | 206 |
//$varISSN = xpath:"//oai:setSpec[starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:'))"; |
144 |
$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')"; |
|
145 |
oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";); |
|
207 |
//$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')"; |
|
208 |
//$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:source[//oaf:datasourceprefix='issn20381026'][matches(.,'\d\d\d\d-\d\d\d\d')][1], //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.='od______2097'] and matches(., '\d{4}-\d{3}[\dX]')]"; |
|
209 |
$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:') or starts-with(., 'ISSN ')]/normalize-space(replace(., 'ISSN[:\s](.*)$', '$1')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.=('od______2097', 'issn22953671')] and matches(., '^\d{4}-\d{3}[\dX]$')][1]"; |
|
210 |
|
|
211 |
$varEISSN = xpath:"//dc:relation[starts-with(., 'eissn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/eissn/')]/replace(normalize-space(substring-after(., 'eissn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')"; |
|
212 |
//oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";); |
|
213 |
|
|
214 |
//to be improved: many identical checks |
|
215 |
$varVol = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/replace(., '^.*?\.\s*(\d{4})($|,.*$)', '$1')"; |
|
216 |
$varIss = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(substring-before(substring-after(., 'Nr.'), ','))"; |
|
217 |
$varSp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(tokenize(substring-after(., ', p.'), '-')[1])"; |
|
218 |
$varEp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(reverse(tokenize(substring-after(., ', p.'), '-'))[1])"; |
|
219 |
|
|
220 |
//to be improved: many empty attributes |
|
221 |
oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";, @vol = xpath:"$varVol";, @iss = xpath:"$varIss";, @sp = xpath:"$varSp";, @ep = xpath:"$varEp";); |
|
222 |
|
|
146 | 223 |
if xpath:"//oaf:datasourceprefix[.='dovemedicalp']" oaf:fulltext = xpath:"concat('file:///mnt/downloaded_dumps/dovepress/', substring-after(//*[local-name()='header']/*[local-name()='identifier'], 'oai:dovepress.com/'), '.pdf')"; else $varDummy= "''"; |
224 |
|
|
225 |
if xpath:"//oaf:datasourceprefix[.='od______3848'] and //dc:format[.='application/pdf']" oaf:fulltext = xpath:"//dc:identifier[ends-with(lower-case(normalize-space(.)), '.pdf')][starts-with(lower-case(normalize-space(.)), 'https://cris.cumulus.vub.ac.be/')]"; else $varDummy= "''"; |
|
226 |
if xpath:"//oaf:datasourceprefix[.='doaj21976775' or .='issn21976775'] and //dc:identifier[starts-with(normalize-space(.), 'https://policyreview.info/node/')]" oaf:fulltext = xpath:"concat(//dc:identifier[starts-with(normalize-space(.), 'https://policyreview.info/node/')]/normalize-space(.), '/pdf')"; else $varDummy= "''"; |
|
227 |
apply xpath:"//dc:relation[starts-with(., 'https://etalpykla.lituanistikadb.lt/fedora/get/')][//oaf:datasourceprefix[.='od______2712']]" if xpath:"true()" oaf:fulltext = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
228 |
|
|
229 |
// community |
|
230 |
// a concept must not be emitted with an empty id attribute, i.e. when the record has no community - ugly, but seems to work (oaf:datasourceprefix is used only because it is a field available in all records) |
|
231 |
$varCommunity = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/')]/substring-after(., 'url:https://openaire.eu/communities/')"; |
|
232 |
oaf:concept = set(xpath:"//oaf:datasourceprefix[string-length($varCommunity) gt 0]/''", @id = $varCommunity;); |
|
233 |
|
|
147 | 234 |
end</CODE> |
148 | 235 |
</SCRIPT> |
149 | 236 |
</CONFIGURATION> |
Also available in: Unified diff