Revision 57401
Added by Aenne Loehden about 5 years ago
modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc_cleaning_OPENAIREplus_compliant.xml | ||
---|---|---|
4 | 4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
5 | 5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
6 | 6 |
<RESOURCE_URI value=""/> |
7 |
<DATE_OF_CREATION value="2018-10-11T16:43:14+00:00"/>
|
|
7 |
<DATE_OF_CREATION value="2019-07-30T09:30:36+00:00"/>
|
|
8 | 8 |
</HEADER> |
9 | 9 |
<BODY> |
10 | 10 |
<CONFIGURATION> |
... | ... | |
22 | 22 |
$var0 = "''"; |
23 | 23 |
$varFP7 = "'corda_______::'"; |
24 | 24 |
$varH2020 = "'corda__h2020::'"; |
25 |
$varAKA = "'aka_________::'"; // tbd, no statements yet |
|
25 | 26 |
$varAFF = "'aff_________::'"; |
26 | 27 |
$varARC = "'arc_________::'"; |
27 | 28 |
$varCONICYT = "'conicytf____::'"; |
... | ... | |
30 | 31 |
$varFWF = "'fwf_________::'"; |
31 | 32 |
$varHRZZ = "'irb_hr______::'"; |
32 | 33 |
$varMESTD = "'mestd_______::'"; |
34 |
$varMIUR = "'miur________::'"; // tbd, no statements yet |
|
33 | 35 |
$varMZOS = "'irb_hr______::'"; |
34 | 36 |
$varNHMRC = "'nhmrc_______::'"; |
35 | 37 |
$varNIH = "'nih_________::'"; |
... | ... | |
56 | 58 |
if xpath:"count(//*[matches(., '^test(test|[\s\d,])*$', 'i')]) >= 2" dc:title = skipRecord(); else $varDummy= "''"; |
57 | 59 |
|
58 | 60 |
if xpath:"//oai:setSpec[.='col_data_1694'] or //dc:creator[starts-with(., 'test')]" dc:coverage = skipRecord(); else $varDummy = "''"; |
59 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0 and not(contains(., 'US National Cancer Institute'))" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
61 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0 and not(contains(., 'US National Cancer Institute')) and normalize-space(.) != ','" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
|
|
60 | 62 |
if xpath:"//dc:title[string-length(.)> 0] and not(//dc:creator[.='Test'])" $varDummy = "''"; else dc:coverage = skipRecord(); |
61 | 63 |
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''"; |
62 | 64 |
// |
... | ... | |
67 | 69 |
// assign context: if field value or @xsi:type refers to an approved vocabulary/classification/thesaurus, assign its normed code |
68 | 70 |
// normalise form: in case of approved vocabulary/classification/thesaurus: 'context:subject', otherwise: 'subject [additional information]' |
69 | 71 |
// remove duplicates: identical pairs of value/term and context/vocabulary |
70 |
$subjVocHarv = xpath:"'bicssc','bk','ddc','gok','jel classification','jel codes','jelelement','jel','lcsh','mesh','msc','rvk'"; // subject contexts/vocabularies as harvested
|
|
71 |
$subjVocCode = xpath:"'bicssc','bk','ddc','gok','jel', 'jel', 'jel', 'jel','lcsh','mesh','msc','rvk'"; // subject contexts/vocabularies as normed within OpenAIRE
|
|
72 |
$subjVocHarv = xpath:"'acm','bicssc','bk','ddc','gok','jel classification','jel codes','jelelement','jel','lcsh','mesh','msc','pacs','rvk','udc'"; // subject contexts/vocabularies as harvested
|
|
73 |
$subjVocCode = xpath:"'ccs','bicssc','bk','ddc','gok','jel', 'jel', 'jel', 'jel','lcsh','mesh','msc','pacs','rvk','udc'"; // subject contexts/vocabularies as normed within OpenAIRE
|
|
72 | 74 |
$subjVoc = xpath:"concat('(',string-join($subjVocHarv,'|'),')')"; // regular expression for subject contexts |
73 | 75 |
$subjVocVal = xpath:"concat('^\s*','((info:eu-repo/classification/)?',$subjVoc,'[:/].*)')"; // regular expression for subject contexts in field values |
74 | 76 |
$subjVocPar = xpath:"concat('^\s*','(dcterms:\s*)?',$subjVoc,'\s*$')"; // regular expression for subject contexts in field parameters |
... | ... | |
127 | 129 |
// WT |
128 | 130 |
//oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
129 | 131 |
// AFF |
130 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varAFF, replace(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3})', '$3', 'i'))"; |
|
132 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varAFF, replace(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3}).*', '$3', 'i'))";
|
|
131 | 133 |
// ARC ([A-Z]+[\d/]*|\d+) |
132 | 134 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*)/([A-Z]+[\d/]*|\d+)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varARC, replace(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*?)/([A-Z]+[\d/]*|\d+)', '$3', 'i'))"; |
133 | 135 |
// CONICYT \d{7,8} |
... | ... | |
165 | 167 |
// |
166 | 168 |
// dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies); |
167 | 169 |
// if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies); |
168 |
if xpath:"//dc:type[1]/lower-case(.) = 'text' or //oaf:datasourceprefix/lower-case(.) = 'openedition_'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type | //oai:setSpec)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies); |
|
170 |
// if xpath:"//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type | //oai:setSpec)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies); |
|
171 |
$varCobjCategoryReverse = Convert(xpath:"insert-before(reverse(//dc:type) , 0, reverse(//oai:setSpec))", TextTypologies); |
|
172 |
$varSuperTypeReverse = Convert(xpath:"normalize-space($varCobjCategoryReverse)", SuperTypes); |
|
173 |
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_']/$varCobjCategoryReverse", @type = $varSuperTypeReverse;); |
|
174 |
$varCobjCategoryStraight = Convert(xpath:"insert-before(//dc:type , 100, //oai:setSpec)", TextTypologies); |
|
175 |
$varSuperTypeStraight = Convert(xpath:"normalize-space($varCobjCategoryStraight)", SuperTypes); |
|
176 |
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[not(//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_') and (not(//oaf:datasourceprefix/lower-case(.) = 'od________65'))]/$varCobjCategoryStraight", @type = $varSuperTypeStraight;); |
|
177 |
|
|
178 |
// CERN CDS when dc:type or setSpec explicitly states resource type |
|
179 |
// (currently :CONF is not covered, as it is not included in the vocabulary and already lands in literature; other sets might also be addressed, depending on the marked resource types) |
|
180 |
$varCobjCategoryCernExplicit = Convert(xpath:"normalize-space((//dc:type, //*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')]/tokenize(., ':')[2])[1])", TextTypologies); |
|
181 |
$varSuperTypeCernExplicit = Convert(xpath:"normalize-space($varCobjCategoryCernExplicit)", SuperTypes); |
|
182 |
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and (//dc:type or //*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernExplicit", @type = $varSuperTypeCernExplicit;); |
|
183 |
// CERN CDS when the set only vaguely hints at literature |
|
184 |
$varCobjCategoryCernVague = xpath:"//oaf:datasourceprefix[not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')]) and //*[local-name() = 'setSpec'][ends-with(., ':FULLTEXT')]]/'0038'"; |
|
185 |
$varSuperTypeCernVague = Convert(xpath:"normalize-space($varCobjCategoryCernVague)", SuperTypes); |
|
186 |
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernVague", @type = $varSuperTypeCernVague;); |
|
187 |
// CERN CDS when no hint |
|
188 |
$varCobjCategoryCernUnknown = xpath:"//oaf:datasourceprefix[not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT') or ends-with(., ':FULLTEXT')])]/'0000'"; |
|
189 |
$varSuperTypeCernUnknown = Convert(xpath:"normalize-space($varCobjCategoryCernUnknown)", SuperTypes); |
|
190 |
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernUnknown", @type = $varSuperTypeCernUnknown;); |
|
169 | 191 |
// |
170 |
if xpath:"(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = "EMBARGO"; else $var0 = "''"; |
|
171 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo')) and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))]" oaf:accessrights = Convert(xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]", AccessRights); else $var0 = "''"; |
|
172 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo') and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))]" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
173 |
if xpath:"count(//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/')]) eq 0" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
192 |
if xpath:"(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()) and not(//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = "EMBARGO"; else $var0 = "''";
|
|
193 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo')) and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))] or (//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = Convert(xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]", AccessRights); else $var0 = "''";
|
|
194 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo') and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))] and not(//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = "OPEN"; else $var0 = "''";
|
|
195 |
if xpath:"count(//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/')]) eq 0 and not($varDatasourceid = ('opendoar____::3532', 'opendoar____::109', 'opendoar____::151'))" oaf:accessrights = "OPEN"; else $var0 = "''";
|
|
174 | 196 |
// |
175 | 197 |
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') and (xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
176 | 198 |
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') " oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
... | ... | |
180 | 202 |
// oaf:accessrights = xpath:"//dc:rights[not(contains(normalize-space(.), 'info:eu-repo/semantics'))]/normalize-space('OPEN')"; |
181 | 203 |
// oaf:accessrights = xpath:"not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())"; |
182 | 204 |
// |
205 |
if xpath:"$varDatasourceid = 'opendoar____::3532' and //dc:format = 'fulltext'" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
206 |
if xpath:"$varDatasourceid = 'opendoar____::3532' and //dc:format = 'abstractOnly'" oaf:accessrights = "CLOSED"; else $var0 = "''"; |
|
207 |
if xpath:"$varDatasourceid = 'opendoar____::3532' and not(//dc:format = ('fulltext', 'abstractOnly'))" oaf:accessrights = "UNKNOWN"; else $var0 = "''"; |
|
208 |
if xpath:"$varDatasourceid = 'opendoar____::109'" oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights[starts-with(., 'http')][1])", AccessRights); else $var0 = "''"; |
|
183 | 209 |
oaf:license = xpath:"//dc:rights[contains (., 'http://creativecommons.org/licenses/') or contains(., 'http://opensource.org/licenses/')]"; |
184 | 210 |
|
185 | 211 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
... | ... | |
198 | 224 |
oaf:identifier = set(xpath:"$varIsbn", @identifierType = "isbn";); |
199 | 225 |
oaf:identifier = set(xpath:"$varHandle", @identifierType = "handle";); |
200 | 226 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
227 |
|
|
228 |
// journal data; |
|
229 |
// PURE: exposes the ISSN in field ns2:isPartOf; the journal title is not extractable due to the use of ' (apostrophe) in the source field |
|
230 |
|
|
201 | 231 |
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '('))"; |
202 | 232 |
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1'))"; |
203 | 233 |
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1')), //dc:source[//oaf:datasourceprefix[.='od______2097'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1]"; |
... | ... | |
206 | 236 |
//$varISSN = xpath:"//oai:setSpec[starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:'))"; |
207 | 237 |
//$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')"; |
208 | 238 |
//$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:source[//oaf:datasourceprefix='issn20381026'][matches(.,'\d\d\d\d-\d\d\d\d')][1], //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.='od______2097'] and matches(., '\d{4}-\d{3}[\dX]')]"; |
209 |
$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:') or starts-with(., 'ISSN ')]/normalize-space(replace(., 'ISSN[:\s](.*)$', '$1')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.=('od______2097', 'issn22953671')] and matches(., '^\d{4}-\d{3}[\dX]$')][1]"; |
|
239 |
//$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:') or starts-with(., 'ISSN ')]/normalize-space(replace(., 'ISSN[:\s](.*)$', '$1')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.=('od______2097', 'issn22953671')] and matches(., '^\d{4}-\d{3}[\dX]$')][1]"; |
|
240 |
$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:') or starts-with(., 'ISSN ')]/normalize-space(replace(., 'ISSN[:\s](.*)$', '$1')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.=('od______2097', 'issn22953671')] and matches(., '^\d{4}-\d{3}[\dX]$')][1], //*[local-name()='isPartOf'][starts-with(., 'urn:ISSN:')]/substring-after(., 'urn:ISSN:')"; |
|
210 | 241 |
|
211 | 242 |
$varEISSN = xpath:"//dc:relation[starts-with(., 'eissn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/eissn/')]/replace(normalize-space(substring-after(., 'eissn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')"; |
212 | 243 |
//oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";); |
213 | 244 |
|
214 | 245 |
//to be improved: many identical checks |
215 |
$varVol = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/replace(., '^.*?\.\s*(\d{4})($|,.*$)', '$1')"; |
|
246 |
//$varVol = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/replace(., '^.*?\.\s*(\d{4})($|,.*$)', '$1')"; |
|
247 |
$varVol = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/replace(., '^.*?\.\s*(\d{4})($|,.*$)', '$1'), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', vol. ')]/normalize-space(substring-before(substring-after(., ', vol. '), ','))"; |
|
216 | 248 |
$varIss = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(substring-before(substring-after(., 'Nr.'), ','))"; |
217 |
$varSp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(tokenize(substring-after(., ', p.'), '-')[1])"; |
|
218 |
$varEp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(reverse(tokenize(substring-after(., ', p.'), '-'))[1])"; |
|
249 |
//$varSp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(tokenize(substring-after(., ', p.'), '-')[1])"; |
|
250 |
$varSp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(tokenize(substring-after(., ', p.'), '-')[1]), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', pp. ')]/replace(., '^.*, pp. (\d*)-\d*[\s,\.;].*$', '$1')"; |
|
251 |
//$varEp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(reverse(tokenize(substring-after(., ', p.'), '-'))[1])"; |
|
252 |
$varEp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(reverse(tokenize(substring-after(., ', p.'), '-'))[1]), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', pp. ')]/replace(., '^.*, pp. \d*-(\d*)[\s,\.;].*$', '$1')"; |
|
219 | 253 |
|
220 | 254 |
//to be improved: many empty attributes |
221 | 255 |
oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";, @vol = xpath:"$varVol";, @iss = xpath:"$varIss";, @sp = xpath:"$varSp";, @ep = xpath:"$varEp";); |
... | ... | |
225 | 259 |
if xpath:"//oaf:datasourceprefix[.='od______3848'] and //dc:format[.='application/pdf']" oaf:fulltext = xpath:"//dc:identifier[ends-with(lower-case(normalize-space(.)), '.pdf')][starts-with(lower-case(normalize-space(.)), 'https://cris.cumulus.vub.ac.be/')]"; else $varDummy= "''"; |
226 | 260 |
if xpath:"//oaf:datasourceprefix[.='doaj21976775' or .='issn21976775'] and //dc:identifier[starts-with(normalize-space(.), 'https://policyreview.info/node/')]" oaf:fulltext = xpath:"concat(//dc:identifier[starts-with(normalize-space(.), 'https://policyreview.info/node/')]/normalize-space(.), '/pdf')"; else $varDummy= "''"; |
227 | 261 |
apply xpath:"//dc:relation[starts-with(., 'https://etalpykla.lituanistikadb.lt/fedora/get/')][//oaf:datasourceprefix[.='od______2712']]" if xpath:"true()" oaf:fulltext = xpath:"normalize-space(.)"; else $varDummy = "''"; |
262 |
if xpath:"//oaf:datasourceprefix[.='od______4149'] and //dc:format[.='application/pdf']" oaf:fulltext = xpath:"//dc:identifier[contains(lower-case(normalize-space(.)), '/datastream/')]"; else $varDummy= "''"; |
|
228 | 263 |
|
229 | 264 |
// community |
230 | 265 |
// concept should not appear with an empty id attribute, i.e. when there is no community - ugly, but seems to work (oaf:datasourceprefix = just any field that is available in all records) |
231 |
$varCommunity = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/')]/substring-after(., 'url:https://openaire.eu/communities/')"; |
|
232 |
oaf:concept = set(xpath:"//oaf:datasourceprefix[string-length($varCommunity) gt 0]/''", @id = $varCommunity;); |
|
266 |
//$varCommunity = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/')]/substring-after(., 'url:https://openaire.eu/communities/')"; |
|
267 |
//oaf:concept = set(xpath:"//oaf:datasourceprefix[string-length($varCommunity) gt 0]/''", @id = $varCommunity;); |
|
268 |
$varCommunityAtt = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/') or starts-with(., 'url:https://zenodo.org/communities/')]/substring-after(., 'url:')"; |
|
269 |
$varCommunityVal = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/') or starts-with(., 'url:https://zenodo.org/communities/')]/substring-before(., 'url:')"; |
|
270 |
oaf:concept = set(xpath:"$varCommunityVal", @id = xpath:"subsequence($varCommunityAtt,position(),1)";); |
|
233 | 271 |
|
234 | 272 |
end</CODE> |
235 | 273 |
</SCRIPT> |
Also available in: Unified diff