Revision 57718
Added by Alessia Bardi over 4 years ago
modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc_cleaning_OPENAIREplus_compliant.xml | ||
---|---|---|
4 | 4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
5 | 5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
6 | 6 |
<RESOURCE_URI value=""/> |
7 |
<DATE_OF_CREATION value="2017-03-01T16:28:28+00:00"/>
|
|
7 |
<DATE_OF_CREATION value="2019-11-26T15:52:42+00:00"/>
|
|
8 | 8 |
</HEADER> |
9 | 9 |
<BODY> |
10 | 10 |
<CONFIGURATION> |
... | ... | |
12 | 12 |
<SCRIPT> |
13 | 13 |
<TITLE>dc_cleaning_OPENAIREplus_compliant</TITLE> |
14 | 14 |
<CODE>declare_script "dc_cleaning_OpenAIREplus_compliant"; |
15 |
declare_ns oaf = "http://namespace.openaire.eu/oaf"; |
|
16 |
declare_ns dri = "http://www.driver-repository.eu/namespace/dri"; |
|
17 |
declare_ns dr = "http://www.driver-repository.eu/namespace/dr"; |
|
18 |
declare_ns dc = "http://purl.org/dc/elements/1.1/"; |
|
19 |
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance"; |
|
20 |
declare_ns oai = "http://www.openarchives.org/OAI/2.0/"; |
|
21 |
declare_ns xs = "http://www.w3.org/2001/XMLSchema"; |
|
22 |
$var0 = "''"; |
|
23 |
$varFP7 = "'corda_______::'"; |
|
24 |
$varH2020 = "'corda__h2020::'"; |
|
25 |
$varFCT="'fct_________::'"; |
|
26 |
$varWT = "'wt__________::'"; |
|
27 |
$varMESTD = "'mestd_______::'"; |
|
28 |
$varDummy = "''"; |
|
29 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
30 |
static $varRepoid = xpath:"//dri:repositoryId"; |
|
31 |
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]); |
|
32 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
|
33 |
dri:repositoryId = $varRepoid; |
|
34 |
dri:recordIdentifier = xpath:"//dri:recordIdentifier"; |
|
35 |
if xpath:"//oai:setSpec[.='col_data_1694'] or //dc:creator[starts-with(., 'test')]" dc:coverage = skipRecord(); else $varDummy = "''"; |
|
36 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0 and not(contains(., 'US National Cancer Institute'))" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
37 |
if xpath:"//dc:title[string-length(.)> 0] and not(//dc:creator[.='Test'])" $varDummy = "''"; else dc:coverage = skipRecord(); |
|
38 |
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
39 |
// |
|
40 |
//apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
41 |
// |
|
42 |
// subject |
|
43 |
// gather subjects: from fields setSpec, subject, classification, keywords |
|
44 |
// assign context: if field value or @xsi:type refers to an approved vocabulary/classification/thesaurus, assign its normed code |
|
45 |
// normalise form: in case of approved vocabulary/classification/thesaurus: 'context:subject', otherwise: 'subject [additional information]' |
|
46 |
// remove duplicates: identical pairs of value/term and context/vocabulary |
|
47 |
$subjVocHarv = xpath:"'bicssc','bk','ddc','gok','jel classification','jel codes','jelelement','jel','lcsh','mesh','msc','rvk'"; // subject contexts/vocabularies as harvested |
|
48 |
$subjVocCode = xpath:"'bicssc','bk','ddc','gok','jel', 'jel', 'jel', 'jel','lcsh','mesh','msc','rvk'"; // subject contexts/vocabularies as normed within OpenAIRE |
|
49 |
$subjVoc = xpath:"concat('(',string-join($subjVocHarv,'|'),')')"; // regular expression for subject contexts |
|
50 |
$subjVocVal = xpath:"concat('^\s*','((info:eu-repo/classification/)?',$subjVoc,'[:/].*)')"; // regular expression for subject contexts in field values |
|
51 |
$subjVocPar = xpath:"concat('^\s*','(dcterms:\s*)?',$subjVoc,'\s*$')"; // regular expression for subject contexts in field parameters |
|
52 |
// subject context: approved vocabulary/classification/thesaurus in field value |
|
53 |
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='setSpec'])[string-length(.) > 0 and matches(., $subjVocVal,'i')]", @classid=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1)";, @classname=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1)";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";); |
|
54 |
$subjListInVal = xpath:"(//dc:subject|//*[local-name()='setSpec'])[string-length(.) > 0 and matches(., $subjVocVal,'i')]/concat(./subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1),':',normalize-space(replace(.,'(info:eu-repo/classification/)?([^/:]*)[:/](.*)','$3')))"; |
|
55 |
// subject context: approved vocabulary/classification/thesaurus in field parameter |
|
56 |
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='classification'])[string-length(.) > 0 and matches(./@xsi:type, $subjVocPar,'i')]", @classid=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1)";, @classname=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1)";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";); |
|
57 |
$subjListInPar = xpath:"(//dc:subject|//*[local-name()='classification'])[string-length(.) > 0 and matches(./@xsi:type, $subjVocPar,'i')]/concat(./subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1),':',normalize-space(.))"; |
|
58 |
// subject context: approved vocabulary/classification/thesaurus in field value or parameter |
|
59 |
$subjListInParAndVal = xpath:"distinct-values(insert-before($subjListInVal,0,$subjListInPar))"; |
|
60 |
dc:subject = set(xpath:"$subjListInParAndVal", @classid=xpath:"substring-before(.,':')";, @classname=xpath:"substring-before(.,':')";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";); |
|
61 |
// subject context: no (approved) vocabulary/classification/thesaurus |
|
62 |
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) > 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i'))]", @classid=xpath:"'keyword'";, @classname=xpath:"'keyword'";, @schemeid=xpath:"'dnet:result_subject'";, @schemename=xpath:"'dnet:result_subject'";); |
|
63 |
//$subListKeywords = xpath:"distinct-values((//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) > 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i'))]/replace(concat(normalize-space(replace(.,'((info:eu-repo/classification/[^/]*/)|([^:]*:))(.*)','$4')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',substring-before(replace(.,'(info:eu-repo/classification/)?([^/:]*)[/:](.*)','$2:$3'),':'))),']'),' \[\]',''))"; |
|
64 |
$subjListKeywordsInfo = xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) > 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i')) and starts-with(.,'info:eu-repo/classification/')] |
|
65 |
/replace(concat(normalize-space(replace(.,'info:eu-repo/classification/[^/]*/(.*)','$1')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',replace(.,'info:eu-repo/classification/([^/]*)/.*','$1'))),']'),' \[\]','')"; |
|
66 |
$subjListKeywordsColon = xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) > 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i')) and not(starts-with(.,'info:eu-repo/classification/'))]/replace(concat(normalize-space(replace(.,'[^:]*:(.*)','$1')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',substring-before(.,':'))),']'),' \[\]','')"; |
|
67 |
$subjListKeywordsInfoAndColon = xpath:"distinct-values(insert-before($subjListKeywordsInfo,0,$subjListKeywordsColon))"; |
|
68 |
dc:subject = set(xpath:"$subjListKeywordsInfoAndColon", @classid=xpath:"'keyword'";, @classname=xpath:"'keyword'";, @schemeid=xpath:"'dnet:result_subject'";, @schemename=xpath:"'dnet:result_subject'";); |
|
69 |
// |
|
70 |
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
71 |
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
72 |
dc:contributor = xpath:"//dc:contributor"; |
|
73 |
dc:description = xpath:"string-join(//dc:description[concat(normalize-space(.), '')], codepoints-to-string(10))"; |
|
74 |
$varHttpTest = "''"; |
|
75 |
if xpath:"//dc:identifier[starts-with(., 'http')][not(starts-with(., 'http://hdl.handle.net/123456789') or starts-with(., 'https://hdl.handle.net/123456789'))]" $varHttpTest = "true"; else dc:identifier = skipRecord(); |
|
76 |
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; |
|
77 |
dr:dateOfCollection = xpath:"//dri:dateOfCollection"; |
|
78 |
static dr:dateOfTransformation = xpath:"current-dateTime()"; |
|
79 |
dc:type = xpath:"//dc:type"; |
|
80 |
dc:format = xpath:"//dc:format"; |
|
81 |
dc:date = xpath:"//dc:date"; |
|
82 |
dc:language = Convert(xpath:"//dc:language", Languages); |
|
83 |
//dc:language = "eng"; |
|
84 |
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord(); |
|
85 |
$varDateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()"); |
|
86 |
if xpath:"//oaf:datasourceprefix[.='od_______883']" oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date[3]", DateISO8601); else $varDummy= "''"; |
|
87 |
if xpath:"//oaf:datasourceprefix[.='od______3063']" oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601); else $varDummy= "''"; |
|
88 |
if xpath:"(//oaf:datasourceprefix[.='od______2658'] or //oaf:datasourceprefix[.='od______1318']) and starts-with($varDateAccepted, '1000')" oaf:dateAccepted = $varDummy; else $varDummy= "''"; |
|
89 |
if xpath:"not(//oaf:datasourceprefix[.='od_______883']) and not(//oaf:datasourceprefix[.='od______3063']) and not(starts-with($varDateAccepted, '10') or starts-with($varDateAccepted, '00'))" oaf:dateAccepted = $varDateAccepted; else $varDummy= "''"; |
|
90 |
// apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''"; |
|
91 |
$varEmbargoEnd = xpath:"//dc:date[matches(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', 'i')][contains(lower-case(.), 'info:eu-repo')]/replace(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', '$3', 'i')"; |
|
92 |
oaf:embargoenddate = $varEmbargoEnd; |
|
93 |
// FP7 |
|
94 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2006][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
95 |
// ERC (provided by OAPEN) |
|
96 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
97 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
98 |
// H2020 |
|
99 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2013][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
100 |
// FCT |
|
101 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', '$3', 'i')))"; |
|
102 |
// MESTD |
|
103 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', '$3', 'i'))"; |
|
104 |
// WT |
|
105 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
106 |
dc:relation = xpath:"//dc:relation"; |
|
107 |
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)"; |
|
108 |
// |
|
109 |
// |
|
110 |
oaf:collectedDatasourceid = xpath:"$varDatasourceid"; |
|
111 |
// |
|
112 |
// dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies); |
|
113 |
// if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies); |
|
114 |
if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type) | //oai:setSpec", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies); |
|
115 |
// |
|
116 |
if xpath:"(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = "EMBARGO"; else $var0 = "''"; |
|
117 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo')) and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))]" oaf:accessrights = Convert(xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]", AccessRights); else $var0 = "''"; |
|
118 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo') and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))]" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
119 |
if xpath:"count(//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/')]) eq 0" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
120 |
// |
|
121 |
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') and (xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
122 |
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') " oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
123 |
// if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/restrictedAccess') ]" oaf:accessrights = "RESTRICTED"; else $var0 = "''"; |
|
124 |
// if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())]" $var0 = "''"; else oaf:accessrights = "OPEN"; |
|
125 |
// oaf:accessrights = xpath:"//dc:rights[ not(starts-with(normalize-space(.), 'info:eu-repo/semantics')) and xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date()]/concat('OPEN')"; |
|
126 |
// oaf:accessrights = xpath:"//dc:rights[not(contains(normalize-space(.), 'info:eu-repo/semantics'))]/normalize-space('OPEN')"; |
|
127 |
// oaf:accessrights = xpath:"not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())"; |
|
128 |
// |
|
129 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
130 |
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
131 |
// |
|
132 |
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)'); |
|
133 |
// 1st param: list of xpath expressions to be applied on the metadata, in JSON syntax; 2nd param: xpath expression for the metadata record; 3rd param: regular expression that matches with a negative lookahead for the first group and extracts the digits of the second group |
|
134 |
$varPmId = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/pmid/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/pmid/)(\d+)'); |
|
135 |
// $varUrn = xpath:"substring-after(//dc:relation[starts-with(normalize-space(.), 'info:eu-repo/semantics/altIdentifier/urn/')], 'info:eu-repo/semantics/altIdentifier/urn/')"; |
|
136 |
$varUrn = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/urn/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/urn/)(urn:nbn:.*)'); |
|
137 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
|
138 |
oaf:identifier = set(xpath:"$varPmId//value", @identifierType = "pmid";); |
|
139 |
oaf:identifier = set(xpath:"$varUrn//value", @identifierType = "urn";); |
|
140 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
|
141 |
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '('))"; |
|
142 |
$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1'))"; |
|
143 |
//$varISSN = xpath:"//oai:setSpec[starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:'))"; |
|
144 |
$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')"; |
|
145 |
oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";); |
|
146 |
if xpath:"//oaf:datasourceprefix[.='dovemedicalp']" oaf:fulltext = xpath:"concat('file:///mnt/downloaded_dumps/dovepress/', substring-after(//*[local-name()='header']/*[local-name()='identifier'], 'oai:dovepress.com/'), '.pdf')"; else $varDummy= "''"; |
|
147 |
end</CODE> |
|
15 |
declare_ns oaf = "http://namespace.openaire.eu/oaf"; |
|
16 |
declare_ns dri = "http://www.driver-repository.eu/namespace/dri"; |
|
17 |
declare_ns dr = "http://www.driver-repository.eu/namespace/dr"; |
|
18 |
declare_ns dc = "http://purl.org/dc/elements/1.1/"; |
|
19 |
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance"; |
|
20 |
declare_ns oai = "http://www.openarchives.org/OAI/2.0/"; |
|
21 |
declare_ns xs = "http://www.w3.org/2001/XMLSchema"; |
|
22 |
$var0 = "''"; |
|
23 |
$varFP7 = "'corda_______::'"; |
|
24 |
$varH2020 = "'corda__h2020::'"; |
|
25 |
$varAKA = "'aka_________::'"; // tbd, no statements yet |
|
26 |
$varAFF = "'aff_________::'"; |
|
27 |
$varARC = "'arc_________::'"; |
|
28 |
$varCONICYT = "'conicytf____::'"; |
|
29 |
$varDFG = "'dfgf________::'"; |
|
30 |
$varFCT="'fct_________::'"; |
|
31 |
$varFWF = "'fwf_________::'"; |
|
32 |
$varGSRT = "'gsrt________::'"; |
|
33 |
$varHRZZ = "'irb_hr______::'"; |
|
34 |
$varINNOVIRIS = "'innoviris___::'"; |
|
35 |
$varMESTD = "'mestd_______::'"; |
|
36 |
$varMIUR = "'miur________::'"; // tbd, no statements yet |
|
37 |
$varMZOS = "'irb_hr______::'"; |
|
38 |
$varNHMRC = "'nhmrc_______::'"; |
|
39 |
$varNIH = "'nih_________::'"; |
|
40 |
$varNSF = "'nsf_________::'"; |
|
41 |
$varNWO = "'nwo_________::'"; |
|
42 |
$varRCUK = "'rcuk________::'"; |
|
43 |
$varRIF = "'rif_________::'"; |
|
44 |
$varRSF = "'rsf_________::'"; |
|
45 |
$varSFI ="'sfi_________::'"; |
|
46 |
$varSGOV = "'sgov________::'"; |
|
47 |
$varSNSF = "'snsf________::'"; |
|
48 |
$varTARA = "'taraexp_____::'"; |
|
49 |
$varTUBITAK = "'tubitakf____::'"; |
|
50 |
$varWT = "'wt__________::'"; |
|
51 |
$varDummy = "''"; |
|
52 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
53 |
static $varRepoid = xpath:"//dri:repositoryId"; |
|
54 |
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]); |
|
55 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
|
56 |
dri:repositoryId = $varRepoid; |
|
57 |
dri:recordIdentifier = xpath:"//dri:recordIdentifier"; |
|
58 |
|
|
59 |
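// skip test records: known test entries of datasource od______2659 (matched by title/creator/description), and any record with at least two elements whose whole text is 'test' followed only by 'test', digits, commas or whitespace (case-insensitive) |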
|
60 |
if xpath:"//oaf:datasourceprefix[.='od______2659'] and //dc:title[lower-case(.) = 'popper test archive'] and //dc:creator[lower-case(.) = 'author, test'] and //dc:description[starts-with(lower-case(.), 'a short description of the article')]" dc:title = skipRecord(); else $varDummy= "''"; |
|
61 |
if xpath:"//oaf:datasourceprefix[.='od______2659'] and //dc:title[lower-case(.) = ('test doc', 'test_publish', 'test html', 'final_test')] and //dc:description = //dc:title" dc:title = skipRecord(); else $varDummy= "''"; |
|
62 |
if xpath:"count(//*[matches(., '^test(test|[\s\d,])*$', 'i')]) >= 2" dc:title = skipRecord(); else $varDummy= "''"; |
|
63 |
|
|
64 |
if xpath:"//oai:setSpec[.='col_data_1694'] or //dc:creator[starts-with(., 'test')]" dc:coverage = skipRecord(); else $varDummy = "''"; |
|
65 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0 and not(contains(., 'US National Cancer Institute')) and normalize-space(.) != ','" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
66 |
if xpath:"//dc:title[string-length(.)> 0] and not(//dc:creator[.='Test'])" $varDummy = "''"; else dc:coverage = skipRecord(); |
|
67 |
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
68 |
// |
|
69 |
//apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
70 |
// |
|
71 |
// subject |
|
72 |
// gather subjects: from fields setSpec, subject, classification, keywords |
|
73 |
// assign context: if field value or @xsi:type refers to an approved vocabulary/classification/thesaurus, assign its normed code |
|
74 |
// normalise form: in case of approved vocabulary/classification/thesaurus: 'context:subject', otherwise: 'subject [additional information]' |
|
75 |
// remove duplicates: identical pairs of value/term and context/vocabulary |
|
76 |
$subjVocHarv = xpath:"'acm','bicssc','bk','ddc','gok','jel classification','jel codes','jelelement','jel','lcsh','mesh','msc','pacs','rvk','udc'"; // subject contexts/vocabularies as harvested |
|
77 |
$subjVocCode = xpath:"'ccs','bicssc','bk','ddc','gok','jel', 'jel', 'jel', 'jel','lcsh','mesh','msc','pacs','rvk','udc'"; // subject contexts/vocabularies as normed within OpenAIRE |
|
78 |
$subjVoc = xpath:"concat('(',string-join($subjVocHarv,'|'),')')"; // regular expression for subject contexts |
|
79 |
$subjVocVal = xpath:"concat('^\s*','((info:eu-repo/classification/)?',$subjVoc,'[:/].*)')"; // regular expression for subject contexts in field values |
|
80 |
$subjVocPar = xpath:"concat('^\s*','(dcterms:\s*)?',$subjVoc,'\s*$')"; // regular expression for subject contexts in field parameters |
|
81 |
// subject context: approved vocabulary/classification/thesaurus in field value |
|
82 |
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='setSpec'])[string-length(.) > 0 and matches(., $subjVocVal,'i')]", @classid=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1)";, @classname=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1)";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";); |
|
83 |
$subjListInVal = xpath:"(//dc:subject|//*[local-name()='setSpec'])[string-length(.) > 0 and matches(., $subjVocVal,'i')]/concat(./subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1),':',normalize-space(replace(.,'(info:eu-repo/classification/)?([^/:]*)[:/](.*)','$3')))"; |
|
84 |
// subject context: approved vocabulary/classification/thesaurus in field parameter |
|
85 |
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='classification'])[string-length(.) > 0 and matches(./@xsi:type, $subjVocPar,'i')]", @classid=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1)";, @classname=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1)";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";); |
|
86 |
$subjListInPar = xpath:"(//dc:subject|//*[local-name()='classification'])[string-length(.) > 0 and matches(./@xsi:type, $subjVocPar,'i')]/concat(./subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1),':',normalize-space(.))"; |
|
87 |
// subject context: approved vocabulary/classification/thesaurus in field value or parameter |
|
88 |
$subjListInParAndVal = xpath:"distinct-values(insert-before($subjListInVal,0,$subjListInPar))"; |
|
89 |
dc:subject = set(xpath:"$subjListInParAndVal", @classid=xpath:"substring-before(.,':')";, @classname=xpath:"substring-before(.,':')";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";); |
|
90 |
// subject context: no (approved) vocabulary/classification/thesaurus |
|
91 |
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) > 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i'))]", @classid=xpath:"'keyword'";, @classname=xpath:"'keyword'";, @schemeid=xpath:"'dnet:result_subject'";, @schemename=xpath:"'dnet:result_subject'";); |
|
92 |
//$subListKeywords = xpath:"distinct-values((//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) > 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i'))]/replace(concat(normalize-space(replace(.,'((info:eu-repo/classification/[^/]*/)|([^:]*:))(.*)','$4')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',substring-before(replace(.,'(info:eu-repo/classification/)?([^/:]*)[/:](.*)','$2:$3'),':'))),']'),' \[\]',''))"; |
|
93 |
$subjListKeywordsInfo = xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) > 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i')) and starts-with(.,'info:eu-repo/classification/')] |
|
94 |
/replace(concat(normalize-space(replace(.,'info:eu-repo/classification/[^/]*/(.*)','$1')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',replace(.,'info:eu-repo/classification/([^/]*)/.*','$1'))),']'),' \[\]','')"; |
|
95 |
$subjListKeywordsColon = xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) > 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i')) and not(starts-with(.,'info:eu-repo/classification/'))]/replace(concat(normalize-space(replace(.,'[^:]*:(.*)','$1')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',substring-before(.,':'))),']'),' \[\]','')"; |
|
96 |
$subjListKeywordsInfoAndColon = xpath:"distinct-values(insert-before($subjListKeywordsInfo,0,$subjListKeywordsColon))"; |
|
97 |
dc:subject = set(xpath:"$subjListKeywordsInfoAndColon", @classid=xpath:"'keyword'";, @classname=xpath:"'keyword'";, @schemeid=xpath:"'dnet:result_subject'";, @schemename=xpath:"'dnet:result_subject'";); |
|
98 |
// |
|
99 |
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
100 |
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
101 |
dc:contributor = xpath:"//dc:contributor"; |
|
102 |
dc:description = xpath:"string-join(//dc:description[concat(normalize-space(.), '')], codepoints-to-string(10))"; |
|
103 |
$varHttpTest = "''"; |
|
104 |
if xpath:"//dc:identifier[starts-with(normalize-space(.), 'http')][not(starts-with(., 'http://hdl.handle.net/123456789') or starts-with(., 'https://hdl.handle.net/123456789'))]" $varHttpTest = "true"; else dc:identifier = skipRecord(); |
|
105 |
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; |
|
106 |
dr:dateOfCollection = xpath:"//dri:dateOfCollection"; |
|
107 |
static dr:dateOfTransformation = xpath:"current-dateTime()"; |
|
108 |
dc:type = xpath:"//dc:type"; |
|
109 |
dc:format = xpath:"//dc:format"; |
|
110 |
dc:date = xpath:"//dc:date"; |
|
111 |
dc:language = Convert(xpath:"//dc:language", Languages); |
|
112 |
//dc:language = "eng"; |
|
113 |
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord(); |
|
114 |
$varDateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()"); |
|
115 |
if xpath:"//oaf:datasourceprefix[.='od_______883']" oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date[3]", DateISO8601); else $varDummy= "''"; |
|
116 |
if xpath:"//oaf:datasourceprefix[.='od______3063']" oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601); else $varDummy= "''"; |
|
117 |
if xpath:"(//oaf:datasourceprefix[.='od______2658'] or //oaf:datasourceprefix[.='od______1318']) and starts-with($varDateAccepted, '1000')" oaf:dateAccepted = $varDummy; else $varDummy= "''"; |
|
118 |
if xpath:"not(//oaf:datasourceprefix[.='od_______883']) and not(//oaf:datasourceprefix[.='od______3063']) and not(starts-with($varDateAccepted, '10') or starts-with($varDateAccepted, '00'))" oaf:dateAccepted = $varDateAccepted; else $varDummy= "''"; |
|
119 |
// apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''"; |
|
120 |
$varEmbargoEnd = xpath:"//dc:date[matches(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', 'i')][contains(lower-case(.), 'info:eu-repo')]/replace(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', '$3', 'i')"; |
|
121 |
oaf:embargoenddate = $varEmbargoEnd; |
|
122 |
// FP7 |
|
123 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2006][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
124 |
// ERC (provided by OAPEN) |
|
125 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
126 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
127 |
// H2020 |
|
128 |
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2013][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
129 |
// AFF |
|
130 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varAFF, replace(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3}).*', '$3', 'i'))"; |
|
131 |
// AKA \d* |
|
132 |
//oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/aka/[^/]*/(\d+)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varAKA, replace(normalize-space(.), 'info:eu-repo/grantagreement/aka/[^/]*/(\d+)(/.*)?', '$1', 'i')))"; |
|
133 |
// ARC ([A-Z]+[\d/]*|\d+) |
|
134 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*)/([A-Z]+[\d/]*|\d+)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varARC, replace(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*?)/([A-Z]+[\d/]*|\d+)', '$3', 'i'))"; |
|
135 |
// CONICYT \d{7,8} |
|
136 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', 'i')]/concat($varCONICYT, replace(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', '$1', 'i'))"; |
|
137 |
// DFG \d{7,9} |
|
138 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*)/(.*?)(\d{7,9})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varDFG, replace(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*?)/.*?(\d{7,9})', '$3', 'i'))"; |
|
139 |
// FCT (SFRH/BD/)(\d+)(/\d+) ... ((SFRH|PRAXIS XXI|PD|FMRH)/[A-Z]*/)?\d*(/\d*)? ... |
|
140 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fct/[^/]*/[^/]+', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), 'info:eu-repo/grantagreement/fct/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
141 |
// FWF [A-Z]{1,3} \d* |
|
142 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fwf/[^/]*/.*?([A-Z]{1,3} \d*).*', 'i')]/concat($varFWF, replace(normalize-space(.), 'info:eu-repo/grantagreement/fwf/[^/]*/.*?([A-Z]{1,3} \d*).*', '$1', 'i'))"; |
|
143 |
// GSRT |
|
144 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/gsrt/[^/]*/[^/]+', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varGSRT, replace(normalize-space(.), 'info:eu-repo/grantagreement/gsrt/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
145 |
// HRZZ info:eu-repo/grantagreement/HRZZ/[^/]*/([^/]*|[^/]*/\d*)(/.*)? |
|
146 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', 'i')]/concat($varHRZZ, replace(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', '$1', 'i'))"; |
|
147 |
// INNOVIRIS |
|
148 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/innoviris/[^/]*/[^/]+', 'i')]/concat($varINNOVIRIS, replace(normalize-space(.), 'info:eu-repo/grantagreement/innoviris/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
149 |
// MESTD \d* |
|
150 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/mestd/[^/]*/\d+', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), 'info:eu-repo/grantagreement/mestd/[^/]*/(\d+)(/.*)?', '$1', 'i'))"; |
|
151 |
// MIUR [A-Z0-9]* |
|
152 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/miur/[^/]*/.*?[A-Z0-9]*', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMIUR, replace(normalize-space(.), 'info:eu-repo/grantagreement/miur/[^/]*/.*?([A-Z0-9]*).*?', '$1', 'i'))"; |
|
153 |
// MZOS \d{3}-\d{7}-\d{4} |
|
154 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/mzos/[^/]*/.*?(\d{3}-\d{7}-\d{4})', 'i')]/concat($varMZOS, replace(normalize-space(.), 'info:eu-repo/grantagreement/mzos/[^/]*/.*?(\d{3}-\d{7}-\d{4}).*', '$1', 'i'))"; |
|
155 |
// NHMRC \d{3,6} |
|
156 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/[^/]*/.*?(\d{3,6})', 'i')]/concat($varNHMRC, replace(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/[^/]*/.*?(\d{3,6}).*?', '$1', 'i'))"; |
|
157 |
// NIH ([A-Z\d]*-[A-Z\d]*|ALM 1200300-300-0-1|CIT S&?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1) ... hm |
|
158 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nih/[^/]*/[^/]+', 'i')]/concat($varNIH, replace(normalize-space(.), 'info:eu-repo/grantagreement/nih/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
159 |
// NSF (\d{7}|\d{2}[A-Z]\d{4}) |
|
160 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nsf/[^/]*/[^/]+', 'i')]/concat($varNSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/nsf/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
161 |
// NWO |
|
162 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nwo/[^/]*/[^/]+', 'i')]/concat($varNWO, replace(normalize-space(.), 'info:eu-repo/grantagreement/nwo/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
163 |
// RCUK |
|
164 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/rcuk/[^/]*/[^/]+', 'i')]/concat($varRCUK, replace(normalize-space(.), 'info:eu-repo/grantagreement/rcuk/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
165 |
// RIF |
|
166 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/rif/[^/]*/[^/]+', 'i')]/concat($varRIF, replace(normalize-space(.), 'info:eu-repo/grantagreement/rif/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
167 |
// RSF |
|
168 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/rsf/[^/]*/[^/]+', 'i')]/concat($varRSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/rsf/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
169 |
// SFI |
|
170 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/sfi/[^/]*/[^/]+', 'i')]/concat($varSFI, replace(normalize-space(.), 'info:eu-repo/grantagreement/sfi/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
171 |
// SGOV |
|
172 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/sgov/[^/]*/[^/]+', 'i')]/concat($varSGOV, replace(normalize-space(.), 'info:eu-repo/grantagreement/sgov/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
173 |
// SNSF |
|
174 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/[^/]+', 'i')]/concat($varSNSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
175 |
// TARA |
|
176 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/tara/[^/]*/[^/]+', 'i')]/concat($varTARA, replace(normalize-space(.), 'info:eu-repo/grantagreement/tara/[^/]*/([^/]+)(/.*)?', '$1', 'i'))"; |
|
177 |
// TUBITAK \d{3}[A-Z]\d{2,3} |
|
178 |
// TUBITAK: keep only the grant id (3 digits, a letter, 2-3 digits). The replace pattern consumes the rest of the relation with a trailing '.*', so it also works when no '/' follows the id (the matches() predicate never required one) and nothing after the id leaks into the project id. |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/.*?\d{3}[A-Z]\d{2,3}', 'i')]/concat($varTUBITAK, replace(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/.*?(\d{3}[A-Z]\d{2,3}).*', '$1', 'i'))"; |
|
179 |
// WT [^\s]* |
|
180 |
// Wellcome Trust: group 1 of the replace pattern is the funder alternation (wellcome trust|wt); the grant id is group 2, so '$2' is emitted. The trailing '.*' consumes whatever follows the id so that only the id is appended to $varWT. |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/(wellcome trust|wt)/[^/]*/[^\s/]+', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), 'info:eu-repo/grantagreement/(wellcome trust|wt)/[^/]*/([^\s/]*).*', '$2', 'i'))"; |
|
181 |
// WT |
|
182 |
//oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))"; |
|
183 |
|
|
184 |
dc:relation = xpath:"//dc:relation"; |
|
185 |
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)"; |
|
186 |
// |
|
187 |
// |
|
188 |
oaf:collectedDatasourceid = xpath:"$varDatasourceid"; |
|
189 |
// |
|
190 |
// dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies); |
|
191 |
// if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies); |
|
192 |
// if xpath:"//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type | //oai:setSpec)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies); |
|
193 |
//$varCobjCategory = Convert(xpath:"//dc:type", TextTypologies); |
|
194 |
//$varCobjCategory = Convert(xpath:"(reverse((//dc:type | //oai:setSpec)[//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_']) | (//dc:type | //oai:setSpec)[not(//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other')) and not(//oaf:datasourceprefix/lower-case(.) = 'openedition_')])", TextTypologies); |
|
195 |
//insert-before - del |
|
196 |
//$varCobjCategory = Convert(xpath:"insert-before((reverse((//dc:type | //oai:setSpec)[//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_'])), 100, ( (//dc:type | //oai:setSpec)[not(//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other')) and not(//oaf:datasourceprefix/lower-case(.) = 'openedition_')]))", TextTypologies); |
|
197 |
|
|
198 |
$varCobjCategoryReverse = Convert(xpath:"insert-before(reverse(//dc:type) , 0, reverse(//oai:setSpec))", TextTypologies); |
|
199 |
$varSuperTypeReverse = Convert(xpath:"normalize-space($varCobjCategoryReverse)", SuperTypes); |
|
200 |
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_']/$varCobjCategoryReverse", @type = $varSuperTypeReverse;); |
|
201 |
|
|
202 |
$varCobjCategoryStraight = Convert(xpath:"insert-before(//dc:type , 100, //oai:setSpec)", TextTypologies); |
|
203 |
$varSuperTypeStraight = Convert(xpath:"normalize-space($varCobjCategoryStraight)", SuperTypes); |
|
204 |
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[not(//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_') and (not(//oaf:datasourceprefix/lower-case(.) = 'od________65'))]/$varCobjCategoryStraight", @type = $varSuperTypeStraight;); |
|
205 |
|
|
206 |
// CERN CDS when dc:type or setSpec explicitly states resource type |
|
207 |
// (currently :CONF not covered as not included in vocabulary, and as landing in literature already; other sets might also be addressed, depending on marked resource types) |
|
208 |
$varCobjCategoryCernExplicit = Convert(xpath:"normalize-space((//dc:type, //*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')]/tokenize(., ':')[2])[1])", TextTypologies); |
|
209 |
$varSuperTypeCernExplicit = Convert(xpath:"normalize-space($varCobjCategoryCernExplicit)", SuperTypes); |
|
210 |
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and (//dc:type or //*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernExplicit", @type = $varSuperTypeCernExplicit;); |
|
211 |
|
|
212 |
// CERN CDS when set vaguely hints on literature |
|
213 |
$varCobjCategoryCernVague = xpath:"//oaf:datasourceprefix[not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')]) and //*[local-name() = 'setSpec'][ends-with(., ':FULLTEXT')]]/'0038'"; |
|
214 |
$varSuperTypeCernVague = Convert(xpath:"normalize-space($varCobjCategoryCernVague)", SuperTypes); |
|
215 |
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernVague", @type = $varSuperTypeCernVague;); |
|
216 |
|
|
217 |
// CERN CDS when no hint |
|
218 |
$varCobjCategoryCernUnknown = xpath:"//oaf:datasourceprefix[not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT') or ends-with(., ':FULLTEXT')])]/'0000'"; |
|
219 |
$varSuperTypeCernUnknown = Convert(xpath:"normalize-space($varCobjCategoryCernUnknown)", SuperTypes); |
|
220 |
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernUnknown", @type = $varSuperTypeCernUnknown;); |
|
221 |
|
|
222 |
// |
|
223 |
// Embargo end date still in the future (and datasource is not od_______151): flag the record as EMBARGO. The target is oaf:accessrights, the same element written by the other access-rights rules in this script. |
if xpath:"(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()) and not(//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = "EMBARGO"; else $var0 = "''"; |
|
224 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo')) and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))] or (//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = Convert(xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]", AccessRights); else $var0 = "''"; |
|
225 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo') and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))] and not(//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
226 |
if xpath:"count(//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/')]) eq 0 and not($varDatasourceid = ('opendoar____::3532', 'opendoar____::109', 'opendoar____::151'))" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
227 |
// |
|
228 |
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') and (xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
229 |
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') " oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
230 |
// if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/restrictedAccess') ]" oaf:accessrights = "RESTRICTED"; else $var0 = "''"; |
|
231 |
// if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())]" $var0 = "''"; else oaf:accessrights = "OPEN"; |
|
232 |
// oaf:accessrights = xpath:"//dc:rights[ not(starts-with(normalize-space(.), 'info:eu-repo/semantics')) and xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date()]/concat('OPEN')"; |
|
233 |
// oaf:accessrights = xpath:"//dc:rights[not(contains(normalize-space(.), 'info:eu-repo/semantics'))]/normalize-space('OPEN')"; |
|
234 |
// oaf:accessrights = xpath:"not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())"; |
|
235 |
if xpath:"$varDatasourceid = 'opendoar____::3532' and //dc:format = 'fulltext'" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
236 |
if xpath:"$varDatasourceid = 'opendoar____::3532' and //dc:format = 'abstractOnly'" oaf:accessrights = "CLOSED"; else $var0 = "''"; |
|
237 |
if xpath:"$varDatasourceid = 'opendoar____::3532' and not(//dc:format = ('fulltext', 'abstractOnly'))" oaf:accessrights = "UNKNOWN"; else $var0 = "''"; |
|
238 |
if xpath:"$varDatasourceid = 'opendoar____::109'" oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights[starts-with(., 'http')][1])", AccessRights); else $var0 = "''"; |
|
239 |
// |
|
240 |
//oaf:license = xpath:"//dc:rights[contains (., 'http://creativecommons.org/licenses/') or contains(., 'http://opensource.org/licenses/')]"; |
|
241 |
oaf:license = xpath:"//dc:rights[starts-with (., 'http') and contains(., 'license')]"; |
|
242 |
// |
|
243 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
244 |
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
245 |
// |
|
246 |
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)'); |
|
247 |
// 1st param: list of xpath expressions to be applied on the metadata in json syntax; 2nd param: xpath expression for the metadata record; 3rd param: reg expr that matches with a negative lookahead for the first group and extracts digits of the second group |
|
248 |
$varPmId = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/pmid/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/pmid/)(\d+)'); |
|
249 |
// $varUrn = xpath:"substring-after(//dc:relation[starts-with(normalize-space(.), 'info:eu-repo/semantics/altIdentifier/urn/')], 'info:eu-repo/semantics/altIdentifier/urn/')"; |
|
250 |
$varUrn = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/urn/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/urn/)(urn:nbn:.*)'); |
|
251 |
// ISBN candidates: dc:source values starting with 978 or 979, only for datasource od______2097. The two prefix tests are parenthesised because XPath 'and' binds tighter than 'or'; without the parentheses the 979 test would apply to every datasource. |
$varIsbn = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2097'] and (starts-with(., '978') or starts-with(., '979'))]"; |
|
252 |
$varHandle = xpath:"//dc:identifier[//oaf:datasourceprefix[.='od______2097'] and starts-with(., 'http://hdl.handle.net/')]/substring-after(., 'http://hdl.handle.net/')"; |
|
253 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
|
254 |
oaf:identifier = set(xpath:"$varPmId//value", @identifierType = "pmid";); |
|
255 |
oaf:identifier = set(xpath:"$varUrn//value", @identifierType = "urn";); |
|
256 |
oaf:identifier = set(xpath:"$varIsbn", @identifierType = "isbn";); |
|
257 |
oaf:identifier = set(xpath:"$varHandle", @identifierType = "handle";); |
|
258 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
|
259 |
|
|
260 |
// journal data; PURE exposes ISSN in field ns2:isPartOf |
|
261 |
|
|
262 |
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '('))"; |
|
263 |
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1'))"; |
|
264 |
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1')), //dc:source[//oaf:datasourceprefix[.='od______2097'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1]"; |
|
265 |
$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1')), //dc:source[//oaf:datasourceprefix[.='od______2097'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1], //dc:source[1][//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and //dc:source[not(starts-with(., 'ISSN '))]]/replace(., '^(.*?)\.\s*\d{4}.*$', '$1'), //dc:source[//oaf:datasourceprefix[.='issn22953671'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1]/substring-before(., ';')"; |
|
266 |
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1')), //dc:source[//oaf:datasourceprefix[.='od______2097'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1], //dc:source[1][//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and //dc:source[not(starts-with(., 'ISSN '))]]/replace(., '^(.*?)\.\s*\d{4}.*$', '$1'), //dc:source[//oaf:datasourceprefix[.='issn22953671'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1]/substring-before(., ';'), //dc:source[//*[local-name()='isPartOf']][matches(., '.*'.*')]"; |
|
267 |
|
|
268 |
//$varISSN = xpath:"//oai:setSpec[starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:'))"; |
|
269 |
//$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')"; |
|
270 |
//$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:source[//oaf:datasourceprefix='issn20381026'][matches(.,'\d\d\d\d-\d\d\d\d')][1], //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.='od______2097'] and matches(., '\d{4}-\d{3}[\dX]')]"; |
|
271 |
//$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:') or starts-with(., 'ISSN ')]/normalize-space(replace(., 'ISSN[:\s](.*)$', '$1')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.=('od______2097', 'issn22953671')] and matches(., '^\d{4}-\d{3}[\dX]$')][1]"; |
|
272 |
$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:') or starts-with(., 'ISSN ')]/normalize-space(replace(., 'ISSN[:\s](.*)$', '$1')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.=('od______2097', 'issn22953671')] and matches(., '^\d{4}-\d{3}[\dX]$')][1], //*[local-name()='isPartOf'][starts-with(., 'urn:ISSN:')]/substring-after(., 'urn:ISSN:')"; |
|
273 |
|
|
274 |
$varEISSN = xpath:"//dc:relation[starts-with(., 'eissn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/eissn/')]/replace(normalize-space(substring-after(., 'eissn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')"; |
|
275 |
//oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";); |
|
276 |
|
|
277 |
//to be improved: many identical checks |
|
278 |
//$varVol = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/replace(., '^.*?\.\s*(\d{4})($|,.*$)', '$1')"; |
|
279 |
$varVol = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/replace(., '^.*?\.\s*(\d{4})($|,.*$)', '$1'), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', vol. ')]/normalize-space(substring-before(substring-after(., ', vol. '), ','))"; |
|
280 |
$varIss = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(substring-before(substring-after(., 'Nr.'), ','))"; |
|
281 |
//$varSp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(tokenize(substring-after(., ', p.'), '-')[1])"; |
|
282 |
$varSp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(tokenize(substring-after(., ', p.'), '-')[1]), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', pp. ')]/replace(., '^.*, pp. (\d*)-\d*[\s,\.;].*$', '$1')"; |
|
283 |
//$varEp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(reverse(tokenize(substring-after(., ', p.'), '-'))[1])"; |
|
284 |
$varEp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(reverse(tokenize(substring-after(., ', p.'), '-'))[1]), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', pp. ')]/replace(., '^.*, pp. \d*-(\d*)[\s,\.;].*$', '$1')"; |
|
285 |
|
|
286 |
//to be improved: many empty attributes |
|
287 |
oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";, @vol = xpath:"$varVol";, @iss = xpath:"$varIss";, @sp = xpath:"$varSp";, @ep = xpath:"$varEp";); |
|
288 |
|
|
289 |
if xpath:"//oaf:datasourceprefix[.='dovemedicalp']" oaf:fulltext = xpath:"concat('file:///mnt/downloaded_dumps/dovepress/', substring-after(//*[local-name()='header']/*[local-name()='identifier'], 'oai:dovepress.com/'), '.pdf')"; else $varDummy= "''"; |
|
290 |
|
|
291 |
if xpath:"//oaf:datasourceprefix[.='od______3848'] and //dc:format[.='application/pdf']" oaf:fulltext = xpath:"//dc:identifier[ends-with(lower-case(normalize-space(.)), '.pdf')][starts-with(lower-case(normalize-space(.)), 'https://cris.cumulus.vub.ac.be/')]"; else $varDummy= "''"; |
|
292 |
if xpath:"//oaf:datasourceprefix[.='doaj21976775' or .='issn21976775'] and //dc:identifier[starts-with(normalize-space(.), 'https://policyreview.info/node/')]" oaf:fulltext = xpath:"concat(//dc:identifier[starts-with(normalize-space(.), 'https://policyreview.info/node/')]/normalize-space(.), '/pdf')"; else $varDummy= "''"; |
|
293 |
apply xpath:"//dc:relation[starts-with(., 'https://etalpykla.lituanistikadb.lt/fedora/get/')][//oaf:datasourceprefix[.='od______2712']]" if xpath:"true()" oaf:fulltext = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
294 |
if xpath:"//oaf:datasourceprefix[.='od______4149'] and //dc:format[.='application/pdf']" oaf:fulltext = xpath:"//dc:identifier[contains(lower-case(normalize-space(.)), '/datastream/')]"; else $varDummy= "''"; |
|
295 |
|
|
296 |
// community |
|
297 |
// concept should not appear with empty attribute id, i.e when there is no community - ugly, but seems to work (oaf:datasourceprefix = just any field available in all records) |
|
298 |
//$varCommunity = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/')]/substring-after(., 'url:https://openaire.eu/communities/')"; |
|
299 |
$varCommunityAtt = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/') or starts-with(., 'url:https://zenodo.org/communities/')]/substring-after(., 'url:')"; |
|
300 |
$varCommunityVal = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/') or starts-with(., 'url:https://zenodo.org/communities/')]/substring-before(., 'url:')"; |
|
301 |
//oaf:concept = set(xpath:"//oaf:datasourceprefix[string-length($varCommunity) gt 0]/''", @id = $varCommunity;); |
|
302 |
oaf:concept = set(xpath:"$varCommunityVal", @id = xpath:"subsequence($varCommunityAtt,position(),1)";); |
|
303 |
|
|
304 |
end</CODE> |
|
148 | 305 |
</SCRIPT> |
149 | 306 |
</CONFIGURATION> |
150 | 307 |
<STATUS/> |
Also available in: Unified diff
Synchronised with the version currently running on beta after fixing the namespace prefix for RSF, innoviris and RIF