Project

General

Profile

« Previous | Next » 

Revision 57718

Synchronised with the version currently running on beta after fixing the namespace prefixes for RSF, Innoviris and RIF

View differences:

dc_cleaning_OPENAIREplus_compliant.xml
4 4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5 5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6 6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2017-03-01T16:28:28+00:00"/>
7
        <DATE_OF_CREATION value="2019-11-26T15:52:42+00:00"/>
8 8
    </HEADER>
9 9
    <BODY>
10 10
        <CONFIGURATION>
......
12 12
            <SCRIPT>
13 13
                <TITLE>dc_cleaning_OPENAIREplus_compliant</TITLE>
14 14
                <CODE>declare_script "dc_cleaning_OpenAIREplus_compliant";
15
declare_ns oaf = "http://namespace.openaire.eu/oaf";
16
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
17
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
18
declare_ns dc = "http://purl.org/dc/elements/1.1/";
19
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
20
declare_ns oai = "http://www.openarchives.org/OAI/2.0/";
21
declare_ns xs = "http://www.w3.org/2001/XMLSchema";
22
$var0 = "''";
23
$varFP7 = "'corda_______::'";
24
$varH2020 = "'corda__h2020::'";
25
$varFCT="'fct_________::'";
26
$varWT = "'wt__________::'";
27
$varMESTD = "'mestd_______::'";
28
$varDummy = "''";
29
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
30
static $varRepoid = xpath:"//dri:repositoryId";
31
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]);
32
dri:objIdentifier = xpath:"//dri:objIdentifier";
33
dri:repositoryId = $varRepoid;
34
dri:recordIdentifier = xpath:"//dri:recordIdentifier";
35
if xpath:"//oai:setSpec[.='col_data_1694'] or //dc:creator[starts-with(., 'test')]" dc:coverage = skipRecord(); else $varDummy = "''";
36
apply xpath:"//dc:creator" if xpath:"string-length(.) &gt; 0 and not(contains(., 'US National Cancer Institute'))" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
37
if xpath:"//dc:title[string-length(.)&gt; 0] and not(//dc:creator[.='Test'])" $varDummy = "''"; else dc:coverage = skipRecord();
38
apply xpath:"//dc:title" if xpath:"string-length(.) &gt; 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
39
//
40
//apply xpath:"//dc:subject" if xpath:"string-length(.) &gt; 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
41
//
42
// subject
43
// gather subjects: from fields setSpec, subject, classification, keywords
44
// assign context: if field value or @xsi:type refers to an approved vocabulary/classification/thesaurus, assign its normed code
45
// normalise form: in case of approved vocabulary/classification/thesaurus: 'context:subject', otherwise: 'subject [additional information]'
46
// remove duplicates: identical pairs of value/term and context/vocabulary
47
$subjVocHarv    = xpath:"'bicssc','bk','ddc','gok','jel classification','jel codes','jelelement','jel','lcsh','mesh','msc','rvk'";            // subject contexts/vocabularies as harvested
48
$subjVocCode    = xpath:"'bicssc','bk','ddc','gok','jel',                       'jel',           'jel',              'jel','lcsh','mesh','msc','rvk'";            // subject contexts/vocabularies as normed within OpenAIRE
49
$subjVoc = xpath:"concat('(',string-join($subjVocHarv,'|'),')')";       // regular expression for subject contexts
50
$subjVocVal = xpath:"concat('^\s*','((info:eu-repo/classification/)?',$subjVoc,'[:/].*)')"; // regular expression for subject contexts in field values
51
$subjVocPar = xpath:"concat('^\s*','(dcterms:\s*)?',$subjVoc,'\s*$')";               // regular expression for subject contexts in field parameters
52
// subject context: approved vocabulary/classification/thesaurus in field value
53
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='setSpec'])[string-length(.) &gt; 0 and matches(., $subjVocVal,'i')]", @classid=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1)";, @classname=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1)";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
54
$subjListInVal = xpath:"(//dc:subject|//*[local-name()='setSpec'])[string-length(.) &gt; 0 and matches(., $subjVocVal,'i')]/concat(./subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1),':',normalize-space(replace(.,'(info:eu-repo/classification/)?([^/:]*)[:/](.*)','$3')))";
55
// subject context: approved vocabulary/classification/thesaurus in field parameter
56
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='classification'])[string-length(.) &gt; 0 and matches(./@xsi:type, $subjVocPar,'i')]", @classid=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1)";, @classname=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1)";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
57
$subjListInPar = xpath:"(//dc:subject|//*[local-name()='classification'])[string-length(.) &gt; 0 and matches(./@xsi:type, $subjVocPar,'i')]/concat(./subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1),':',normalize-space(.))";
58
// subject context: approved vocabulary/classification/thesaurus in field value or parameter 
59
$subjListInParAndVal = xpath:"distinct-values(insert-before($subjListInVal,0,$subjListInPar))";
60
dc:subject = set(xpath:"$subjListInParAndVal",  @classid=xpath:"substring-before(.,':')";, @classname=xpath:"substring-before(.,':')";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
61
// subject context: no (approved) vocabulary/classification/thesaurus
62
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i'))]", @classid=xpath:"'keyword'";, @classname=xpath:"'keyword'";, @schemeid=xpath:"'dnet:result_subject'";, @schemename=xpath:"'dnet:result_subject'";);
63
//$subListKeywords = xpath:"distinct-values((//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i'))]/replace(concat(normalize-space(replace(.,'((info:eu-repo/classification/[^/]*/)|([^:]*:))(.*)','$4')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',substring-before(replace(.,'(info:eu-repo/classification/)?([^/:]*)[/:](.*)','$2:$3'),':'))),']'),' \[\]',''))";
64
$subjListKeywordsInfo = xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i')) and starts-with(.,'info:eu-repo/classification/')]
65
/replace(concat(normalize-space(replace(.,'info:eu-repo/classification/[^/]*/(.*)','$1')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',replace(.,'info:eu-repo/classification/([^/]*)/.*','$1'))),']'),' \[\]','')";
66
$subjListKeywordsColon = xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i')) and not(starts-with(.,'info:eu-repo/classification/'))]/replace(concat(normalize-space(replace(.,'[^:]*:(.*)','$1')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',substring-before(.,':'))),']'),' \[\]','')";
67
$subjListKeywordsInfoAndColon = xpath:"distinct-values(insert-before($subjListKeywordsInfo,0,$subjListKeywordsColon))";
68
dc:subject = set(xpath:"$subjListKeywordsInfoAndColon", @classid=xpath:"'keyword'";, @classname=xpath:"'keyword'";, @schemeid=xpath:"'dnet:result_subject'";, @schemename=xpath:"'dnet:result_subject'";);
69
//
70
apply xpath:"//dc:publisher" if xpath:"string-length(.) &gt; 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
71
apply xpath:"//dc:source" if xpath:"string-length(.) &gt; 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
72
dc:contributor = xpath:"//dc:contributor";
73
dc:description = xpath:"string-join(//dc:description[concat(normalize-space(.), '')], codepoints-to-string(10))";
74
$varHttpTest = "''";
75
if xpath:"//dc:identifier[starts-with(., 'http')][not(starts-with(., 'http://hdl.handle.net/123456789') or starts-with(., 'https://hdl.handle.net/123456789'))]" $varHttpTest = "true"; else dc:identifier = skipRecord();
76
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
77
dr:dateOfCollection = xpath:"//dri:dateOfCollection";
78
static dr:dateOfTransformation = xpath:"current-dateTime()";
79
dc:type = xpath:"//dc:type";
80
dc:format = xpath:"//dc:format";
81
dc:date = xpath:"//dc:date";
82
dc:language = Convert(xpath:"//dc:language", Languages);
83
//dc:language = "eng";
84
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
85
$varDateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
86
if xpath:"//oaf:datasourceprefix[.='od_______883']" oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date[3]", DateISO8601); else $varDummy= "''";
87
if xpath:"//oaf:datasourceprefix[.='od______3063']" oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601); else $varDummy= "''";
88
if xpath:"(//oaf:datasourceprefix[.='od______2658'] or //oaf:datasourceprefix[.='od______1318']) and starts-with($varDateAccepted, '1000')" oaf:dateAccepted = $varDummy; else $varDummy= "''";
89
if xpath:"not(//oaf:datasourceprefix[.='od_______883']) and not(//oaf:datasourceprefix[.='od______3063']) and not(starts-with($varDateAccepted, '10') or starts-with($varDateAccepted, '00'))" oaf:dateAccepted = $varDateAccepted; else $varDummy= "''";
90
// apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
91
$varEmbargoEnd = xpath:"//dc:date[matches(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', 'i')][contains(lower-case(.), 'info:eu-repo')]/replace(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', '$3', 'i')";
92
oaf:embargoenddate = $varEmbargoEnd;
93
// FP7
94
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2006][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
95
// ERC (provided by OAPEN)
96
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
97
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
98
// H2020
99
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2013][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
100
// FCT
101
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', '$3', 'i')))";
102
// MESTD
103
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', '$3', 'i'))";
104
// WT
105
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
106
dc:relation = xpath:"//dc:relation";
107
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
108
//
109
//
110
oaf:collectedDatasourceid = xpath:"$varDatasourceid";   
111
//
112
// dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies);
113
// if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies);
114
if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type) | //oai:setSpec", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies);
115
//
116
if xpath:"(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = "EMBARGO"; else $var0 = "''";
117
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo')) and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))]" oaf:accessrights = Convert(xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]", AccessRights); else $var0 = "''";
118
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo') and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))]" oaf:accessrights = "OPEN"; else $var0 = "''";
119
if xpath:"count(//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/')]) eq 0" oaf:accessrights = "OPEN"; else $var0 = "''";
120
//
121
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') and (xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
122
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') " oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
123
// if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/restrictedAccess') ]" oaf:accessrights = "RESTRICTED"; else $var0 = "''";
124
// if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())]" $var0 = "''"; else oaf:accessrights = "OPEN";
125
// oaf:accessrights = xpath:"//dc:rights[   not(starts-with(normalize-space(.), 'info:eu-repo/semantics')) and xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date()]/concat('OPEN')";
126
// oaf:accessrights = xpath:"//dc:rights[not(contains(normalize-space(.), 'info:eu-repo/semantics'))]/normalize-space('OPEN')";
127
// oaf:accessrights = xpath:"not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())";
128
//
129
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
130
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
131
//
132
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
133
// 1st param: list of xpath expressions to be applied on the metadata in json syntax; 2nd param: xpath expression for the metadata record; 3rd param: reg expr that matches with a negative lookahead for the first group and extracts digits of the second group
134
$varPmId = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/pmid/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/pmid/)(\d+)');
135
// $varUrn = xpath:"substring-after(//dc:relation[starts-with(normalize-space(.), 'info:eu-repo/semantics/altIdentifier/urn/')], 'info:eu-repo/semantics/altIdentifier/urn/')";
136
$varUrn = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/urn/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/urn/)(urn:nbn:.*)');
137
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
138
oaf:identifier = set(xpath:"$varPmId//value", @identifierType = "pmid";);
139
oaf:identifier = set(xpath:"$varUrn//value", @identifierType = "urn";);
140
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
141
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '('))";
142
$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1'))";
143
//$varISSN = xpath:"//oai:setSpec[starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:'))";
144
$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')";
145
oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";);
146
if xpath:"//oaf:datasourceprefix[.='dovemedicalp']" oaf:fulltext = xpath:"concat('file:///mnt/downloaded_dumps/dovepress/', substring-after(//*[local-name()='header']/*[local-name()='identifier'], 'oai:dovepress.com/'), '.pdf')"; else $varDummy= "''";
147
end</CODE>
15
                    declare_ns oaf = "http://namespace.openaire.eu/oaf";
16
                    declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
17
                    declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
18
                    declare_ns dc = "http://purl.org/dc/elements/1.1/";
19
                    declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
20
                    declare_ns oai = "http://www.openarchives.org/OAI/2.0/";
21
                    declare_ns xs = "http://www.w3.org/2001/XMLSchema";
22
                    $var0 = "''";
23
                    $varFP7 = "'corda_______::'";
24
                    $varH2020 = "'corda__h2020::'";
25
                    $varAKA = "'aka_________::'";     // tbd, no statements yet
26
                    $varAFF = "'aff_________::'";
27
                    $varARC = "'arc_________::'";
28
                    $varCONICYT = "'conicytf____::'";
29
                    $varDFG = "'dfgf________::'";
30
                    $varFCT="'fct_________::'";
31
                    $varFWF = "'fwf_________::'";
32
                    $varGSRT = "'gsrt________::'";
33
                    $varHRZZ = "'irb_hr______::'";
34
                    $varINNOVIRIS = "'innoviris___::'";
35
                    $varMESTD = "'mestd_______::'";
36
                    $varMIUR = "'miur________::'";     // tbd, no statements yet
37
                    $varMZOS = "'irb_hr______::'";
38
                    $varNHMRC = "'nhmrc_______::'";
39
                    $varNIH = "'nih_________::'";
40
                    $varNSF = "'nsf_________::'";
41
                    $varNWO = "'nwo_________::'";
42
                    $varRCUK = "'rcuk________::'";
43
                    $varRIF = "'rif_________::'";
44
                    $varRSF = "'rsf_________::'";
45
                    $varSFI ="'sfi_________::'";
46
                    $varSGOV = "'sgov________::'";
47
                    $varSNSF = "'snsf________::'";
48
                    $varTARA = "'taraexp_____::'";
49
                    $varTUBITAK = "'tubitakf____::'";
50
                    $varWT = "'wt__________::'";
51
                    $varDummy = "''";
52
                    static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
53
                    static $varRepoid = xpath:"//dri:repositoryId";
54
                    static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]);
55
                    dri:objIdentifier = xpath:"//dri:objIdentifier";
56
                    dri:repositoryId = $varRepoid;
57
                    dri:recordIdentifier = xpath:"//dri:recordIdentifier";
58

  
59
                    // skip test records
60
                    if xpath:"//oaf:datasourceprefix[.='od______2659'] and //dc:title[lower-case(.) = 'popper test archive'] and //dc:creator[lower-case(.) = 'author, test'] and //dc:description[starts-with(lower-case(.), 'a short description of the article')]" dc:title = skipRecord(); else $varDummy= "''";
61
                    if xpath:"//oaf:datasourceprefix[.='od______2659'] and //dc:title[lower-case(.) = ('test doc', 'test_publish', 'test html', 'final_test')] and //dc:description = //dc:title" dc:title = skipRecord(); else $varDummy= "''";
62
                    if xpath:"count(//*[matches(., '^test(test|[\s\d,])*$', 'i')]) &gt;= 2" dc:title = skipRecord(); else $varDummy= "''";
63

  
64
                    if xpath:"//oai:setSpec[.='col_data_1694'] or //dc:creator[starts-with(., 'test')]" dc:coverage = skipRecord(); else $varDummy = "''";
65
                    apply xpath:"//dc:creator" if xpath:"string-length(.) &gt; 0 and not(contains(., 'US National Cancer Institute')) and normalize-space(.) != ','" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
66
                    if xpath:"//dc:title[string-length(.)&gt; 0] and not(//dc:creator[.='Test'])" $varDummy = "''"; else dc:coverage = skipRecord();
67
                    apply xpath:"//dc:title" if xpath:"string-length(.) &gt; 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
68
                    //
69
                    //apply xpath:"//dc:subject" if xpath:"string-length(.) &gt; 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
70
                    //
71
                    // subject
72
                    // gather subjects: from fields setSpec, subject, classification, keywords
73
                    // assign context: if field value or @xsi:type refers to an approved vocabulary/classification/thesaurus, assign its normed code
74
                    // normalise form: in case of approved vocabulary/classification/thesaurus: 'context:subject', otherwise: 'subject [additional information]'
75
                    // remove duplicates: identical pairs of value/term and context/vocabulary
76
                    $subjVocHarv    = xpath:"'acm','bicssc','bk','ddc','gok','jel classification','jel codes','jelelement','jel','lcsh','mesh','msc','pacs','rvk','udc'";            // subject contexts/vocabularies as harvested
77
                    $subjVocCode    = xpath:"'ccs','bicssc','bk','ddc','gok','jel',                       'jel',           'jel',              'jel','lcsh','mesh','msc','pacs','rvk','udc'";            // subject contexts/vocabularies as normed within OpenAIRE
78
                    $subjVoc = xpath:"concat('(',string-join($subjVocHarv,'|'),')')";       // regular expression for subject contexts
79
                    $subjVocVal = xpath:"concat('^\s*','((info:eu-repo/classification/)?',$subjVoc,'[:/].*)')"; // regular expression for subject contexts in field values
80
                    $subjVocPar = xpath:"concat('^\s*','(dcterms:\s*)?',$subjVoc,'\s*$')";               // regular expression for subject contexts in field parameters
81
                    // subject context: approved vocabulary/classification/thesaurus in field value
82
                    //dc:subject = set(xpath:"(//dc:subject|//*[local-name()='setSpec'])[string-length(.) &gt; 0 and matches(., $subjVocVal,'i')]", @classid=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1)";, @classname=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1)";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
83
                    $subjListInVal = xpath:"(//dc:subject|//*[local-name()='setSpec'])[string-length(.) &gt; 0 and matches(., $subjVocVal,'i')]/concat(./subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1),':',normalize-space(replace(.,'(info:eu-repo/classification/)?([^/:]*)[:/](.*)','$3')))";
84
                    // subject context: approved vocabulary/classification/thesaurus in field parameter
85
                    //dc:subject = set(xpath:"(//dc:subject|//*[local-name()='classification'])[string-length(.) &gt; 0 and matches(./@xsi:type, $subjVocPar,'i')]", @classid=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1)";, @classname=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1)";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
86
                    $subjListInPar = xpath:"(//dc:subject|//*[local-name()='classification'])[string-length(.) &gt; 0 and matches(./@xsi:type, $subjVocPar,'i')]/concat(./subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1),':',normalize-space(.))";
87
                    // subject context: approved vocabulary/classification/thesaurus in field value or parameter
88
                    $subjListInParAndVal = xpath:"distinct-values(insert-before($subjListInVal,0,$subjListInPar))";
89
                    dc:subject = set(xpath:"$subjListInParAndVal",  @classid=xpath:"substring-before(.,':')";, @classname=xpath:"substring-before(.,':')";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
90
                    // subject context: no (approved) vocabulary/classification/thesaurus
91
                    //dc:subject = set(xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i'))]", @classid=xpath:"'keyword'";, @classname=xpath:"'keyword'";, @schemeid=xpath:"'dnet:result_subject'";, @schemename=xpath:"'dnet:result_subject'";);
92
                    //$subListKeywords = xpath:"distinct-values((//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i'))]/replace(concat(normalize-space(replace(.,'((info:eu-repo/classification/[^/]*/)|([^:]*:))(.*)','$4')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',substring-before(replace(.,'(info:eu-repo/classification/)?([^/:]*)[/:](.*)','$2:$3'),':'))),']'),' \[\]',''))";
93
                    $subjListKeywordsInfo = xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i')) and starts-with(.,'info:eu-repo/classification/')]
94
                    /replace(concat(normalize-space(replace(.,'info:eu-repo/classification/[^/]*/(.*)','$1')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',replace(.,'info:eu-repo/classification/([^/]*)/.*','$1'))),']'),' \[\]','')";
95
                    $subjListKeywordsColon = xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i')) and not(starts-with(.,'info:eu-repo/classification/'))]/replace(concat(normalize-space(replace(.,'[^:]*:(.*)','$1')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',substring-before(.,':'))),']'),' \[\]','')";
96
                    $subjListKeywordsInfoAndColon = xpath:"distinct-values(insert-before($subjListKeywordsInfo,0,$subjListKeywordsColon))";
97
                    dc:subject = set(xpath:"$subjListKeywordsInfoAndColon", @classid=xpath:"'keyword'";, @classname=xpath:"'keyword'";, @schemeid=xpath:"'dnet:result_subject'";, @schemename=xpath:"'dnet:result_subject'";);
98
                    //
99
                    apply xpath:"//dc:publisher" if xpath:"string-length(.) &gt; 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
100
                    apply xpath:"//dc:source" if xpath:"string-length(.) &gt; 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
101
                    dc:contributor = xpath:"//dc:contributor";
102
                    dc:description = xpath:"string-join(//dc:description[concat(normalize-space(.), '')], codepoints-to-string(10))";
103
                    $varHttpTest = "''";
104
                    if xpath:"//dc:identifier[starts-with(normalize-space(.), 'http')][not(starts-with(., 'http://hdl.handle.net/123456789') or starts-with(., 'https://hdl.handle.net/123456789'))]" $varHttpTest = "true"; else dc:identifier = skipRecord();
105
                    apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
106
                    dr:dateOfCollection = xpath:"//dri:dateOfCollection";
107
                    static dr:dateOfTransformation = xpath:"current-dateTime()";
108
                    dc:type = xpath:"//dc:type";
109
                    dc:format = xpath:"//dc:format";
110
                    dc:date = xpath:"//dc:date";
111
                    dc:language = Convert(xpath:"//dc:language", Languages);
112
                    //dc:language = "eng";
113
                    //if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
114
                    $varDateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
115
                    if xpath:"//oaf:datasourceprefix[.='od_______883']" oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date[3]", DateISO8601); else $varDummy= "''";
116
                    if xpath:"//oaf:datasourceprefix[.='od______3063']" oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601); else $varDummy= "''";
117
                    if xpath:"(//oaf:datasourceprefix[.='od______2658'] or //oaf:datasourceprefix[.='od______1318']) and starts-with($varDateAccepted, '1000')" oaf:dateAccepted = $varDummy; else $varDummy= "''";
118
                    if xpath:"not(//oaf:datasourceprefix[.='od_______883']) and not(//oaf:datasourceprefix[.='od______3063']) and not(starts-with($varDateAccepted, '10') or starts-with($varDateAccepted, '00'))" oaf:dateAccepted = $varDateAccepted; else $varDummy= "''";
119
                    // apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
120
                    $varEmbargoEnd = xpath:"//dc:date[matches(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', 'i')][contains(lower-case(.), 'info:eu-repo')]/replace(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', '$3', 'i')";
121
                    oaf:embargoenddate = $varEmbargoEnd;
122
                    // FP7
123
                    oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2006][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
124
                    // ERC (provided by OAPEN)
125
                    oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
126
                    oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
127
                    // H2020
128
                    oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2013][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
129
                    // AFF
130
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varAFF, replace(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3}).*', '$3', 'i'))";
131
                    // AKA \d*
132
                    //oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/aka/[^/]*/(\d+)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varAKA, replace(normalize-space(.), 'info:eu-repo/grantagreement/aka/[^/]*/(\d+)(/.*)?', '$1', 'i')))";
133
                    // ARC ([A-Z]+[\d/]*|\d+)
134
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*)/([A-Z]+[\d/]*|\d+)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varARC, replace(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*?)/([A-Z]+[\d/]*|\d+)', '$3', 'i'))";
135
                    // CONICYT \d{7,8}
136
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', 'i')]/concat($varCONICYT, replace(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', '$1', 'i'))";
137
                    // DFG \d{7,9}
138
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*)/(.*?)(\d{7,9})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varDFG, replace(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*?)/.*?(\d{7,9})', '$3', 'i'))";
139
                    // FCT (SFRH/BD/)(\d+)(/\d+) ... ((SFRH|PRAXIS XXI|PD|FMRH)/[A-Z]*/)?\d*(/\d*)? ...
140
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fct/[^/]*/[^/]+', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), 'info:eu-repo/grantagreement/fct/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
141
                    // FWF [A-Z]{1,3} \d*
142
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fwf/[^/]*/.*?([A-Z]{1,3} \d*).*', 'i')]/concat($varFWF, replace(normalize-space(.), 'info:eu-repo/grantagreement/fwf/[^/]*/.*?([A-Z]{1,3} \d*).*', '$1', 'i'))";
143
                    // GSRT
144
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/gsrt/[^/]*/[^/]+', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varGSRT, replace(normalize-space(.), 'info:eu-repo/grantagreement/gsrt/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
145
                    // HRZZ info:eu-repo/grantagreement/HRZZ/[^/]*/([^/]*|[^/]*/\d*)(/.*)?
146
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', 'i')]/concat($varHRZZ, replace(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', '$1', 'i'))";
147
                    // INNOVIRIS
148
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/innoviris/[^/]*/[^/]+', 'i')]/concat($varINNOVIRIS, replace(normalize-space(.), 'info:eu-repo/grantagreement/innoviris/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
149
                    // MESTD \d*
150
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/mestd/[^/]*/\d+', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), 'info:eu-repo/grantagreement/mestd/[^/]*/(\d+)(/.*)?', '$1', 'i'))";
151
                    // MIUR [A-Z0-9]*
152
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/miur/[^/]*/.*?[A-Z0-9]*', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMIUR, replace(normalize-space(.), 'info:eu-repo/grantagreement/miur/[^/]*/.*?([A-Z0-9]*).*?', '$1', 'i'))";
153
                    // MZOS \d{3}-\d{7}-\d{4}
154
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/mzos/[^/]*/.*?(\d{3}-\d{7}-\d{4})', 'i')]/concat($varMZOS, replace(normalize-space(.), 'info:eu-repo/grantagreement/mzos/[^/]*/.*?(\d{3}-\d{7}-\d{4}).*', '$1', 'i'))";
155
                    // NHMRC \d{3,6}
156
                    // Extract the first 3-6 digit grant number from an NHMRC grantAgreement relation and prefix it with $varNHMRC.
                    // The replace() pattern ends with a greedy '.*' so that anything following the number (title, acronym, ...) is dropped instead of being left attached to the project id.
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/[^/]*/.*?(\d{3,6})', 'i')]/concat($varNHMRC, replace(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/[^/]*/.*?(\d{3,6}).*', '$1', 'i'))";
157
                    // NIH ([A-Z\d]*-[A-Z\d]*|ALM 1200300-300-0-1|CIT S&amp;?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1) ... hm
158
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nih/[^/]*/[^/]+', 'i')]/concat($varNIH, replace(normalize-space(.), 'info:eu-repo/grantagreement/nih/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
159
                    // NSF (\d{7}|\d{2}[A-Z]\d{4})
160
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nsf/[^/]*/[^/]+', 'i')]/concat($varNSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/nsf/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
161
                    // NWO
162
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nwo/[^/]*/[^/]+', 'i')]/concat($varNWO, replace(normalize-space(.), 'info:eu-repo/grantagreement/nwo/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
163
                    // RCUK
164
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/rcuk/[^/]*/[^/]+', 'i')]/concat($varRCUK, replace(normalize-space(.), 'info:eu-repo/grantagreement/rcuk/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
165
                    // RIF
166
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/rif/[^/]*/[^/]+', 'i')]/concat($varRIF, replace(normalize-space(.), 'info:eu-repo/grantagreement/rif/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
167
                    // RSF
168
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/rsf/[^/]*/[^/]+', 'i')]/concat($varRSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/rsf/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
169
                    // SFI
170
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/sfi/[^/]*/[^/]+', 'i')]/concat($varSFI, replace(normalize-space(.), 'info:eu-repo/grantagreement/sfi/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
171
                    // SGOV
172
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/sgov/[^/]*/[^/]+', 'i')]/concat($varSGOV, replace(normalize-space(.), 'info:eu-repo/grantagreement/sgov/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
173
                    // SNSF
174
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/[^/]+', 'i')]/concat($varSNSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
175
                    // TARA
176
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/tara/[^/]*/[^/]+', 'i')]/concat($varTARA, replace(normalize-space(.), 'info:eu-repo/grantagreement/tara/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
177
                    // TUBITAK \d{3}[A-Z]\d{2,3}
178
                    // Extract the TUBITAK grant code (\d{3}[A-Z]\d{2,3}) from a grantAgreement relation and prefix it with $varTUBITAK.
                    // The replace() pattern no longer requires a '/' after the code and ends with a greedy '.*': relations without a trailing segment are now reduced to the code as well, and trailing segments are stripped rather than appended to the id.
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/.*?\d{3}[A-Z]\d{2,3}', 'i')]/concat($varTUBITAK, replace(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/.*?(\d{3}[A-Z]\d{2,3}).*', '$1', 'i'))";
179
                    // WT [^\s]*
180
                    // Extract the Wellcome Trust grant code from a grantAgreement relation and prefix it with $varWT.
                    // Group 1 of the pattern is the funder alternation (wellcome trust|wt); the grant code is group 2, so the replacement must be '$2'.
                    // The trailing greedy '.*' removes whatever follows the code (whitespace, further path segments).
                    oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/(wellcome trust|wt)/[^/]*/[^\s/]+', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), 'info:eu-repo/grantagreement/(wellcome trust|wt)/[^/]*/([^\s/]*).*', '$2', 'i'))";
181
                    // WT
182
                    //oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
183

  
184
                    dc:relation = xpath:"//dc:relation";
185
                    //comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
186
                    //
187
                    //
188
                    oaf:collectedDatasourceid = xpath:"$varDatasourceid";
189
                    //
190
                    // dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies);
191
                    // if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies);
192
                    // if xpath:"//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type | //oai:setSpec)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies);
193
                    //$varCobjCategory = Convert(xpath:"//dc:type", TextTypologies);
194
                    //$varCobjCategory = Convert(xpath:"(reverse((//dc:type | //oai:setSpec)[//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_']) | (//dc:type | //oai:setSpec)[not(//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other')) and not(//oaf:datasourceprefix/lower-case(.) = 'openedition_')])", TextTypologies);
195
                    //insert-before - del
196
                    //$varCobjCategory = Convert(xpath:"insert-before((reverse((//dc:type | //oai:setSpec)[//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_'])), 100, ( (//dc:type | //oai:setSpec)[not(//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other')) and not(//oaf:datasourceprefix/lower-case(.) = 'openedition_')]))", TextTypologies);
197

  
198
                    $varCobjCategoryReverse = Convert(xpath:"insert-before(reverse(//dc:type) , 0,  reverse(//oai:setSpec))", TextTypologies);
199
                    $varSuperTypeReverse = Convert(xpath:"normalize-space($varCobjCategoryReverse)", SuperTypes);
200
                    dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_']/$varCobjCategoryReverse", @type = $varSuperTypeReverse;);
201

  
202
                    $varCobjCategoryStraight = Convert(xpath:"insert-before(//dc:type , 100, //oai:setSpec)", TextTypologies);
203
                    $varSuperTypeStraight = Convert(xpath:"normalize-space($varCobjCategoryStraight)", SuperTypes);
204
                    dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[not(//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_') and (not(//oaf:datasourceprefix/lower-case(.) = 'od________65'))]/$varCobjCategoryStraight", @type = $varSuperTypeStraight;);
205

  
206
                    // CERN CDS when dc:type or setSpec explicitly states resource type
207
                    // (currently :CONF not covered as not included in vocabulary, and as landing in literature already; other sets might also be addressed, depending on marked resource types)
208
                    $varCobjCategoryCernExplicit = Convert(xpath:"normalize-space((//dc:type, //*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')]/tokenize(., ':')[2])[1])", TextTypologies);
209
                    $varSuperTypeCernExplicit = Convert(xpath:"normalize-space($varCobjCategoryCernExplicit)", SuperTypes);
210
                    dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and (//dc:type or //*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernExplicit", @type = $varSuperTypeCernExplicit;);
211

  
212
                    // CERN CDS when set vaguely hints on literature
213
                    $varCobjCategoryCernVague = xpath:"//oaf:datasourceprefix[not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')]) and //*[local-name() = 'setSpec'][ends-with(., ':FULLTEXT')]]/'0038'";
214
                    $varSuperTypeCernVague = Convert(xpath:"normalize-space($varCobjCategoryCernVague)", SuperTypes);
215
                    dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernVague", @type = $varSuperTypeCernVague;);
216

  
217
                    // CERN CDS when no hint
218
                    $varCobjCategoryCernUnknown = xpath:"//oaf:datasourceprefix[not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT') or ends-with(., ':FULLTEXT')])]/'0000'";
219
                    $varSuperTypeCernUnknown = Convert(xpath:"normalize-space($varCobjCategoryCernUnknown)", SuperTypes);
220
                    dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernUnknown", @type = $varSuperTypeCernUnknown;);
221

  
222
                    //
223
                    // Embargo still running (extracted embargo end date lies in the future) and datasource is not od_______151: mark the record as EMBARGO.
                    // The value is written to oaf:accessrights, the element used by all other access-rights rules of this script; the following rules explicitly exclude the running-embargo case, so this is the only rule that can set it.
                    if xpath:"(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()) and not(//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = "EMBARGO"; else $var0 = "''";
224
                    if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo')) and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))] or (//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = Convert(xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]", AccessRights); else $var0 = "''";
225
                    if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo') and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))] and not(//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = "OPEN"; else $var0 = "''";
226
                    if xpath:"count(//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/')]) eq 0 and not($varDatasourceid = ('opendoar____::3532', 'opendoar____::109',  'opendoar____::151'))" oaf:accessrights = "OPEN"; else $var0 = "''";
227
                    //
228
                    // apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') and (xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
229
                    // apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') " oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
230
                    // if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/restrictedAccess') ]" oaf:accessrights = "RESTRICTED"; else $var0 = "''";
231
                    // if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())]" $var0 = "''"; else oaf:accessrights = "OPEN";
232
                    // oaf:accessrights = xpath:"//dc:rights[   not(starts-with(normalize-space(.), 'info:eu-repo/semantics')) and xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date()]/concat('OPEN')";
233
                    // oaf:accessrights = xpath:"//dc:rights[not(contains(normalize-space(.), 'info:eu-repo/semantics'))]/normalize-space('OPEN')";
234
                    // oaf:accessrights = xpath:"not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())";
235
                    if xpath:"$varDatasourceid = 'opendoar____::3532' and //dc:format = 'fulltext'" oaf:accessrights = "OPEN"; else $var0 = "''";
236
                    if xpath:"$varDatasourceid = 'opendoar____::3532' and //dc:format = 'abstractOnly'" oaf:accessrights = "CLOSED"; else $var0 = "''";
237
                    if xpath:"$varDatasourceid = 'opendoar____::3532' and not(//dc:format = ('fulltext', 'abstractOnly'))" oaf:accessrights = "UNKNOWN"; else $var0 = "''";
238
                    if xpath:"$varDatasourceid = 'opendoar____::109'" oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights[starts-with(., 'http')][1])", AccessRights); else $var0 = "''";
239
                    //
240
                    //oaf:license = xpath:"//dc:rights[contains (., 'http://creativecommons.org/licenses/') or contains(., 'http://opensource.org/licenses/')]";
241
                    oaf:license = xpath:"//dc:rights[starts-with (., 'http') and contains(., 'license')]";
242
                    //
243
                    static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
244
                    static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
245
                    //
246
                    $varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
247
                    // 1st param: list of xpath expressions to be applied on the metadata in json syntax; 2nd param: xpath expression for the metadata record; 3rd param: reg expr that matches with a negative lookahead for the first group and extracts digits of the second group
248
                    $varPmId = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/pmid/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/pmid/)(\d+)');
249
                    // $varUrn = xpath:"substring-after(//dc:relation[starts-with(normalize-space(.), 'info:eu-repo/semantics/altIdentifier/urn/')], 'info:eu-repo/semantics/altIdentifier/urn/')";
250
                    $varUrn = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/urn/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/urn/)(urn:nbn:.*)');
251
                    // ISBN-13 candidates: dc:source values starting with 978 or 979, only for datasource od______2097.
                    // The two starts-with() tests are parenthesised because 'and' binds tighter than 'or' in XPath; without the parentheses every dc:source starting with 979 would be taken as ISBN for all datasources.
                    $varIsbn = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2097'] and (starts-with(., '978') or starts-with(., '979'))]";
252
                    $varHandle = xpath:"//dc:identifier[//oaf:datasourceprefix[.='od______2097'] and starts-with(., 'http://hdl.handle.net/')]/substring-after(., 'http://hdl.handle.net/')";
253
                    oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
254
                    oaf:identifier = set(xpath:"$varPmId//value", @identifierType = "pmid";);
255
                    oaf:identifier = set(xpath:"$varUrn//value", @identifierType = "urn";);
256
                    oaf:identifier = set(xpath:"$varIsbn", @identifierType = "isbn";);
257
                    oaf:identifier = set(xpath:"$varHandle", @identifierType = "handle";);
258
                    oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
259

  
260
                    // journal data; PURE exposes ISSN in field ns2:isPartOf
261

  
262
                    //$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '('))";
263
                    //$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1'))";
264
                    //$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1')), //dc:source[//oaf:datasourceprefix[.='od______2097'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1]";
265
                    $varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1')), //dc:source[//oaf:datasourceprefix[.='od______2097'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1], //dc:source[1][//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and //dc:source[not(starts-with(., 'ISSN '))]]/replace(., '^(.*?)\.\s*\d{4}.*$', '$1'), //dc:source[//oaf:datasourceprefix[.='issn22953671'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1]/substring-before(., ';')";
266
                    //$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1')), //dc:source[//oaf:datasourceprefix[.='od______2097'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1], //dc:source[1][//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and //dc:source[not(starts-with(., 'ISSN '))]]/replace(., '^(.*?)\.\s*\d{4}.*$', '$1'), //dc:source[//oaf:datasourceprefix[.='issn22953671'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1]/substring-before(., ';'), //dc:source[//*[local-name()='isPartOf']][matches(., '.*'.*')]";
267

  
268
                    //$varISSN = xpath:"//oai:setSpec[starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:'))";
269
                    //$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')";
270
                    //$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:source[//oaf:datasourceprefix='issn20381026'][matches(.,'\d\d\d\d-\d\d\d\d')][1], //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.='od______2097'] and matches(., '\d{4}-\d{3}[\dX]')]";
271
                    //$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:') or starts-with(., 'ISSN ')]/normalize-space(replace(., 'ISSN[:\s](.*)$', '$1')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.=('od______2097', 'issn22953671')] and matches(., '^\d{4}-\d{3}[\dX]$')][1]";
272
                    $varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:') or starts-with(., 'ISSN ')]/normalize-space(replace(., 'ISSN[:\s](.*)$', '$1')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.=('od______2097', 'issn22953671')] and matches(., '^\d{4}-\d{3}[\dX]$')][1], //*[local-name()='isPartOf'][starts-with(., 'urn:ISSN:')]/substring-after(., 'urn:ISSN:')";
273

  
274
                    $varEISSN = xpath:"//dc:relation[starts-with(., 'eissn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/eissn/')]/replace(normalize-space(substring-after(., 'eissn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')";
275
                    //oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";);
276

  
277
                    //to be improved: many identical checks
278
                    //$varVol = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/replace(., '^.*?\.\s*(\d{4})($|,.*$)', '$1')";
279
                    $varVol = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/replace(., '^.*?\.\s*(\d{4})($|,.*$)', '$1'), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', vol. ')]/normalize-space(substring-before(substring-after(., ', vol. '), ','))";
280
                    $varIss = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(substring-before(substring-after(., 'Nr.'), ','))";
281
                    //$varSp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(tokenize(substring-after(., ', p.'), '-')[1])";
282
                    // Start page, from two alternative layouts of dc:source:
                    //  1. datasource od______2712, when some dc:source starts with 'ISSN ': first '-'-separated token after ', p.';
                    //  2. records whose isPartOf starts with 'urn:ISSN:': first number of the ', pp. N-M' page range.
                    // The trailing part after the page range is optional, so a dc:source that ends with the range (e.g. '..., pp. 12-34')
                    // still yields the page number; fn:replace returns its input unchanged when the pattern does not match.
                    $varSp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(tokenize(substring-after(., ', p.'), '-')[1]), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', pp. ')]/replace(., '^.*, pp. (\d*)-\d*([\s,\.;].*)?$', '$1')";
283
                    //$varEp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(reverse(tokenize(substring-after(., ', p.'), '-'))[1])";
284
                    // End page, from two alternative layouts of dc:source:
                    //  1. datasource od______2712, when some dc:source starts with 'ISSN ': last '-'-separated token after ', p.';
                    //  2. records whose isPartOf starts with 'urn:ISSN:': second number of the ', pp. N-M' page range.
                    // The trailing part after the page range is optional, so a dc:source that ends with the range (e.g. '..., pp. 12-34')
                    // still yields the page number; fn:replace returns its input unchanged when the pattern does not match.
                    $varEp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(reverse(tokenize(substring-after(., ', p.'), '-'))[1]), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', pp. ')]/replace(., '^.*, pp. \d*-(\d*)([\s,\.;].*)?$', '$1')";
285

  
286
                    //to be improved: many empty attributes
287
                    // Emits oaf:journal with $varJournalTitle (assigned earlier in the script) as its value and the ISSN, e-ISSN, volume,
                    // issue, start-page and end-page variables as attributes. Attributes are written even when their variable is empty.
                    oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";, @vol = xpath:"$varVol";, @iss = xpath:"$varIss";, @sp = xpath:"$varSp";, @ep = xpath:"$varEp";);
288

  
289
                    // Datasource dovemedicalp: the full text is a local dump file whose name is the part of the OAI header identifier
                    // after 'oai:dovepress.com/', with '.pdf' appended. The else branch only assigns the dummy variable (no output).
                    if xpath:"//oaf:datasourceprefix[.='dovemedicalp']" oaf:fulltext = xpath:"concat('file:///mnt/downloaded_dumps/dovepress/', substring-after(//*[local-name()='header']/*[local-name()='identifier'], 'oai:dovepress.com/'), '.pdf')"; else $varDummy= "''";
290

  
291
                    // Datasource od______3848 with a dc:format of 'application/pdf': the full text is the dc:identifier that (compared
                    // case-insensitively, after whitespace normalisation) starts with 'https://cris.cumulus.vub.ac.be/' and ends with '.pdf'.
                    // The else branch only assigns the dummy variable (no output).
                    if xpath:"//oaf:datasourceprefix[.='od______3848'] and //dc:format[.='application/pdf']" oaf:fulltext = xpath:"//dc:identifier[ends-with(lower-case(normalize-space(.)), '.pdf')][starts-with(lower-case(normalize-space(.)), 'https://cris.cumulus.vub.ac.be/')]"; else $varDummy= "''";
292
                    // Datasources doaj21976775 / issn21976775 with a dc:identifier starting with 'https://policyreview.info/node/':
                    // the full-text URL is that identifier with '/pdf' appended. The else branch only assigns the dummy variable (no output).
                    if xpath:"//oaf:datasourceprefix[.='doaj21976775' or .='issn21976775'] and //dc:identifier[starts-with(normalize-space(.), 'https://policyreview.info/node/')]" oaf:fulltext = xpath:"concat(//dc:identifier[starts-with(normalize-space(.), 'https://policyreview.info/node/')]/normalize-space(.), '/pdf')"; else $varDummy= "''";
293
                    // Datasource od______2712: each dc:relation starting with 'https://etalpykla.lituanistikadb.lt/fedora/get/' is selected
                    // and its whitespace-normalised value is written to oaf:fulltext (presumably once per selected relation - verify the
                    // semantics of 'apply'). The condition is always true, so the else branch is never taken.
                    apply xpath:"//dc:relation[starts-with(., 'https://etalpykla.lituanistikadb.lt/fedora/get/')][//oaf:datasourceprefix[.='od______2712']]" if xpath:"true()" oaf:fulltext = xpath:"normalize-space(.)"; else $varDummy = "''";
294
                    // Datasource od______4149 with a dc:format of 'application/pdf': the full text is the dc:identifier whose lower-cased,
                    // whitespace-normalised value contains '/datastream/'. The else branch only assigns the dummy variable (no output).
                    if xpath:"//oaf:datasourceprefix[.='od______4149'] and //dc:format[.='application/pdf']" oaf:fulltext = xpath:"//dc:identifier[contains(lower-case(normalize-space(.)), '/datastream/')]"; else $varDummy= "''";
295

  
296
                    // community
297
                    // concept should not appear with an empty id attribute, i.e. when there is no community - ugly, but seems to work (oaf:datasourceprefix = just any field available in all records)
298
                    //$varCommunity = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/')]/substring-after(., 'url:https://openaire.eu/communities/')";
299
                    // Community identifiers: for every relation element starting with 'url:https://openaire.eu/communities/' or
                    // 'url:https://zenodo.org/communities/', the text after the 'url:' prefix (i.e. the community URL).
                    $varCommunityAtt = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/') or starts-with(., 'url:https://zenodo.org/communities/')]/substring-after(., 'url:')";
300
                    // Element values for the community concepts: the same relations are selected as for $varCommunityAtt, but because each
                    // selected value starts with 'url:', substring-before(., 'url:') yields an empty string per relation - so the sequence
                    // has one (empty) item per community and is empty when no community relation exists.
                    $varCommunityVal = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/') or starts-with(., 'url:https://zenodo.org/communities/')]/substring-before(., 'url:')";
301
                    //oaf:concept = set(xpath:"//oaf:datasourceprefix[string-length($varCommunity) gt 0]/''", @id = $varCommunity;);
302
                    // Emits oaf:concept from $varCommunityVal, with @id taken from $varCommunityAtt at the current position via
                    // subsequence(..., position(), 1) - presumably one concept per community, paired positionally; verify against the
                    // transformation engine's handling of sequences in set().
                    oaf:concept = set(xpath:"$varCommunityVal", @id = xpath:"subsequence($varCommunityAtt,position(),1)";);
303

  
304
                    end</CODE>
148 305
            </SCRIPT>
149 306
        </CONFIGURATION>
150 307
        <STATUS/>

Also available in: Unified diff