Project

General

Profile

« Previous | Next » 

Revision 54066

include further funders, soften funding stream pattern (let pass EC programmes like FP7-...), add superTypes

View differences:

modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc_cleaning_OPENAIREplus_compliant_dcidentifier_last.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="50cbbb8f-b9d4-486d-9895-a80ca7b963b9_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2018-04-10T15:35:05+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_compliant_dcidentifier_last</TITLE>
14
                <CODE>declare_script "dc_cleaning_OpenAIREplus_compliant_dcidentifier_last";
15
declare_ns oaf = "http://namespace.openaire.eu/oaf";
16
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
17
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
18
declare_ns dc = "http://purl.org/dc/elements/1.1/";
19
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
20
declare_ns xs = "http://www.w3.org/2001/XMLSchema";
21
$var0 = "''";
22

  
23
//$var1 = "'corda_______::'";
24
//$var2 = "'corda__h2020::'";
25
$varFP7 = "'corda_______::'";
26
$varH2020 = "'corda__h2020::'";
27
$varAKA = "'aka_________::'";     // tbd, no statements yet
28
$varAFF = "'aff_________::'";
29
$varARC = "'arc_________::'";
30
$varCONICYT = "'conicytf____::'";
31
$varDFG = "'dfgf________::'";
32
$varFCT="'fct_________::'";
33
$varFWF = "'fwf_________::'";
34
$varHRZZ = "'irb_hr______::'";     // same prefix value as $varMZOS; NOTE(review): confirm the shared prefix is intended
35
$varMESTD = "'mestd_______::'";
36
$varMIUR = "'miur________::'";     // tbd, no statements yet
37
$varMZOS = "'irb_hr______::'";
38
$varNHMRC = "'nhmrc_______::'";
39
$varNIH = "'nih_________::'";
40
$varNSF = "'nsf_________::'";
41
$varNWO = "'nwo_________::'";
42
$varRCUK = "'rcuk________::'";
43
$varSFI ="'sfi_________::'";
44
$varSGOV = "'sgov________::'";
45
$varSNSF = "'snsf________::'";
46
$varTARA = "'taraexp_____::'";
47
$varTUBITAK = "'tubitakf____::'";
48
$varWT = "'wt__________::'";
49

  
50
$varDummy = "''";
51
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
52
static $varRepoid = xpath:"//dri:repositoryId";
53
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]);
54
dri:objIdentifier = xpath:"//dri:objIdentifier";
55
dri:repositoryId = $varRepoid;
56
// this can be made easier
57
dri:recordIdentifier = xpath:"//dri:recordIdentifier";
58
apply xpath:"//dc:creator" if xpath:"string-length(.) &gt; 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
59
if xpath:"//dc:title[string-length(.)&gt; 0]" $varDummy = "''"; else dc:coverage = skipRecord();
60
dc:title = xpath:"//dc:title[string-length(.) &gt; 0]/normalize-space(.)";
61
apply xpath:"//dc:subject" if xpath:"string-length(.) &gt; 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
62
apply xpath:"//dc:publisher" if xpath:"string-length(.) &gt; 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
63
apply xpath:"//dc:source" if xpath:"string-length(.) &gt; 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
64
dc:contributor = xpath:"//dc:contributor";
65
dc:description = xpath:"//dc:description";
66
dc:format = xpath:"//dc:format";
67
$varHttpTest = "''";
68
if xpath:"//dc:identifier[starts-with(., 'http')][last()]" $varHttpTest = "true"; else dc:coverage = skipRecord();
69
apply xpath:"//dc:identifier" if xpath:"compare(normalize-space(.), //dc:identifier[starts-with(., 'http')][last()]) = 0" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
70
dr:dateOfCollection = xpath:"//dri:dateOfCollection";
71
static dr:dateOfTransformation = xpath:"current-dateTime()";
72
// dc:type = xpath:"//dc:type";
73
dc:language = Convert(xpath:"//dc:language", Languages);
74
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
75

  
76
$varDateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
77
dc:date = xpath:"//dc:date";
78
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
79
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
80

  
81
//apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6 or string-length(substring-before(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/'), '/')) = 6" oaf:projectid = RegExpr(xpath:"substring(normalize-space(.),1,41)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else $varDummy = "''";
82
//dc:relation = xpath:"normalize-space(.)";
83
//apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/H2020/')) = 6 or string-length(substring-before(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/H2020/'), '/')) = 6" oaf:projectid = RegExpr(xpath:"substring(normalize-space(.),1,43)", $var2, "s/^(.*info:eu-repo\/grantAgreement\/EC\/H2020\/)//gmi"); else $varDummy = "''";
84

  
85
// FP7
86
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7(-[^/]*)?/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2006][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7(-[^/]*)?/)(\d\d\d\d\d\d)(.*)', '$4', 'i')))";
87
// ERC (provided by OAPEN)
88
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
89
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
90
// H2020
91
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020(-[^/]*)?/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2013][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020(-[^/]*)?/)(\d\d\d\d\d\d)(.*)', '$4', 'i')))";
92
// FCT
93
//oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', '$3', 'i')))";
94
// MESTD
95
// disabled: identical to the MESTD statement in the alphabetical funder list further down, which already emits these project ids (keeping both emits every MESTD id twice)
//oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', '$3', 'i'))";
96
// WT
97
//oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
98
// AFF
99
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varAFF, replace(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3}).*', '$3', 'i'))";
100
// ARC ([A-Z]+[\d/]*|\d+)
101
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*)/([A-Z]+[\d/]*|\d+)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varARC, replace(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*?)/([A-Z]+[\d/]*|\d+)', '$3', 'i'))";
102
// CONICYT \d{7,8}
103
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', 'i')]/concat($varCONICYT, replace(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', '$1', 'i'))";
104
// DFG \d{7,9}
105
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*)/(.*?)(\d{7,9})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varDFG, replace(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*?)/.*?(\d{7,9})', '$3', 'i'))";
106
// FCT
107
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fct/.*/.*?(\d+).*', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), 'info:eu-repo/grantagreement/fct/[^/]*/([^/]*\d+[^/]*)(/.*)*$', '$1', 'i'))";
108
// FWF [A-Z]{1,3} \d*
109
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fwf/.*/.*?([A-Z]{1,3} \d*).*', 'i')]/concat($varFWF, replace(normalize-space(.), 'info:eu-repo/grantagreement/fwf/.*/.*?([A-Z]{1,3} \d*).*', '$1', 'i'))";
110
// HRZZ info:eu-repo/grantagreement/HRZZ/[^/]*/([^/]*|[^/]*/\d*)(/.*)?
111
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', 'i')]/concat($varHRZZ, replace(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', '$1', 'i'))";
112
// MESTD
113
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', '$3', 'i'))";
114
// MZOS \d{3}-\d{7}-\d{4}
115
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/mzos/.*/.*?(\d{3}-\d{7}-\d{4}).*', 'i')]/concat($varMZOS, replace(normalize-space(.), 'info:eu-repo/grantagreement/mzos/.*/.*?(\d{3}-\d{7}-\d{4}).*', '$1', 'i'))";
116
// NHMRC \d{3,6}
117
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/.*/.*?(\d{3,6})', 'i')]/concat($varNHMRC, replace(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/.*/.*?(\d{3,6})', '$1', 'i'))";
118
// NIH ([A-Z\d]*-[A-Z\d]*|ALM 1200300-300-0-1|CIT S&amp;?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1)
119
// the matches() filter uses exactly the pattern that replace() extracts with; a relation that passed a looser filter would be returned unchanged by replace() and the whole relation string would end up as the project id
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nih/.*/.*?([A-Z\d]*-[A-Z\d]*|ALM 1200300-300-0-1|CIT S.?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1)', 'i')]/concat($varNIH, replace(normalize-space(.), 'info:eu-repo/grantagreement/nih/.*/.*?([A-Z\d]*-[A-Z\d]*|ALM 1200300-300-0-1|CIT S.?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1)', '$1', 'i'))";
120
// NSF (\d{7}|\d{2}[A-Z]\d{4})
121
//
122
// SNSF
123
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/[^/]+', 'i')]/concat($varSNSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
124
// TUBITAK
125
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/\d{3}[A-Z]\d{2,3}', 'i')]/concat($varTUBITAK, replace(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/(\d{3}[A-Z]\d{2,3})(/.*)?', '$1', 'i'))";
126
// WT
127
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i'))";
128

  
129
apply xpath:"//dc:relation" if xpath:"not(contains(., 'info:eu-repo/grantAgreement/'))" dc:relation = xpath:"normalize-space(.)"; else $varDummy = "''";
130

  
131
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
132
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
133
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
134
//
135
//
136
oaf:collectedDatasourceid = xpath:"$varDatasourceid";
137
//
138

  
139
//apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
140
$varCobjCategory = Convert(xpath:"//dc:type", TextTypologies);
141
$varSuperType = Convert(xpath:"normalize-space($varCobjCategory)", SuperTypes);
142
dr:CobjCategory = set($varCobjCategory, @type = $varSuperType;);
143

  
144
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
145
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
146
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
147
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
148
//
149
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
150
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
151
//
152
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
153
// ISBN candidates: dc:identifier values starting with 978- or 979-, taken only when the datasource prefix is od______4039; the or-group is parenthesised because XPath evaluates and before or
$varIsbn = xpath:"//dc:identifier[//oaf:datasourceprefix[.='od______4039'] and (starts-with(., '978-') or starts-with(., '979-'))]";
154
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
155
oaf:identifier = set(xpath:"$varIsbn", @identifierType = "isbn";);
156
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
157

  
158
$varISSN = xpath:"//dc:relation[matches(normalize-space(.), '^\d{4}-\d{3}[\dX]$')]/normalize-space(.)";
159
$varJournalTitle = xpath:"//dc:relation[//oaf:datasourceprefix[.='od______4039'] and //dc:relation[matches(normalize-space(.), '\d{4}-\d{3}[\dX]')]][1]/replace(., '([^,;.]*)[,;.].*', '$1')";
160
$varJournalSp = xpath:"//dc:format[//oaf:datasourceprefix[.='od______4039'] and //dc:relation[matches(normalize-space(.), '\d{4}-\d{3}[\dX]')] and matches(., 'S\. \d*-\d*')]/substring-before(substring-after(., 'S. '), '-')";
161
$varJournalEp = xpath:"//dc:format[//oaf:datasourceprefix[.='od______4039'] and //dc:relation[matches(normalize-space(.), '\d{4}-\d{3}[\dX]')] and matches(., 'S\. \d*-\d*')]/substring-after(., '-')";
162
oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";, @sp = xpath:"$varJournalSp";, @ep = xpath:"$varJournalEp";);
163

  
164
end</CODE>
165
            </SCRIPT>
166
        </CONFIGURATION>
167
        <STATUS/>
168
        <SECURITY_PARAMETERS/>
169
    </BODY>
170
</RESOURCE_PROFILE>

Also available in: Unified diff