Revision 50508
Added by Sandro La Bruzzo about 6 years ago
dc2oaf_cleaning_OPENAIREplus_puma.xml | ||
---|---|---|
4 | 4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
5 | 5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
6 | 6 |
<RESOURCE_URI value=""/> |
7 |
<DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/>
|
|
7 |
<DATE_OF_CREATION value="2016-03-10T19:09:16+00:00"/>
|
|
8 | 8 |
</HEADER> |
9 | 9 |
<BODY> |
10 | 10 |
<CONFIGURATION> |
11 | 11 |
<IMPORTED/> |
12 | 12 |
<SCRIPT> |
13 | 13 |
<TITLE>dc_cleaning_OPENAIREplus_puma</TITLE> |
14 |
<CODE><![CDATA[ |
|
15 |
declare_script "dc_cleaning_OpenAIREplus_compliant_puma"; |
|
16 |
declare_ns oaf = "http://namespace.openaire.eu/oaf"; |
|
17 |
declare_ns dri = "http://www.driver-repository.eu/namespace/dri"; |
|
18 |
declare_ns dr = "http://www.driver-repository.eu/namespace/dr"; |
|
19 |
declare_ns dc = "http://purl.org/dc/elements/1.1/"; |
|
20 |
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance"; |
|
21 |
$var0 = "''"; |
|
22 |
$var1 = "'corda_______::$1'"; |
|
23 |
$varDummy = "''"; |
|
24 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
25 |
static $varRepoid = xpath:"//dri:repositoryId"; |
|
26 |
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]); |
|
27 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
|
28 |
dri:repositoryId = $varRepoid; |
|
29 |
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/"); |
|
30 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
31 |
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
32 |
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
33 |
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
34 |
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
35 |
dc:contributor = xpath:"//dc:contributor"; |
|
36 |
dc:description = xpath:"//dc:description"; |
|
37 |
$varHttpTest = "''"; |
|
38 |
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord(); |
|
39 |
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; |
|
40 |
static dr:dateOfCollection = getValue(CURRENTDATE, []); |
|
41 |
// dc:type = xpath:"//dc:type"; |
|
42 |
dc:language = Convert(xpath:"//dc:language", Languages); |
|
43 |
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord(); |
|
44 |
dc:date = xpath:"//dc:date"; |
|
45 |
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()"); |
|
46 |
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''"; |
|
47 |
$varPrj1 = RegExpr(xpath:"//dc:relation[1][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]", $var1, "s/^.*info:eu-repo\/grantAgreement\/EC\/FP7\/(\d\d\d\d\d\d).*//gmi"); |
|
48 |
$varPrj2 = RegExpr(xpath:"//dc:relation[2][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]", $var1, "s/^.*info:eu-repo\/grantAgreement\/EC\/FP7\/(\d\d\d\d\d\d).*//gmi"); |
|
49 |
$varPrj3 = RegExpr(xpath:"//dc:relation[3][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]", $var1, "s/^.*info:eu-repo\/grantAgreement\/EC\/FP7\/(\d\d\d\d\d\d).*//gmi"); |
|
50 |
$varPrj4 = RegExpr(xpath:"//dc:relation[4][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]", $var1, "s/^.*info:eu-repo\/grantAgreement\/EC\/FP7\/(\d\d\d\d\d\d).*//gmi"); |
|
51 |
$varPrj5 = RegExpr(xpath:"//dc:relation[5][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]", $var1, "s/^.*info:eu-repo\/grantAgreement\/EC\/FP7\/(\d\d\d\d\d\d).*//gmi"); |
|
52 |
$varPrj6 = RegExpr(xpath:"//dc:relation[6][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]", $var1, "s/^.*info:eu-repo\/grantAgreement\/EC\/FP7\/(\d\d\d\d\d\d).*//gmi"); |
|
53 |
if xpath:"string-length($varPrj1) = 20" oaf:projectid = $varPrj1; else $var0 = "''"; |
|
54 |
if xpath:"string-length($varPrj2) = 20" oaf:projectid = $varPrj2; else $var0 = "''"; |
|
55 |
if xpath:"string-length($varPrj3) = 20" oaf:projectid = $varPrj3; else $var0 = "''"; |
|
56 |
if xpath:"string-length($varPrj4) = 20" oaf:projectid = $varPrj4; else $var0 = "''"; |
|
57 |
if xpath:"string-length($varPrj5) = 20" oaf:projectid = $varPrj5; else $var0 = "''"; |
|
58 |
if xpath:"string-length($varPrj6) = 20" oaf:projectid = $varPrj6; else $var0 = "''"; |
|
59 |
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gm"); else dc:relation = xpath:"normalize-space(.)"; |
|
60 |
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gm"); else dc:relation = xpath:"normalize-space(.)"; |
|
61 |
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)"; |
|
62 |
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
63 |
// |
|
64 |
// |
|
65 |
// |
|
66 |
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]); |
|
67 |
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
68 |
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
69 |
// |
|
70 |
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:"."; |
|
71 |
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
72 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN"; |
|
73 |
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
74 |
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights); |
|
75 |
// |
|
76 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
77 |
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
78 |
// |
|
79 |
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)'); |
|
80 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
|
81 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
|
82 |
end |
|
83 |
]]></CODE> |
|
14 |
<CODE>declare_script "dc_cleaning_OpenAIREplus_compliant_puma"; |
|
15 |
declare_ns oaf = "http://namespace.openaire.eu/oaf"; |
|
16 |
declare_ns dri = "http://www.driver-repository.eu/namespace/dri"; |
|
17 |
declare_ns dr = "http://www.driver-repository.eu/namespace/dr"; |
|
18 |
declare_ns dc = "http://purl.org/dc/elements/1.1/"; |
|
19 |
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance"; |
|
20 |
$var0 = "''"; |
|
21 |
$varFP7 = "'corda_______::'"; |
|
22 |
$varH2020 = "'corda__h2020::'"; |
|
23 |
$varFCT="'fct_________::'"; |
|
24 |
$varCorda = "'corda_______::'"; |
|
25 |
$varDummy = "''"; |
|
26 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
27 |
static $varRepoid = xpath:"//dri:repositoryId"; |
|
28 |
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]); |
|
29 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
|
30 |
dri:repositoryId = $varRepoid; |
|
31 |
dri:recordIdentifier = xpath:"//dri:recordIdentifier"; |
|
32 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
33 |
if xpath:"//dc:title[string-length(.)> 0]" $varDummy = "''"; else dc:coverage = skipRecord(); |
|
34 |
dc:title = xpath:"//dc:title[string-length(.) > 0]/normalize-space(.)"; |
|
35 |
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
36 |
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
37 |
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
38 |
dc:contributor = xpath:"//dc:contributor"; |
|
39 |
dc:description = xpath:"//dc:description"; |
|
40 |
dc:format = xpath:"//dc:format"; |
|
41 |
$varHttpTest = "''"; |
|
42 |
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord(); |
|
43 |
//apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; |
|
44 |
// |
|
45 |
if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']/preceding::dc:identifier[starts-with(., 'http')][ends-with(., '.pdf')]" dc:identifier = xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']/preceding::dc:identifier[starts-with(., 'http')][ends-with(., '.pdf')]"; else dc:identifier = xpath:"//dc:identifier[starts-with(., 'http')]"; |
|
46 |
// |
|
47 |
dr:dateOfCollection = xpath:"//dri:dateOfCollection"; |
|
48 |
static dr:dateOfTransformation = xpath:"current-dateTime()"; |
|
49 |
dc:type = xpath:"//dc:type"; |
|
50 |
dc:language = Convert(xpath:"//dc:language", Languages); |
|
51 |
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord(); |
|
52 |
dc:date = xpath:"//dc:date"; |
|
53 |
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()"); |
|
54 |
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''"; |
|
55 |
// FP7 |
|
56 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')]/concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))"; |
|
57 |
// H2020 |
|
58 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')]/concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))"; |
|
59 |
// FCT |
|
60 |
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', 'i')]/concat($varFCT, replace(normalize-space(.), '(info:eu-repo/grantagreement/fct)/(.+)/(\d+)(.*)', '$3', 'i'))"; |
|
61 |
dc:relation = xpath:"//dc:relation"; |
|
62 |
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)"; |
|
63 |
// |
|
64 |
oaf:collectedDatasourceid = xpath:"$varDatasourceid"; |
|
65 |
// |
|
66 |
dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies); |
|
67 |
dc:rights = xpath:"//dc:rights"; |
|
68 |
if xpath:"not(//dc:rights[text()='info:eu-repo/semantics/openAccess'])" oaf:accessrights = Convert(xpath:"//dc:rights", AccessRights); else oaf:accessrights = "OPEN"; |
|
69 |
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
70 |
// if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN"; |
|
71 |
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
72 |
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights); |
|
73 |
// |
|
74 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
75 |
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
76 |
// |
|
77 |
// $varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)'); |
|
78 |
$varId = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/doi/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/doi/)(10.*)'); |
|
79 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
|
80 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
|
81 |
end</CODE> |
|
84 | 82 |
</SCRIPT> |
85 | 83 |
</CONFIGURATION> |
86 | 84 |
<STATUS/> |
87 | 85 |
<SECURITY_PARAMETERS/> |
88 | 86 |
</BODY> |
89 |
</RESOURCE_PROFILE> |
|
87 |
</RESOURCE_PROFILE> |
Also available in: Unified diff
Imported the corrected version of the puma transformation rule.