<?xml version="1.0" encoding="UTF-8"?>
<RESOURCE_PROFILE>
  <HEADER>
    <RESOURCE_IDENTIFIER value="2e189df5-0985-444d-aa51-702f2a62176b_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
    <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
    <RESOURCE_KIND value="TransformationRuleDSResources"/>
    <RESOURCE_URI value=""/>
    <DATE_OF_CREATION value="2016-09-07T10:02:27+00:00"/>
  </HEADER>
  <BODY>
    <CONFIGURATION>
      <IMPORTED/>
      <SCRIPT>
        <TITLE>dc_cleaning_OPENAIREplus_erc</TITLE>
        <!--
          Transformation rule script declared as "dc_cleaning_OpenAIREplus_erc".
          It reads fields such as PUB_AUTHORS, PUB_TITLE, PUB_DATE_PUBLICATION, PROJECT_ID,
          PUB_ACCESS_OPEN and CrossRef (cr:) elements from the input record and assigns
          dc:, dr:, dri: and oaf: output fields.
          Inside CODE, lines starting with // are script comments. The text of CODE is data:
          the less-than sign must stay escaped and script lines are kept unindented.
          NOTE(review): TITLE and declare_script differ in letter case (OPENAIREplus vs
          OpenAIREplus); confirm that this is intended.
        -->
        <CODE>declare_script "dc_cleaning_OpenAIREplus_erc";
declare_ns oaf = "http://namespace.openaire.eu/oaf";
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
declare_ns dc = "http://purl.org/dc/elements/1.1/";
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
declare_ns cr = "http://www.crossref.org/qrschema/2.0";
// Helper values. $var0 and $varDummy both hold the literal ''; $varDummy is the throw-away
// target assigned in the else branches below. $var1 is the prefix used when building oaf:projectid.
$var0 = "''";
$var1 = "'corda_______::'";
$varDummy = "''";
// NOTE(review): the next two variables are not referenced again in this script; the
// oaf:hostedBy rule near the end repeats the same literal values.
$varUnknownRepoId = "'openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18'";
$varUnknownRepoName = "'Unknown Repository'";
// CrossRef contributors with role author, skipping entries whose surname is the ampersand
// character (codepoint 38) followed by NA;
$varCrossRefAuthor = xpath:"//cr:crossref_result//cr:contributors/cr:contributor[@contributor_role='author'][not(./cr:surname=concat(codepoints-to-string(38), 'NA;'))]";
// Static values obtained through getValue(PROFILEFIELD, ...).
// NOTE(review): presumably these are read from the profile of the datasource named by
// dri:repositoryId; confirm against the getValue implementation.
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
static $varRepoid = xpath:"//dri:repositoryId";
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
// Identifier fields copied from the input record.
dri:objIdentifier = xpath:"//dri:objIdentifier";
dri:repositoryId = $varRepoid;
//dri:repositoryId = xpath:"//dri:repositoryId";
// The substitution replaces everything up to and including the last :: with :: itself.
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
// Creators, source 1: one dc:creator per CrossRef author, formatted as surname, given_name.
apply xpath:"//cr:crossref_result//cr:contributors/cr:contributor[@contributor_role='author'][not(./cr:surname=concat(codepoints-to-string(38), 'NA;'))]" if xpath:"true()" dc:creator = xpath:"concat(./cr:surname, ', ', ./cr:given_name)"; else $varDummy = "''";
// $varAuthors = xpath:"normalize-space(replace( replace( replace(//PUB_AUTHORS, codepoints-to-string(38), 'and') , '(\[.*\])|(\(.*\))|(et\.?\s*al\.?)', '') , '(\\x)(\S\S)', concat(codepoints-to-string((38)), '#', $2, ';') ))";
// $varAuthors = xpath:"normalize-space(replace( replace( replace(replace(replace(replace(replace(replace(replace(replace( replace( replace(//PUB_AUTHORS, codepoints-to-string(38), 'and') , '(\[.*\])|(\(.*\))|(et\.?\s*al\.?)', '') , '(\\xe4)', 'ä' ), '(\\xb4)', '´'), '(\\xf1)', 'ñ'), '(\\xe1)', 'á' ), '(\\xed)', 'í'), '(\\xe2)', 'â'), '(\\xf6)', 'ö' ), '(\\xf3)', 'ó'), '\\u2019', codepoints-to-string((8217))), '\d|\\u2020|\*', '' ))";
// Creators, source 2: PUB_AUTHORS cleaned into $varAuthors. In order, the nested replace calls:
//  - turn the ampersand character (codepoint 38) into the word and,
//  - drop bracketed or parenthesised text and et al,
//  - turn literal escape texts such as \xe4, \xf1 or \u0142 into the corresponding characters,
//  - drop digits, the literal text \u2020 and asterisks.
$varAuthors = xpath:"normalize-space(replace(replace(replace( replace( replace(replace(replace(replace(replace(replace(replace(replace(replace( replace( replace(//PUB_AUTHORS, codepoints-to-string(38), 'and') , '(\[.*\])|(\(.*\))|(et\.?\s*al\.?)', '') , '(\\xe4)', 'ä' ), '(\\xb4)', '´'), '(\\xf1)', 'ñ'), '(\\xe1)', 'á' ), '(\\xed)', 'í'), '(\\xe2)', 'â'), '(\\xe5)', 'å'), '(\\xf6)', 'ö' ), '(\\xf3)', 'ó'), '\\u0142', codepoints-to-string((322))), '\\u0144', codepoints-to-string((324))), '\\u2019', codepoints-to-string((8217))), '\d|\\u2020|\*', '' ))";
// Every rule below only applies when no CrossRef author was selected (not($varCrossRefAuthor)).
// NOTE(review): the conditions are evaluated independently and are not mutually exclusive
// (for example a value containing both ; and '., ' satisfies two of them); verify that this
// cannot produce duplicate creators.
// Both '; ' and ' / ' present: split on semicolons and slashes.
if xpath:"contains($varAuthors, '; ') and contains($varAuthors, ' / ') and not($varCrossRefAuthor)" dc:creator = xpath:"tokenize($varAuthors, '\s*;\s*|\s*/\s*')"; else $varDummy = "''";
// ' / ' present without '; ': keep the whole value as one creator.
if xpath:"not(contains($varAuthors, '; ')) and contains($varAuthors, ' / ') and not($varCrossRefAuthor)" dc:creator = xpath:"normalize-space($varAuthors)"; else $varDummy = "''";
// Semicolon present without ' / ': split on semicolons.
if xpath:"contains($varAuthors, ';') and not(contains($varAuthors, ' / ')) and not($varCrossRefAuthor)" dc:creator = xpath:"tokenize($varAuthors, '\s*;\s*')"; else $varDummy = "''";
//
// '., ' present: split after each full stop that is followed by a comma (optionally ', and ').
if xpath:"contains($varAuthors, '., ') and not($varCrossRefAuthor)" dc:creator = xpath:"tokenize($varAuthors, '\s*\.\s*, and |\s*\.,\s*')"; else $varDummy = "''";
//
// Sarajlic, Eldar
// Dykstra, P. A.
// ^[^,]*(,)[^,]*$
// ', and ' present without '., ': split on ', and' and on commas.
if xpath:"contains($varAuthors, ', and ') and not(contains($varAuthors, '., ')) and not($varCrossRefAuthor)" dc:creator = xpath:"tokenize($varAuthors, '\s*, and\s*|\s*,\s*')"; else $varDummy = "''";
// Monga, Mattia and Sicari, Sabrina
// Rojek J. and O\xf1ate E.
// Names joined by ' and ' (no ', and '): split on the word and.
if xpath:"matches($varAuthors, '(\S+,?\s*\S+)((\s*and\s*)(\S+,?\s*\S+))+') and contains($varAuthors, ' and ') and not(contains($varAuthors, ', and ')) and not($varCrossRefAuthor)" dc:creator = xpath:"tokenize($varAuthors, '\s+and\s+')"; else $varDummy = "''";
// ^[^,]*,\s*[^,]\S\s*?$
// Exactly one comma and none of ' and ', '., ', ' / ': keep the whole value as one creator.
if xpath:"matches($varAuthors, '^[^,]*(,)[^,]*$') and contains($varAuthors, ',') and not(contains($varAuthors, ' and ') or contains($varAuthors, '., ') or contains($varAuthors, ' / ')) and not($varCrossRefAuthor)" dc:creator = xpath:"$varAuthors"; else $varDummy = "''";
// here
// Remaining comma-separated values (more than one comma, no '., ', '; ' or ' / '): split on
// commas and on the word and.
if xpath:"not(matches($varAuthors, '(\S+,\s*\S+)((\s*and\s*)(\S+,\s*\S+))+') ) and not(matches($varAuthors, '^[^,]*(,)[^,]*$')) and contains($varAuthors, ',') and not(contains($varAuthors, '., ') or contains($varAuthors, '; ') or contains($varAuthors, ' / ')) and not($varCrossRefAuthor)" dc:creator = xpath:"tokenize($varAuthors, '\s*,\s*|\s+and\s+')"; else $varDummy = "''";
// Dykstra, P. A. // Schuler B, Hofmann H.
//
// if xpath:"count(tokenize(., ',')) &lt; 3 and contains(//PUB_AUTHORS, ', ') and not(contains(//PUB_AUTHORS, ' and ') or contains(//PUB_AUTHORS, '., ') or contains(//PUB_AUTHORS, ' / ')) and not($varCrossRefAuthor)" dc:creator = xpath:"//PUB_AUTHORS/normalize-space(.)"; else $varDummy = "''";
//
// Leticia Sabsay
// No comma, opening parenthesis, ' and ' or semicolon: keep the whole value as one creator.
if xpath:"not(contains($varAuthors, ',')) and not(contains($varAuthors, '(')) and not(contains($varAuthors, ' and ')) and not(contains($varAuthors, '; ') or contains($varAuthors, ';')) and not($varCrossRefAuthor)" dc:creator = xpath:"normalize-space($varAuthors)"; else $varDummy = "''";
// if xpath:"not(contains(//PUB_AUTHORS, ',')) and not(contains(//PUB_AUTHORS, '(')) and not(contains(//PUB_AUTHORS, ' and ')) and not(contains(//PUB_AUTHORS, '; ') or contains(//PUB_AUTHORS, ';')) and not($varCrossRefAuthor)" dc:creator = xpath:"//PUB_AUTHORS/concat(tokenize(., '\s')[last()], ', ', normalize-space( replace(normalize-space(), tokenize(., '\s')[last()], '') ))"; else $varDummy = "''";
// if xpath:"not(contains(//PUB_AUTHORS, ' ')) and not(contains(//PUB_AUTHORS, ',')) and not(contains(//PUB_AUTHORS, '(')) and not(contains(//PUB_AUTHORS, ' and ')) and not(contains(//PUB_AUTHORS, '; ')) and not($varCrossRefAuthor)" dc:creator = xpath:"//PUB_AUTHORS/normalize-space(.)"; else $varDummy = "''";
//
// if xpath:"contains(//PUB_AUTHORS, '(') and not(contains(//PUB_AUTHORS, ',')) and not(contains(//PUB_AUTHORS, ' and ')) and not(contains(//PUB_AUTHORS, '; ')) and not($varCrossRefAuthor)" dc:creator = xpath:"//PUB_AUTHORS/normalize-space(substring-before(., '('))"; else $varDummy = "''";
//
// Title and source: whitespace-normalised PUB_TITLE; CrossRef series or journal title as dc:source.
dc:title = xpath:"//PUB_TITLE/normalize-space(.)";
//apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
//apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
dc:source = xpath:"//cr:series_title | //cr:journal_title";
//dc:contributor = xpath:"//dc:contributor";
//dc:description = xpath:"//dc:description";
// NOTE(review): $varHttpTest is assigned here and not referenced again in this script.
$varHttpTest = "''";
// When the record has no url element, each CrossRef DOI becomes a dx.doi.org link in dc:identifier.
apply xpath:"//cr:doi" if xpath:". and not(//url)" dc:identifier = xpath:"concat('http://dx.doi.org/', .)"; else $varDummy = "''";
dr:dateOfCollection = xpath:"//dri:dateOfCollection";
static dr:dateOfTransformation = xpath:"current-dateTime()";
dc:type = xpath:"//cr:doi/@type";
//dc:language = Convert(xpath:"//dc:language", Languages);
// Language is hard-coded rather than read from the record.
dc:language = "eng";
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
// Dates: dc:date is the raw PUB_DATE_PUBLICATION; oaf:dateAccepted goes through Convert with
// the DateISO8601 converter and the pattern yyyy-MM-dd.
// NOTE(review): the meaning of the "min()" argument is defined by Convert; confirm there.
dc:date = xpath:"//PUB_DATE_PUBLICATION";
oaf:dateAccepted = Convert(xpath:"descendant-or-self::PUB_DATE_PUBLICATION", DateISO8601, "yyyy-MM-dd", "min()");
// dc:date values starting with info:eu-repo/date have the embargoEnd prefix stripped and are
// written to oaf:embargoenddate.
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
// dc:relation values with exactly 6 characters after the FP7 grantAgreement prefix are turned
// into oaf:projectid; any other relation is copied to dc:relation.
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//
// Project id built from the $var1 prefix and the PROJECT_ID field.
// NOTE(review): there is no guard for a record without PROJECT_ID, in which case the value
// would be the bare prefix; confirm that PROJECT_ID is always present.
oaf:projectid=xpath:"concat($var1, //PROJECT_ID)";
//
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
//
//
//
// NOTE(review): $varDsType is only used by the commented-out hostingDatasourceid rule below.
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
//
//apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
// Category: converted from the type attribute of each CrossRef DOI with the TextTypologies
// converter; records without any cr:doi get the fixed code 0001.
apply xpath:"//cr:doi" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(./@type)", TextTypologies); else dc:type = xpath:".";
if xpath:"not(//cr:doi)" dr:CobjCategory = "0001"; else $varDummy = "''";
// Access rights: OPEN only when PUB_ACCESS_OPEN equals the string true, otherwise CLOSED.
if xpath:"//PUB_ACCESS_OPEN = 'true'" oaf:accessrights = "OPEN"; else oaf:accessrights = "CLOSED";
//
// Journal: the ISSN is taken from PUB_PERMANENT_ID when it starts with 'ISSN ', then replaced by
// the first CrossRef print ISSN (rewritten as two groups joined by a hyphen) when one exists.
$varISSN = "''";
if xpath:"starts-with(//PUB_PERMANENT_ID, 'ISSN ')" $varISSN = xpath:"normalize-space(substring-after(//PUB_PERMANENT_ID, 'ISSN '))"; else $varDummy = "''";
if xpath:"//cr:issn[@type='print'][1]" $varISSN = xpath:"replace(//cr:issn[@type='print'][1], '(\d{4})(.*)(\d{4}|\d{3}X)', '$1-$3')"; else $varDummy = "''";
// NOTE(review): $varEISSN is assigned but not passed to the oaf:journal rule below.
$varEISSN = xpath:"//cr:issn[@type='electronic']";
// Journal title: CrossRef journal or series title when present, otherwise PERIODIC_TITLE.
$varJournal = "''";
if xpath:"//cr:journal_title | //cr:series_title" $varJournal = xpath:"//cr:journal_title | //cr:series_title"; else $varJournal = xpath:"//PERIODIC_TITLE";
oaf:journal = set($varJournal, @issn=$varISSN;);
//
// Provenance: collectedFrom carries the name and id fetched above; hostedBy is fixed to the
// Unknown Repository entry.
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
// static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
static oaf:hostedBy = set("''", @name="Unknown Repository";, @id="openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18";);
//
// $varId = identifierExtract('["//PUB_PERMANENT_ID", "//PERIODIC_NUMBER", "//PUB_RELEVANT_PAGES", "//PERIODIC_TITLE", "//PUB_TITLE"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;>]*/[^\s"&lt;>]+[^\).$])');
// oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
// The CrossRef DOI is emitted as an identifier of type doi.
oaf:identifier = set(xpath:"//cr:doi", @identifierType = "doi";);
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
end</CODE>
      </SCRIPT>
    </CONFIGURATION>
    <STATUS/>
    <SECURITY_PARAMETERS/>
  </BODY>
</RESOURCE_PROFILE>