Revision 36719
Added by Claudio Atzori about 9 years ago
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_ubiquitypress.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="b7773110-4beb-45f7-adf1-9cb4037fad58_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2014-10-23T09:58:00+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<IMPORTED/> |
|
12 |
<SCRIPT> |
|
13 |
<TITLE>dc_cleaning_OPENAIREplus_compliant_ubiquitypress</TITLE> |
|
<CODE><![CDATA[
declare_script "dc_cleaning_OpenAIREplus_compliant_ubiquitypress";
declare_ns oaf = "http://namespace.openaire.eu/oaf";
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
declare_ns dc = "http://purl.org/dc/elements/1.1/";
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
declare_ns oai = "http://www.openarchives.org/OAI/2.0/";

// ---------------------------------------------------------------------------
// Cleaning rules for Dublin Core records collected from Ubiquity Press.
// The script is kept in a CDATA section because its XPath and regular
// expressions contain markup characters that are illegal in plain element text.
// ---------------------------------------------------------------------------

// Helper values. $var0 and $var1 are passed as the second argument of RegExpr
// (presumably a prefix for the result - TODO confirm against the RegExpr
// implementation). $varDummy and $var0 are also used as targets of no-op
// else branches.
$var0 = "''";
$var1 = "'corda_______::'";
$varDummy = "''";

// Values looked up through getValue(PROFILEFIELD, ...) using the repository id.
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
static $varRepoid = xpath:"//dri:repositoryId";
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);

// Record identification.
dri:objIdentifier = xpath:"//dri:objIdentifier";
dri:repositoryId = $varRepoid;
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");

// Descriptive fields: empty values are dropped, the rest is whitespace-normalized.
// A dc:creator containing ';' is cut at the first ';'.
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0 and contains(., ';')" dc:creator = xpath:"substring-before(normalize-space(.), ';')"; else $varDummy = "''";
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0 and not(contains(., ';'))" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";

// Full-text link: take the dc:relation at the same position as the
// 'application/pdf' dc:format has among the dc:format elements, and turn the
// 'article/view' path segment into 'article/download'.
// apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'http')" oaf:fulltext = xpath:"replace(normalize-space(.),'article/view', 'article/download')"; else $varDummy = "''";
oaf:fulltext = xpath:"replace(//dc:relation[count(//dc:format[.='application/pdf']/preceding-sibling::dc:format)+1], 'article/view', 'article/download')";
// oaf:fulltext = xpath:"count(//dc:format[.='application/pdf']/preceding-sibling::dc:format)";
dc:contributor = xpath:"//dc:contributor";
dc:description = xpath:"//dc:description";

// Identifiers: a record without any dc:identifier starting with 'http' goes to
// skipRecord(). Identifiers starting with 'http' stay dc:identifier, all others
// become dr:CobjIdentifier.
// NOTE(review): the guard tests the raw value while the apply rule tests
// normalize-space(.) - confirm whether whitespace-padded URLs should be skipped.
$varHttpTest = "''";
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
static dr:dateOfCollection = getValue(CURRENTDATE, []);
// dc:type = xpath:"//dc:type";
dc:language = Convert(xpath:"//dc:language", Languages);
//dc:language = "eng";
dc:date = xpath:"//dc:date";
dc:format = xpath:"//dc:format";
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();

// Dates: oaf:dateAccepted is converted from the dc:date values; a dc:date
// starting with 'info:eu-repo/date' yields the embargo end date.
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";

// Project links: a dc:relation with exactly six characters after the FP7
// grant-agreement prefix becomes oaf:projectid; any other relation is kept.
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);

// Datasource information.
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);

// Typology: dc:type is converted with the TextTypologies vocabulary unless the
// record belongs to the OAI sets 'up:DP' or 'up:SMP', which are assigned the
// fixed categories 0031 and 0032.
apply xpath:"//dc:type" if xpath:". and not(//oai:setSpec[.='up:DP']) and not(//oai:setSpec[.='up:SMP'])" dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
if xpath:"//oai:setSpec[.='up:DP']" dr:CobjCategory = "0031"; else $varDummy = "''";
if xpath:"//oai:setSpec[.='up:SMP']" dr:CobjCategory = "0032"; else $varDummy = "''";

// Access rights: 'info:eu-repo/semantics' rights are converted with the
// AccessRights vocabulary; when no such dc:rights exists the record is OPEN.
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);

// Provenance: both elements carry the datasource official name and id.
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);

// DOI extraction from dc:identifier and dc:relation.
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";

// Journal: named after the datasource, @issn taken from the second dc:source.
// oaf:journal = set($varOfficialname, @issn = xpath:"replace(//oaf:datasourceprefix, 'issn', '')";);
oaf:journal = set($varOfficialname, @issn = xpath:"//dc:source[2]";);
end
]]></CODE>
81 |
</SCRIPT> |
|
82 |
</CONFIGURATION> |
|
83 |
<STATUS/> |
|
84 |
<SECURITY_PARAMETERS/> |
|
85 |
</BODY> |
|
86 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/xslt_cleaning_zenodo_datacite.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="33e8cc88-3b4a-4a68-b332-7cae8baad8dc_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2014-07-17T10:41:08+02:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<IMPORTED/> |
|
12 |
<SCRIPT> |
|
13 |
<TITLE>xslt_cleaning_zenodo_datacite</TITLE> |
|
14 |
<CODE><![CDATA[ |
|
<!--
  Cleaning stylesheet for DataCite records collected from Zenodo.
  It rebuilds each record as <record> containing the original header, the
  DataCite <resource> plus derived oaf:/dr: fields, and the original <about>.
  Conversions are delegated to the TransformationFunction extension object.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.1"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:oaf="http://namespace.openaire.eu/oaf"
    xmlns:dr="http://www.driver-repository.eu/namespace/dr"
    xmlns:TransformationFunction="eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy"
    extension-element-prefixes="TransformationFunction"
    exclude-result-prefixes="TransformationFunction">

  <!-- Parameters supplied by the caller; varDsType is declared but not used here. -->
  <xsl:param name="varOfficialName" />
  <xsl:param name="varDsType" />
  <xsl:param name="varDataSourceId" />
  <!-- Second argument of regExpr when extracting project ids
       (presumably a prefix for the result; confirm against the extension class). -->
  <xsl:param name="varCorda" select="'corda_______::'"/>

  <!-- Passed to skipRecord together with the extension instance. -->
  <xsl:param name="index" select="0"/>
  <xsl:variable name="tf" select="TransformationFunction:getInstance()"/>

  <!-- Entry point: every input document is rewritten by the validRecord template. -->
  <xsl:template match="/">
    <xsl:call-template name="validRecord" />
  </xsl:template>

  <!-- Builds the output record from the current input document. -->
  <xsl:template name="validRecord">
    <record>
      <xsl:copy-of select="//*[local-name() = 'header']" />

      <metadata>
        <xsl:copy-of select="//*[local-name() = 'metadata']/*[local-name() = 'resource']" />

        <!-- Embargo end date: only evaluated when a date of type 'Available'
             exists. If the conversion yields an empty string, skipRecord is
             called instead of emitting a date. -->
        <xsl:if test="//*[local-name()='date']/@dateType='Available'">
          <xsl:variable name="varEmbargoEndDate"
              select="TransformationFunction:convertString($tf, normalize-space(//*[local-name()='date'][@dateType='Available']), 'DateISO8601')"/>
          <xsl:choose>
            <xsl:when test="string-length($varEmbargoEndDate) > 0">
              <oaf:embargoenddate>
                <xsl:value-of select="$varEmbargoEndDate"/>
              </oaf:embargoenddate>
            </xsl:when>
            <xsl:otherwise>
              <oaf:skip>
                <xsl:value-of select="TransformationFunction:skipRecord($tf, $index)"/>
              </oaf:skip>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:if>

        <!-- Typology from resourceTypeGeneral, converted with 'TextTypologies'. -->
        <dr:CobjCategory>
          <xsl:value-of select="TransformationFunction:convertString($tf, //*[local-name()='resourceType']/@resourceTypeGeneral, 'TextTypologies')" />
        </dr:CobjCategory>

        <oaf:dateAccepted>
          <xsl:value-of select="TransformationFunction:convertString($tf, normalize-space(//*[local-name()='publicationYear']), 'DateISO8601')"/>
        </oaf:dateAccepted>

        <!-- Access rights, first match wins:
             1. rights text starting with 'info:eu-repo/semantics'
             2. rights/@rightsURI starting with 'info:eu-repo/semantics'
             3. rights text starting with 'http://creativecommons.org' gives OPEN
             4. anything else gives CLOSED -->
        <xsl:choose>
          <xsl:when test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'info:eu-repo/semantics')]">
            <oaf:accessrights>
              <xsl:value-of select="TransformationFunction:convertString($tf, //*[local-name() = 'rights'][starts-with(normalize-space(.), 'info:eu-repo/semantics')], 'AccessRights')"/>
            </oaf:accessrights>
          </xsl:when>
          <xsl:when test="//*[local-name() = 'rights']/@rightsURI[starts-with(normalize-space(.), 'info:eu-repo/semantics')]">
            <oaf:accessrights>
              <xsl:value-of select="TransformationFunction:convertString($tf, //*[local-name() = 'rights']/@rightsURI[starts-with(normalize-space(.), 'info:eu-repo/semantics')], 'AccessRights')"/>
            </oaf:accessrights>
          </xsl:when>
          <xsl:when test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'http://creativecommons.org')]">
            <oaf:accessrights>OPEN</oaf:accessrights>
          </xsl:when>
          <xsl:otherwise>
            <oaf:accessrights>CLOSED</oaf:accessrights>
          </xsl:otherwise>
        </xsl:choose>

        <oaf:language>
          <xsl:value-of select="TransformationFunction:convert($tf, //*[local-name()='language'], 'Languages')" />
        </oaf:language>

        <!-- Project links: a nameIdentifier with exactly six characters after
             the FP7 grant-agreement prefix is emitted as oaf:projectid. -->
        <xsl:for-each select="//*[local-name()='nameIdentifier']">
          <xsl:if test="string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6">
            <oaf:projectid>
              <xsl:value-of select="TransformationFunction:regExpr($tf, normalize-space(.), $varCorda, 's/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gm')"/>
            </oaf:projectid>
          </xsl:if>
        </xsl:for-each>

        <!-- Provenance: both elements carry the datasource name and id parameters. -->
        <oaf:hostedBy name="{$varOfficialName}" id="{$varDataSourceId}"/>
        <oaf:collectedFrom name="{$varOfficialName}" id="{$varDataSourceId}"/>
      </metadata>
      <xsl:copy-of select="//*[local-name() = 'about']" />
    </record>
  </xsl:template>

</xsl:stylesheet>
148 |
]]></CODE> |
|
149 |
</SCRIPT> |
|
150 |
</CONFIGURATION> |
|
151 |
<STATUS/> |
|
152 |
<SECURITY_PARAMETERS/> |
|
153 |
</BODY> |
|
154 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_dcidentifier_last.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value="50cbbb8f-b9d4-486d-9895-a80ca7b963b9_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
5 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<CONFIGURATION> |
|
12 |
<IMPORTED/> |
|
13 |
<SCRIPT> |
|
14 |
<TITLE>dc_cleaning_OPENAIREplus_compliant_dcidentifier_last</TITLE> |
|
15 |
<CODE><![CDATA[ |
|
declare_script "dc_cleaning_OpenAIREplus_compliant_dcidentifier_last";
declare_ns oaf = "http://namespace.openaire.eu/oaf";
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
declare_ns dc = "http://purl.org/dc/elements/1.1/";
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
// Cleaning rules for Dublin Core records where only the LAST dc:identifier
// is accepted as the record's http identifier (see the identifier rules below).
// Helper values: $var0 and $var1 are passed as the second argument of RegExpr
// (presumably a prefix for the result - TODO confirm); $varDummy and $var0
// are also the targets of no-op else branches.
$var0 = "''";
$var1 = "'corda_______::'";
$varDummy = "''";
// Values looked up through getValue(PROFILEFIELD, ...) using the repository id.
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
static $varRepoid = xpath:"//dri:repositoryId";
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
dri:objIdentifier = xpath:"//dri:objIdentifier";
dri:repositoryId = $varRepoid;
// this can be made easier
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
// Descriptive fields: empty values are dropped, the rest is whitespace-normalized.
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
dc:contributor = xpath:"//dc:contributor";
dc:description = xpath:"//dc:description";
// Identifiers: the record goes to skipRecord() unless its last dc:identifier
// starts with 'http'. Only that last http identifier stays dc:identifier;
// every other identifier value becomes dr:CobjIdentifier.
// NOTE(review): the guard tests the raw value while the apply rule tests
// normalize-space(.) - confirm whether whitespace-padded URLs should be skipped.
$varHttpTest = "''";
if xpath:"//dc:identifier[last()][starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http') and position()=last()" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
static dr:dateOfCollection = getValue(CURRENTDATE, []);
// dc:type = xpath:"//dc:type";
dc:language = Convert(xpath:"//dc:language", Languages);
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
dc:date = xpath:"//dc:date";
// Dates: oaf:dateAccepted is converted from the dc:date values; a dc:date
// starting with 'info:eu-repo/date' yields the embargo end date.
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
// Project links: a dc:relation with exactly six characters after the FP7
// grant-agreement prefix becomes oaf:projectid; any other relation is kept.
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
//
// Datasource information.
//
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
// Typology and access rights: dc:type is converted with TextTypologies;
// 'info:eu-repo/semantics' rights are converted with AccessRights, and a
// record without such a dc:rights value is marked OPEN.
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
// Provenance: both elements carry the datasource official name and id.
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
// DOI extraction from dc:identifier and dc:relation.
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
end
74 |
]]></CODE> |
|
75 |
</SCRIPT> |
|
76 |
</CONFIGURATION> |
|
77 |
<STATUS/> |
|
78 |
<SECURITY_PARAMETERS/> |
|
79 |
</BODY> |
|
80 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_arxiv.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="18b2d7cb-dd69-4c58-9bad-8fcdb5972d5c_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2014-01-28T13:52:15+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<IMPORTED/> |
|
12 |
<SCRIPT> |
|
13 |
<TITLE>dc_cleaning_OPENAREplus_arxiv</TITLE> |
|
14 |
<CODE><![CDATA[ |
|
declare_script "dc_cleaning_OpenAIREplus_arxiv";
declare_ns oaf = "http://namespace.openaire.eu/oaf";
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
declare_ns dc = "http://purl.org/dc/elements/1.1/";
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";

// ---------------------------------------------------------------------------
// Cleaning rules for Dublin Core records collected from arXiv.
// ---------------------------------------------------------------------------

// Helper values: $var0 and $var1 are passed as the second argument of RegExpr
// (presumably a prefix for the result - TODO confirm); $varDummy and $var0
// are also the targets of no-op else branches.
$var0 = "''";
$var1 = "'corda_______::'";
$varDummy = "''";

// Values looked up through getValue(PROFILEFIELD, ...) using the repository id.
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
static $varRepoid = xpath:"//dri:repositoryId";
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);

// Record identification.
dri:objIdentifier = xpath:"//dri:objIdentifier";
dri:repositoryId = $varRepoid;
//dri:repositoryId = xpath:"//dri:repositoryId";
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");

// Descriptive fields: empty values are dropped, the rest is whitespace-normalized.
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
dc:contributor = xpath:"//dc:contributor";
dc:description = xpath:"//dc:description";

// Identifiers: a record without any dc:identifier starting with 'http' goes to
// skipRecord(). Identifiers starting with 'http' stay dc:identifier, all others
// become dr:CobjIdentifier.
$varHttpTest = "''";
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
static dr:dateOfCollection = getValue(CURRENTDATE, []);
// dc:type = xpath:"//dc:type";

// Language: records without dc:language default to "eng".
if xpath:"count(//dc:language) = 0" dc:language = "eng"; else dc:language = Convert(xpath:"//dc:language", Languages);
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();

// Dates: oaf:dateAccepted is converted from the dc:date values; a dc:date
// starting with 'info:eu-repo/date' yields the embargo end date.
dc:date = xpath:"//dc:date";
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";

// Project links: a dc:relation with exactly six characters after the FP7
// grant-agreement prefix becomes oaf:projectid; any other relation is kept.
// The test uses substring-after because translate() removes single characters
// of its second argument wherever they occur (including the digit 7), so it
// cannot be used to strip the prefix.
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);

// Datasource information.
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);

// Typology and access rights: dc:type is converted with TextTypologies;
// 'info:eu-repo/semantics' rights are converted with AccessRights, and in
// addition oaf:accessrights is set to OPEN for every record.
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
oaf:accessrights = "OPEN";
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);

// Provenance: both elements carry the datasource official name and id.
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);

// DOI extraction from dc:identifier and dc:relation.
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
end
70 |
]]></CODE> |
|
71 |
</SCRIPT> |
|
72 |
</CONFIGURATION> |
|
73 |
<STATUS/> |
|
74 |
<SECURITY_PARAMETERS/> |
|
75 |
</BODY> |
|
76 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_pensoft.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="7a0f18a4-7015-45ec-a9e5-1c7368889d7f_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<IMPORTED/> |
|
12 |
<SCRIPT> |
|
13 |
<TITLE>dc_cleaning_OPENAIREplus_pensoft</TITLE> |
|
14 |
<CODE><![CDATA[ |
|
15 |
declare_script "dc_cleaning_OpenAIREplus_pensoft"; |
|
16 |
declare_ns oaf = "http://namespace.openaire.eu/oaf"; |
|
17 |
declare_ns dri = "http://www.driver-repository.eu/namespace/dri"; |
|
18 |
declare_ns dr = "http://www.driver-repository.eu/namespace/dr"; |
|
19 |
declare_ns dc = "http://purl.org/dc/elements/1.1/"; |
|
20 |
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance"; |
|
21 |
$var0 = "''"; |
|
22 |
$var1 = "'corda_______::'"; |
|
23 |
static $varHostedByPrefix = "'openaire____::issn'"; |
|
24 |
$varDummy = "''"; |
|
25 |
$varHostedByName = RegExpr(xpath:"//dc:source", $var0, "s/(\s+)(\d+)(.*)$//gmi"); |
|
26 |
$varHostedByIdTemp = RegExpr(xpath:"//dc:relation[last()]", $var0, "s/^(.*)(\/)|(-)//gmi"); |
|
27 |
$varHostedById = xpath:"concat($varHostedByPrefix, $varHostedByIdTemp)"; |
|
28 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
29 |
static $varRepoid = xpath:"//dri:repositoryId"; |
|
30 |
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]); |
|
31 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
|
32 |
dri:repositoryId = $varRepoid; |
|
33 |
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/"); |
|
34 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
35 |
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
36 |
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
37 |
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
38 |
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
39 |
dc:contributor = xpath:"//dc:contributor"; |
|
40 |
dc:description = xpath:"//dc:description"; |
|
41 |
$varHttpTest = "''"; |
|
42 |
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord(); |
|
43 |
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; |
|
44 |
static dr:dateOfCollection = getValue(CURRENTDATE, []); |
|
45 |
// dc:type = xpath:"//dc:type"; |
|
46 |
dc:language = Convert(xpath:"//dc:language", Languages); |
|
47 |
dc:date = xpath:"//dc:date"; |
|
48 |
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()"); |
|
49 |
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''"; |
|
50 |
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
51 |
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)"; |
|
52 |
// |
|
53 |
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]); |
|
54 |
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
55 |
// oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
56 |
// |
|
57 |
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:"."; |
|
58 |
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
59 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN"; |
|
60 |
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
61 |
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights); |
|
62 |
// |
|
63 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
64 |
static oaf:hostedBy = set("''", @name = $varHostedByName; , @id = $varHostedById;); |
|
65 |
// |
|
66 |
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)'); |
|
67 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
|
68 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
|
69 |
end |
|
70 |
]]></CODE> |
|
71 |
</SCRIPT> |
|
72 |
</CONFIGURATION> |
|
73 |
<STATUS/> |
|
74 |
<SECURITY_PARAMETERS/> |
|
75 |
</BODY> |
|
76 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="d8bf1473-4af6-4c18-8b27-fb59759908ed_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<IMPORTED/> |
|
12 |
<SCRIPT> |
|
13 |
<!-- Display title of this rule; spelled like the declare_script name in the CODE section (OPENAIREplus). --> |
<TITLE>dc_cleaning_OPENAIREplus</TITLE> |
|
14 |
<CODE><![CDATA[ |
|
15 |
declare_script "dc_cleaning_OpenAIREplus"; |
|
16 |
declare_ns oaf = "http://namespace.openaire.eu/oaf"; |
|
17 |
declare_ns dri = "http://www.driver-repository.eu/namespace/dri"; |
|
18 |
declare_ns dr = "http://www.driver-repository.eu/namespace/dr"; |
|
19 |
declare_ns dc = "http://purl.org/dc/elements/1.1/"; |
|
20 |
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance"; |
|
21 |
$var0 = "''"; |
|
22 |
$var1 = "'corda_______::'"; |
|
23 |
$varDummy = "''"; |
|
24 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
25 |
static $varRepoid = xpath:"//dri:repositoryId"; |
|
26 |
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]); |
|
27 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
|
28 |
dri:repositoryId = $varRepoid; |
|
29 |
//dri:repositoryId = xpath:"//dri:repositoryId"; |
|
30 |
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/"); |
|
31 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
32 |
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
33 |
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
34 |
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
35 |
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
36 |
dc:contributor = xpath:"//dc:contributor"; |
|
37 |
dc:description = xpath:"//dc:description"; |
|
38 |
$varHttpTest = "''"; |
|
39 |
// Record-level guard: when no dc:identifier starts with 'http' the record is handed to skipRecord(). |
// The value is whitespace-normalized first so this test agrees with the per-identifier rule that follows, |
// which also tests starts-with(normalize-space(.), 'http'); otherwise a padded http identifier would skip the record. |
if xpath:"//dc:identifier[starts-with(normalize-space(.), 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord(); |
|
40 |
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; |
|
41 |
static dr:dateOfCollection = getValue(CURRENTDATE, []); |
|
42 |
// dc:type = xpath:"//dc:type"; |
|
43 |
dc:language = Convert(xpath:"//dc:language", Languages); |
|
44 |
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord(); |
|
45 |
dc:date = xpath:"//dc:date"; |
|
46 |
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()"); |
|
47 |
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''"; |
|
48 |
// A dc:relation is treated as an FP7 grant reference only when exactly six characters follow the |
// 'info:eu-repo/grantAgreement/EC/FP7/' prefix; the prefix is then stripped and $var1 is passed to RegExpr alongside it. |
// substring-after is used instead of translate(): translate() deletes single characters (including the digit 7) |
// rather than the prefix string, so it miscounted grant codes and could match unrelated relations. |
// Every other relation is copied through whitespace-normalized. |
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
49 |
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)"; |
|
50 |
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
51 |
// |
|
52 |
// |
|
53 |
// |
|
54 |
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]); |
|
55 |
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
56 |
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
57 |
// |
|
58 |
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:"."; |
|
59 |
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
60 |
// Assigns the literal access-rights value "OPEN" without any condition. |
// NOTE(review): the preceding rule already derives oaf:accessrights from info:eu-repo/semantics rights, |
// and the other profiles in this revision only fall back to "OPEN" when no such dc:rights exists - confirm the unconditional form is intended here. |
oaf:accessrights = "OPEN"; |
|
61 |
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights); |
|
62 |
// |
|
63 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
64 |
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
65 |
// |
|
66 |
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)'); |
|
67 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
|
68 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
|
69 |
end |
|
70 |
]]></CODE> |
|
71 |
</SCRIPT> |
|
72 |
</CONFIGURATION> |
|
73 |
<STATUS/> |
|
74 |
<SECURITY_PARAMETERS/> |
|
75 |
</BODY> |
|
76 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_1299.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="044e8e7c-0861-42d5-9b4a-15252faa446e_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2013-11-29T10:05:49+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<IMPORTED/> |
|
12 |
<SCRIPT> |
|
13 |
<TITLE>dc_cleaning_OPENAIREplus_compliant_1299</TITLE> |
|
14 |
<CODE><![CDATA[ |
|
15 |
declare_script "dc_cleaning_OpenAIREplus_compliant_1299"; |
|
16 |
declare_ns oaf = "http://namespace.openaire.eu/oaf"; |
|
17 |
declare_ns dri = "http://www.driver-repository.eu/namespace/dri"; |
|
18 |
declare_ns dr = "http://www.driver-repository.eu/namespace/dr"; |
|
19 |
declare_ns dc = "http://purl.org/dc/elements/1.1/"; |
|
20 |
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance"; |
|
21 |
$var0 = "''"; |
|
22 |
$var1 = "'corda_______::'"; |
|
23 |
$varDummy = "''"; |
|
24 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
25 |
static $varRepoid = xpath:"//dri:repositoryId"; |
|
26 |
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]); |
|
27 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
|
28 |
dri:repositoryId = $varRepoid; |
|
29 |
//dri:repositoryId = xpath:"//dri:repositoryId"; |
|
30 |
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/"); |
|
31 |
// Record-level guard: calls skipRecord() when the record has no dc:creator at all, |
// or when at least one dc:creator starts with five question marks ('?????'). |
// Otherwise the else branch only assigns the placeholder variable $varDummy. |
if xpath:"count(//dc:creator) = 0 or //dc:creator[starts-with(., '?????')]" dc:creator = skipRecord(); else $varDummy = "''"; |
|
32 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
33 |
// if xpath:"//dc:title[starts-with(., '?????')]" dc:title = skipRecord(); else $varDummy = "''"; |
|
34 |
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
35 |
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
36 |
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
37 |
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
38 |
dc:contributor = xpath:"//dc:contributor"; |
|
39 |
dc:description = xpath:"//dc:description"; |
|
40 |
$varHttpTest = "''"; |
|
41 |
// Record-level guard: when no dc:identifier starts with 'http' the record is handed to skipRecord(). |
// The value is whitespace-normalized first so this test agrees with the per-identifier rule that follows, |
// which also tests starts-with(normalize-space(.), 'http'); otherwise a padded http identifier would skip the record. |
if xpath:"//dc:identifier[starts-with(normalize-space(.), 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord(); |
|
42 |
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; |
|
43 |
static dr:dateOfCollection = getValue(CURRENTDATE, []); |
|
44 |
// dc:type = xpath:"//dc:type"; |
|
45 |
dc:language = Convert(xpath:"//dc:language", Languages); |
|
46 |
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord(); |
|
47 |
dc:date = xpath:"//dc:date"; |
|
48 |
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()"); |
|
49 |
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''"; |
|
50 |
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
51 |
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
52 |
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
53 |
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)"; |
|
54 |
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
55 |
// |
|
56 |
// |
|
57 |
// |
|
58 |
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]); |
|
59 |
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
60 |
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
61 |
// |
|
62 |
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:"."; |
|
63 |
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
64 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN"; |
|
65 |
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
66 |
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights); |
|
67 |
// |
|
68 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
69 |
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
70 |
// |
|
71 |
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)'); |
|
72 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
|
73 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
|
74 |
end |
|
75 |
]]></CODE> |
|
76 |
</SCRIPT> |
|
77 |
</CONFIGURATION> |
|
78 |
<STATUS/> |
|
79 |
<SECURITY_PARAMETERS/> |
|
80 |
</BODY> |
|
81 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_creator_comma.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="bc13c7ee-869f-4b76-81a8-2ee6ffcb81fa_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2014-04-09T08:05:59+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<IMPORTED/> |
|
12 |
<SCRIPT> |
|
13 |
<TITLE>dc_cleaning_OPENAIREplus_compliant_creator_comma</TITLE> |
|
14 |
<CODE><![CDATA[ |
|
15 |
declare_script "dc_cleaning_OpenAIREplus_compliant_creator_comma"; |
|
16 |
declare_ns oaf = "http://namespace.openaire.eu/oaf"; |
|
17 |
declare_ns dri = "http://www.driver-repository.eu/namespace/dri"; |
|
18 |
declare_ns dr = "http://www.driver-repository.eu/namespace/dr"; |
|
19 |
declare_ns dc = "http://purl.org/dc/elements/1.1/"; |
|
20 |
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance"; |
|
21 |
$var0 = "''"; |
|
22 |
$var1 = "'corda_______::'"; |
|
23 |
$varDummy = "''"; |
|
24 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
25 |
static $varRepoid = xpath:"//dri:repositoryId"; |
|
26 |
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]); |
|
27 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
|
28 |
dri:repositoryId = $varRepoid; |
|
29 |
//dri:repositoryId = xpath:"//dri:repositoryId"; |
|
30 |
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/"); |
|
31 |
if xpath:"count(//dc:creator) = 0" dc:creator = skipRecord(); else $varDummy = "''"; |
|
32 |
// Rewrites each non-blank dc:creator as "<first token>, <rest>" (split at the first space of the normalized value). |
// A value without an inner space is kept unchanged: the separator is emitted only when contains(..., ' ') is true |
// (substring(', ', 1, 0) yields the empty string), whereas the plain substring-before/-after form returned just ", ". |
// Whitespace-only creators are skipped by testing the normalized length. |
apply xpath:"//dc:creator" if xpath:"string-length(normalize-space(.)) > 0" dc:creator = xpath:"concat(substring-before(concat(normalize-space(.), ' '), ' '), substring(', ', 1, 2 * number(contains(normalize-space(.), ' '))), substring-after(normalize-space(.), ' '))"; else $varDummy = "''"; |
|
33 |
// Rewrites each non-blank dc:contributor as "<first token>, <rest>" (split at the first space of the normalized value). |
// A value without an inner space is kept unchanged: the separator is emitted only when contains(..., ' ') is true |
// (substring(', ', 1, 0) yields the empty string), whereas the plain substring-before/-after form returned just ", ". |
// Whitespace-only contributors are skipped by testing the normalized length. |
apply xpath:"//dc:contributor" if xpath:"string-length(normalize-space(.)) > 0" dc:contributor = xpath:"concat(substring-before(concat(normalize-space(.), ' '), ' '), substring(', ', 1, 2 * number(contains(normalize-space(.), ' '))), substring-after(normalize-space(.), ' '))"; else $varDummy = "''"; |
|
34 |
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
35 |
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
36 |
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
37 |
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
38 |
dc:description = xpath:"//dc:description"; |
|
39 |
$varHttpTest = "''"; |
|
40 |
// Record-level guard: when no dc:identifier starts with 'http' the record is handed to skipRecord(). |
// The value is whitespace-normalized first so this test agrees with the per-identifier rule that follows, |
// which also tests starts-with(normalize-space(.), 'http'); otherwise a padded http identifier would skip the record. |
if xpath:"//dc:identifier[starts-with(normalize-space(.), 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord(); |
|
41 |
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; |
|
42 |
static dr:dateOfCollection = getValue(CURRENTDATE, []); |
|
43 |
// dc:type = xpath:"//dc:type"; |
|
44 |
dc:language = Convert(xpath:"//dc:language", Languages); |
|
45 |
//dc:language = "eng"; |
|
46 |
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord(); |
|
47 |
dc:date = xpath:"//dc:date"; |
|
48 |
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()"); |
|
49 |
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''"; |
|
50 |
// A dc:relation becomes oaf:projectid when exactly six characters follow the 'info:eu-repo/grantAgreement/EC/FP7/' prefix |
// and the normalized value does not contain '/12345'; the prefix is stripped by the RegExpr call, which also receives $var1. |
// NOTE(review): '/12345' looks like a placeholder grant number being filtered out - confirm against the data source. |
// Every other relation is copied through whitespace-normalized. |
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6 and not(contains(normalize-space(.), '/12345'))" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
51 |
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
52 |
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
53 |
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)"; |
|
54 |
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
55 |
// |
|
56 |
// |
|
57 |
// |
|
58 |
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]); |
|
59 |
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
60 |
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
61 |
// |
|
62 |
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:"."; |
|
63 |
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
64 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN"; |
|
65 |
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
66 |
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights); |
|
67 |
// |
|
68 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
69 |
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
70 |
// |
|
71 |
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)'); |
|
72 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
|
73 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
|
74 |
end |
|
75 |
]]></CODE> |
|
76 |
</SCRIPT> |
|
77 |
</CONFIGURATION> |
|
78 |
<STATUS/> |
|
79 |
<SECURITY_PARAMETERS/> |
|
80 |
</BODY> |
|
81 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_milano.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="c190a6a3-83dd-43a8-b9b2-f4db6a72beca_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<IMPORTED/> |
|
12 |
<SCRIPT> |
|
13 |
<TITLE>dc_cleaning_OPENAIREplus_milano</TITLE> |
|
14 |
<CODE><![CDATA[ |
|
15 |
declare_script "dc_cleaning_OpenAIREplus_milano"; |
|
16 |
declare_ns oaf = "http://namespace.openaire.eu/oaf"; |
|
17 |
declare_ns dri = "http://www.driver-repository.eu/namespace/dri"; |
|
18 |
declare_ns dr = "http://www.driver-repository.eu/namespace/dr"; |
|
19 |
declare_ns dc = "http://purl.org/dc/elements/1.1/"; |
|
20 |
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance"; |
|
21 |
$var0 = "''"; |
|
22 |
$var1 = "'corda_______::'"; |
|
23 |
$varDummy = "''"; |
|
24 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
25 |
static $varRepoid = xpath:"//dri:repositoryId"; |
|
26 |
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]); |
|
27 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
|
28 |
dri:repositoryId = $varRepoid; |
|
29 |
//dri:repositoryId = xpath:"//dri:repositoryId"; |
|
30 |
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/"); |
|
31 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
32 |
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
33 |
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
34 |
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
35 |
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
36 |
dc:contributor = xpath:"//dc:contributor"; |
|
37 |
dc:description = xpath:"//dc:description"; |
|
38 |
$varHttpTest = "''"; |
|
39 |
// Record-level guard: when no dc:identifier starts with 'http' the record is handed to skipRecord(). |
// The value is whitespace-normalized first so this test agrees with the per-identifier rule further down, |
// which also tests starts-with(normalize-space(.), 'http'); otherwise a padded http identifier would skip the record. |
if xpath:"//dc:identifier[starts-with(normalize-space(.), 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord(); |
|
40 |
// Record-level guard: calls skipRecord() when some dc:rights starts with 'info:eu-repo/semantics/closedAccess' |
// and no dc:relation starts with 'info:eu-repo/grantAgreement'. |
// Otherwise the else branch only assigns the placeholder variable $varDummy. |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/closedAccess')] and not(//dc:relation[starts-with(normalize-space(.), 'info:eu-repo/grantAgreement')])" dc:relation = skipRecord(); else $varDummy = "''"; |
|
41 |
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; |
|
42 |
static dr:dateOfCollection = getValue(CURRENTDATE, []); |
|
43 |
// dc:type = xpath:"//dc:type"; |
|
44 |
dc:language = Convert(xpath:"//dc:language", Languages); |
|
45 |
//dc:language = "eng"; |
|
46 |
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord(); |
|
47 |
dc:date = xpath:"//dc:date"; |
|
48 |
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()"); |
|
49 |
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''"; |
|
50 |
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
51 |
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
52 |
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
53 |
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)"; |
|
54 |
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
55 |
// |
|
56 |
// |
|
57 |
// |
|
58 |
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]); |
|
59 |
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
60 |
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
61 |
// |
|
62 |
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:"."; |
|
63 |
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
64 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN"; |
|
65 |
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
66 |
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights); |
|
67 |
// |
|
68 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
69 |
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
70 |
// |
|
71 |
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)'); |
|
72 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
|
73 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
|
74 |
end |
|
75 |
]]></CODE> |
|
76 |
</SCRIPT> |
|
77 |
</CONFIGURATION> |
|
78 |
<STATUS/> |
|
79 |
<SECURITY_PARAMETERS/> |
|
80 |
</BODY> |
|
81 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <!--
        Transformation rule profile "dc_cleaning_OPENAIREplus_compliant".
        The CODE element carries a transformation script that reads Dublin Core (dc:) fields of a
        harvested record and writes dc:, dr:, dri: and oaf: fields.
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="b844840b-4efa-40a1-a1ce-90b4b63972c2_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <IMPORTED/>
            <SCRIPT>
                <TITLE>dc_cleaning_OPENAIREplus_compliant</TITLE>
                <CODE><![CDATA[
declare_script "dc_cleaning_OpenAIREplus_compliant";
declare_ns oaf = "http://namespace.openaire.eu/oaf";
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
declare_ns dc = "http://purl.org/dc/elements/1.1/";
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
// Scratch variables: $var0 and $varDummy hold an empty quoted literal and are used as no-op
// assignment targets in else branches. $var1 holds the corda prefix passed to RegExpr for oaf:projectid.
$var0 = "''";
$var1 = "'corda_______::'";
$varDummy = "''";
// Values looked up once from the datasource profile identified by dri:repositoryId.
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
static $varRepoid = xpath:"//dri:repositoryId";
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
// Record identity fields.
dri:objIdentifier = xpath:"//dri:objIdentifier";
dri:repositoryId = $varRepoid;
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
// Copy non-empty descriptive fields with whitespace normalized. Empty values fall into the no-op else branch.
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
dc:contributor = xpath:"//dc:contributor";
dc:description = xpath:"//dc:description";
$varHttpTest = "''";
// Call skipRecord() unless at least one dc:identifier starts with http. The test is made on the
// whitespace-normalized value so that it agrees with the apply rule on the next statement.
if xpath:"//dc:identifier[starts-with(normalize-space(.), 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
// http identifiers stay in dc:identifier, every other identifier goes to dr:CobjIdentifier.
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
static dr:dateOfCollection = getValue(CURRENTDATE, []);
// dc:type = xpath:"//dc:type";
dc:language = Convert(xpath:"//dc:language", Languages);
//dc:language = "eng";
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
// Dates: oaf:dateAccepted is converted from the dc:date values, and a dc:date starting with info:eu-repo/date feeds oaf:embargoenddate.
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
// A dc:relation with exactly 6 characters after the FP7 grantAgreement prefix becomes oaf:projectid, any other relation is copied as dc:relation.
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
// NOTE(review): $varDsType is only referenced by the commented-out rule below - confirm it is still needed.
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
// Typology and access rights. When no dc:rights starts with info:eu-repo/semantics, oaf:accessrights is set to OPEN.
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
// Provenance: both collectedFrom and hostedBy carry the official name and id of the same datasource.
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
// Extract DOI-shaped strings from dc:identifier and dc:relation and emit them as oaf:identifier with identifierType doi.
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
end
]]></CODE>
            </SCRIPT>
        </CONFIGURATION>
        <STATUS/>
        <SECURITY_PARAMETERS/>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <!--
        Hadoop job configuration profile for the Oozie workflow "iisPreprocessingJob" (IIS preprocessing).
        STATIC_CONFIGURATION holds fixed properties; JOB_INTERFACE lists the parameters a caller must supply.
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <HADOOP_JOB name="iisPreprocessingJob" type="oozie">
            <DESCRIPTION>IIS preprocessing</DESCRIPTION>
            <STATIC_CONFIGURATION>
                <!-- Cluster wide -->
                <PROPERTY key="queueName" value="default"/>
                <PROPERTY key="user.name" value="dnet.beta"/>

                <!-- Runtime -->
                <PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing"/>
                <!-- NOTE(review): oozie.wf.application.path is also declared as a required PARAM in
                     JOB_INTERFACE below; confirm which of the two values takes precedence. -->
                <PROPERTY key="oozie.wf.application.path" value="/tmp/integration/apps/preprocessing"/>
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
                <PROPERTY key="export_action_hbase_table_initialize" value="false"/>
                <PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/>
                <PROPERTY key="metadataextraction_excluded_checksums" value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
            </STATIC_CONFIGURATION>
            <JOB_INTERFACE>
                <!-- Import sources -->
                <PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/>
                <PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/>
                <PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records"/>
                <PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records"/>
                <PARAM name="import_database_service_location" required="true" description="database service endpoint"/>
                <PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction"/>
                <PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext"/>

                <!-- Export target (action manager HBase table) -->
                <PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/>
                <PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/>
                <PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/>

                <!-- Cluster endpoints and workflow location -->
                <PARAM name="nameNode" required="true" description="hdfs name node"/>
                <PARAM name="jobTracker" required="true" description="job tracker name"/>
                <PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/>

                <!-- Target action sets -->
                <PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references"/>
                <PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references"/>
                <PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities"/>
                <PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities"/>
            </JOB_INTERFACE>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
            <RUNNING_INSTANCES value="0"/>
            <CUMULATIVE_RUN value="0"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_fraunhofer.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <!--
        Transformation rule profile "dc_cleaning_OPENAIREplus_compliant_fraunhofer".
        The CODE element carries a transformation script that reads Dublin Core (dc:) fields of a
        harvested record and writes dc:, dr:, dri: and oaf: fields. Records without a dc:creator, or
        with neither an openAccess dc:rights nor a grantAgreement dc:relation, go through skipRecord().
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="3ff02e38-f3d2-4ecc-9d45-7100256c979d_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2014-04-29T03:51:40+00:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <IMPORTED/>
            <SCRIPT>
                <TITLE>dc_cleaning_OPENAIREplus_compliant_fraunhofer</TITLE>
                <CODE><![CDATA[
declare_script "dc_cleaning_OpenAIREplus_compliant_fraunhofer";
declare_ns oaf = "http://namespace.openaire.eu/oaf";
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
declare_ns dc = "http://purl.org/dc/elements/1.1/";
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
// Scratch variables: $var0 and $varDummy hold an empty quoted literal and are used as no-op
// assignment targets in else branches. $var1 holds the corda prefix passed to RegExpr for oaf:projectid.
$var0 = "''";
$var1 = "'corda_______::'";
$varDummy = "''";
// Values looked up once from the datasource profile identified by dri:repositoryId.
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
static $varRepoid = xpath:"//dri:repositoryId";
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
// Record identity fields.
dri:objIdentifier = xpath:"//dri:objIdentifier";
dri:repositoryId = $varRepoid;
//dri:repositoryId = xpath:"//dri:repositoryId";
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
// Call skipRecord() when the record has no dc:creator at all.
if xpath:"count(//dc:creator) = 0" dc:creator = skipRecord(); else $varDummy = "''";
// Copy non-empty descriptive fields with whitespace normalized. Empty values fall into the no-op else branch.
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
dc:contributor = xpath:"//dc:contributor";
dc:description = xpath:"//dc:description";
$varHttpTest = "''";
// Call skipRecord() unless a dc:rights starts with openAccess or a dc:relation starts with
// info:eu-repo/grantAgreement. Both tests use the whitespace-normalized value so that they agree.
if xpath:"//dc:rights[starts-with(normalize-space(.), 'openAccess')] or count(//dc:relation[starts-with(normalize-space(.), 'info:eu-repo/grantAgreement')]) > 0" $varHttpTest = "true"; else dc:identifier = skipRecord();
//if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
// http identifiers stay in dc:identifier, every other identifier goes to dr:CobjIdentifier.
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
static dr:dateOfCollection = getValue(CURRENTDATE, []);
// dc:type = xpath:"//dc:type";
dc:language = Convert(xpath:"//dc:language", Languages);
//dc:language = "eng";
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
// Dates: dc:date is copied as is, oaf:dateAccepted is converted from the dc:date values, and a dc:date starting with info:eu-repo/date feeds oaf:embargoenddate.
dc:date = xpath:"//dc:date";
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
// A dc:relation with exactly 6 characters after the FP7 grantAgreement prefix becomes oaf:projectid, unless it contains /12345.
// Any other relation is copied as dc:relation. NOTE(review): /12345 looks like a placeholder grant number - confirm.
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6 and not(contains(normalize-space(.), '/12345'))" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
// NOTE(review): $varDsType is only referenced by the commented-out rule below - confirm it is still needed.
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
// Typology and access rights. When no dc:rights starts with info:eu-repo/semantics, oaf:accessrights is set to OPEN.
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
// Provenance: both collectedFrom and hostedBy carry the official name and id of the same datasource.
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
// Extract DOI-shaped strings from dc:identifier and dc:relation and emit them as oaf:identifier with identifierType doi.
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
end
]]></CODE>
            </SCRIPT>
        </CONFIGURATION>
        <STATUS/>
        <SECURITY_PARAMETERS/>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <!--
        Hadoop job configuration profile for the Oozie workflow "iisMainJob" (IIS main workflow).
        STATIC_CONFIGURATION holds fixed properties; JOB_INTERFACE lists the parameters a caller must supply.
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="f6e4cbcd-b173-4f8d-9205-e64ba15f03ad_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <HADOOP_JOB name="iisMainJob" type="oozie">
            <DESCRIPTION>IIS main workflow</DESCRIPTION>
            <STATIC_CONFIGURATION>

                <!-- Cluster wide -->
                <PROPERTY key="queueName" value="default"/>
                <PROPERTY key="user.name" value="dnet.beta"/>

                <!-- Runtime -->
                <PROPERTY key="active_existence_filter" value="true"/>
                <PROPERTY key="import_hbase_approved_datasources_csv" value="$UNDEFINED$"/>
                <PROPERTY key="import_hbase_input_table" value="information_space-2014-11-05"/>
                <PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/>
                <PROPERTY key="export_action_hbase_table_initialize" value="true"/>
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
                <PROPERTY key="workingDir" value="/tmp/integration/working_dir/main"/>
                <PROPERTY key="metadataextraction_excluded_checksums" value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
            </STATIC_CONFIGURATION>
            <JOB_INTERFACE>
                <!-- Import sources -->
                <PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/>
                <PARAM name="import_content_objectstores_csv" required="true" description="csv list of the available object stores subject to processing"/>
                <PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/>
                <PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records"/>

                <!-- Export target (action manager HBase table) -->
                <PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/>
                <PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/>
                <PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/>

                <!-- Cluster endpoints and workflow location -->
                <PARAM name="nameNode" required="true" description="hdfs name node"/>
                <PARAM name="jobTracker" required="true" description="job tracker name"/>
                <PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/>

                <!-- Target action sets -->
                <PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for referenced projects"/>
                <PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for referenced datasets"/>
                <PARAM name="export_action_set_id_document_research_initiative" required="true" description="target action set for research initiative outcome"/>
                <PARAM name="export_action_set_id_document_similarities_standard" required="true" description="target action set for document similarities"/>
                <PARAM name="export_action_set_id_document_statistics" required="true" description="target action set for document statistics"/>
                <PARAM name="export_action_set_id_document_classes" required="true" description="target action set for document classification"/>
                <PARAM name="export_action_set_id_document_referencedDocuments" required="true" description="target action set for document citations"/>
                <PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities"/>

                <!-- flags to enable/disable IIS modules -->
                <!-- NOTE(review): the description below mentions affiliation matching while the parameter
                     name refers to metadata extraction export; confirm which one is intended. -->
                <PARAM name="active_metadataextraction_export" required="true" description="enable/disable the affiliation matching module"/>
                <PARAM name="active_citationmatching" required="true" description="enable/disable the citation matching module"/>
                <PARAM name="active_documentsclassification" required="true" description="enable/disable the document classification module"/>
                <PARAM name="active_documentssimilarity" required="true" description="enable/disable the document similarity module"/>
                <PARAM name="active_referenceextraction_dataset" required="true" description="enable/disable the dataset reference extraction module"/>
                <PARAM name="active_referenceextraction_project" required="true" description="enable/disable the project reference extraction module"/>
                <PARAM name="active_referenceextraction_researchinitiative" required="true" description="enable/disable the research initiative extraction module"/>
                <PARAM name="active_statistics" required="true" description="enable/disable the statistics module"/>
            </JOB_INTERFACE>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
            <RUNNING_INSTANCES value="0"/>
            <CUMULATIVE_RUN value="0"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupCandidateScanJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <!--
        Hadoop job configuration profile for the map/reduce job "dedupCandidateScanJob".
        It declares the fixed Hadoop properties, the parameters a caller must supply, and the
        HBase scan (filters and column families) the job runs over.
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="675f1436-205a-4b19-8b6b-35e1c17fb125_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <HADOOP_JOB name="dedupCandidateScanJob" type="mapreduce">
            <DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION>
            <STATIC_CONFIGURATION>

                <!-- Input and output formats: both sides are HBase tables -->
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>

                <!-- Map phase: mapper class and the key/value types it emits -->
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMapper"/>
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>

                <!-- Reduce phase: reducer class and the key/value types it emits -->
                <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupReducer"/>
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable"/>

                <!-- Miscellaneous: map output compression on, speculative execution off
                     (set under both the mapred.* and mapreduce.* property names) -->
                <PROPERTY key="mapred.compress.map.output" value="true"/>
                <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
                <PROPERTY key="mapreduce.reduce.speculative" value="false"/>

                <PROPERTY key="mapred.reduce.tasks" value="1000"/>
                <!-- <PROPERTY key="user.name" value="dnet" /> -->

                <!-- Uncomment to override the default lib path -->
                <!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
            </STATIC_CONFIGURATION>
            <JOB_INTERFACE>
                <!-- Source and target tables, declared under both the hbase.mapred.* and hbase.mapreduce.* names -->
                <PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
                <PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
                <PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
                <PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/>
                <!-- Dedup configuration -->
                <PARAM name="dedup.pace.conf" required="true" description="dedup pace configuration"/>
                <PARAM name="dedup.wf.conf" required="true" description="dedup workflow configuration"/>
            </JOB_INTERFACE>
            <SCAN>
                <!-- One prefix filter, parameterized by entityTypeId -->
                <FILTERS operator="MUST_PASS_ALL">
                    <FILTER type="prefix" param="entityTypeId"/>
                </FILTERS>
                <!-- Column families: one given by the entityType parameter, plus three fixed dedup_merges families -->
                <FAMILIES>
                    <FAMILY param="entityType"/>
                    <FAMILY value="resultResult_dedup_merges"/>
                    <FAMILY value="personPerson_dedup_merges"/>
                    <FAMILY value="organizationOrganization_dedup_merges"/>
                </FAMILIES>
            </SCAN>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
            <RUNNING_INSTANCES value="0"/>
            <CUMULATIVE_RUN value="0"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_dcrelation_last.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <!--
        Transformation rule profile "dc_cleaning_OPENAIREplus_compliant_dcrelation_last".
        The CODE element carries a transformation script that reads Dublin Core (dc:) fields of a
        harvested record and writes dc:, dr:, dri: and oaf: fields. In this variant dc:identifier is
        taken from the last dc:relation that is an http link, and the original dc:identifier values
        go to dr:CobjIdentifier.
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="351c08cd-08a7-4a7b-b629-80b828553164_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <IMPORTED/>
            <SCRIPT>
                <TITLE>dc_cleaning_OPENAIREplus_compliant_dcrelation_last</TITLE>
                <CODE><![CDATA[
declare_script "dc_cleaning_OpenAIREplus_compliant_dcrelation_last";
declare_ns oaf = "http://namespace.openaire.eu/oaf";
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
declare_ns dc = "http://purl.org/dc/elements/1.1/";
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
// Scratch variables: $var0 and $varDummy hold an empty quoted literal and are used as no-op
// assignment targets in else branches. $var1 holds the corda prefix passed to RegExpr for oaf:projectid.
$var0 = "''";
$var1 = "'corda_______::'";
$varDummy = "''";
// Values looked up once from the datasource profile identified by dri:repositoryId.
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
static $varRepoid = xpath:"//dri:repositoryId";
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
// Record identity fields.
dri:objIdentifier = xpath:"//dri:objIdentifier";
dri:repositoryId = $varRepoid;
//dri:repositoryId = xpath:"//dri:repositoryId";
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
// Call skipRecord() when the record has no dc:creator at all.
if xpath:"count(//dc:creator) = 0" dc:creator = skipRecord(); else $varDummy = "''";
// Copy non-empty descriptive fields with whitespace normalized. Empty values fall into the no-op else branch.
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
dc:contributor = xpath:"//dc:contributor";
dc:description = xpath:"//dc:description";
$varHttpTest = "''";
// Collect the dc:relation values that are http links. The test is made on the whitespace-normalized
// value, as in the other rules of this script, so that a link with leading whitespace is not missed.
$varHttpLinks = xpath:"//dc:relation[starts-with(normalize-space(.), 'http')]";
// Call skipRecord() when the record has no http link among its relations.
if xpath:"count($varHttpLinks) > 0" $varHttpTest = "true"; else dc:identifier = skipRecord();
// Emit the last http link, whitespace-normalized, as dc:identifier. The former predicate form
// [normalize-space(.)] only filtered out empty nodes and did not normalize the emitted value.
dc:identifier = xpath:"normalize-space($varHttpLinks[last()])";
// The original non-empty dc:identifier values are kept as dr:CobjIdentifier.
dr:CobjIdentifier = xpath:"//dc:identifier[normalize-space(.)]";
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'http') and position=last()" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
static dr:dateOfCollection = getValue(CURRENTDATE, []);
// dc:type = xpath:"//dc:type";
dc:language = Convert(xpath:"//dc:language", Languages);
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
// Dates: dc:date is copied as is, oaf:dateAccepted is converted from the dc:date values, and a dc:date starting with info:eu-repo/date feeds oaf:embargoenddate.
dc:date = xpath:"//dc:date";
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
// A dc:relation with exactly 6 characters after the FP7 grantAgreement prefix becomes oaf:projectid, any other relation is copied as dc:relation.
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
// NOTE(review): $varDsType is only referenced by the commented-out rule below - confirm it is still needed.
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
// Typology and access rights. When no dc:rights starts with info:eu-repo/semantics, oaf:accessrights is set to OPEN.
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
// Provenance: both collectedFrom and hostedBy carry the official name and id of the same datasource.
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
// Extract DOI-shaped strings from dc:identifier and dc:relation and emit them as oaf:identifier with identifierType doi.
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
end
]]></CODE>
            </SCRIPT>
        </CONFIGURATION>
        <STATUS/>
        <SECURITY_PARAMETERS/>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/oaiFeedJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="03d7af20-63bb-4790-a052-6cdbc1e05fce_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2015-02-09T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="oaiFeedJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that feeds the OAI store</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.oai.OaiFeedMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable" /> |
|
22 |
|
|
23 |
<!-- JOB GLOBAL --> |
|
24 |
<!-- <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.NullWritable" /> --> |
|
25 |
<!-- <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.NullWritable"/> --> |
|
26 |
|
|
27 |
<!-- MISC --> |
|
28 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
29 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Overrides the default lib path; comment out the property below to fall back to the default --> |
|
34 |
<PROPERTY key="job.lib" value="/user/dnet/lib/dnet-mapreduce-jobs-assembly-0.0.6.3-SNAPSHOT.jar"/> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" /> |
|
38 |
<PARAM name="services.publisher.oai.collection" required="true" description="target mongodb collection" /> |
|
39 |
<PARAM name="oaiConfiguration" required="true" description="configuration bean used to guide the OAI feeding" /> |
|
40 |
<PARAM name="oai.feed.date" required="true" description="timestamp" /> |
|
41 |
<PARAM name="services.publisher.oai.host" required="true" description="mongodb host" /> |
|
42 |
<PARAM name="services.publisher.oai.port" required="true" description="mongodb port" /> |
|
43 |
<PARAM name="services.publisher.oai.db" required="true" description="mongodb database name" /> |
|
44 |
<PARAM name="services.publisher.oai.skipDuplicates" required="true" description="skip duplicated records." /> |
|
45 |
<PARAM name="services.publisher.oai.duplicateXPath" required="true" description="records with this xpath are identified as duplicates" /> |
|
46 |
</JOB_INTERFACE> |
|
47 |
<SCAN> |
|
48 |
<FILTERS /> |
|
49 |
<FAMILIES /> |
|
50 |
</SCAN> |
|
51 |
</HADOOP_JOB> |
|
52 |
<STATUS> |
|
53 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
54 |
<RUNNING_INSTANCES value="0"/> |
|
55 |
<CUMULATIVE_RUN value="0" /> |
|
56 |
</STATUS> |
|
57 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
58 |
</BODY> |
|
59 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_tropmed.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="75b06691-81d5-47c4-9626-bf1dd5b21122_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
4 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<IMPORTED/> |
|
12 |
<SCRIPT> |
|
13 |
<TITLE>dc_cleaning_OPENAIREplus_compliant_tropmed</TITLE> |
|
14 |
<CODE><![CDATA[ |
|
15 |
declare_script "dc_cleaning_OpenAIREplus_compliant_tropmed"; |
|
16 |
declare_ns oaf = "http://namespace.openaire.eu/oaf"; |
|
17 |
declare_ns dri = "http://www.driver-repository.eu/namespace/dri"; |
|
18 |
declare_ns dr = "http://www.driver-repository.eu/namespace/dr"; |
|
19 |
declare_ns dc = "http://purl.org/dc/elements/1.1/"; |
|
20 |
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance"; |
|
21 |
$var0 = "''"; |
|
22 |
$var1 = "'corda_______::'"; |
|
23 |
$varDummy = "''"; |
|
24 |
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
25 |
static $varRepoid = xpath:"//dri:repositoryId"; |
|
26 |
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]); |
|
27 |
dri:objIdentifier = xpath:"//dri:objIdentifier"; |
|
28 |
dri:repositoryId = $varRepoid; |
|
29 |
//dri:repositoryId = xpath:"//dri:repositoryId"; |
|
30 |
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/"); |
|
31 |
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
32 |
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
33 |
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
34 |
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
35 |
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''"; |
|
36 |
dc:contributor = xpath:"//dc:contributor"; |
|
37 |
dc:description = xpath:"//dc:description"; |
|
38 |
$varHttpTest = "''"; |
|
39 |
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord(); |
|
40 |
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http://hdl.handle.net/10390/')" dc:identifier = xpath:"concat('http://dspace.itg.be/handle/10390/', substring-after(normalize-space(.), 'http://hdl.handle.net/10390/'))"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; |
|
41 |
static dr:dateOfCollection = getValue(CURRENTDATE, []); |
|
42 |
// dc:type = xpath:"//dc:type"; |
|
43 |
dc:language = Convert(xpath:"//dc:language", Languages); |
|
44 |
//dc:language = "eng"; |
|
45 |
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord(); |
|
46 |
dc:date = xpath:"//dc:date"; |
|
47 |
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()"); |
|
48 |
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''"; |
|
49 |
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
50 |
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
51 |
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)"; |
|
52 |
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)"; |
|
53 |
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
54 |
// |
|
55 |
// |
|
56 |
// |
|
57 |
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]); |
|
58 |
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
59 |
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); |
|
60 |
// |
|
61 |
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:"."; |
|
62 |
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"."; |
|
63 |
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN"; |
|
64 |
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''"; |
|
65 |
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights); |
|
66 |
// |
|
67 |
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
68 |
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;); |
|
69 |
// |
|
70 |
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)'); |
|
71 |
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";); |
|
72 |
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; |
|
73 |
end |
|
74 |
]]></CODE> |
|
75 |
</SCRIPT> |
|
76 |
</CONFIGURATION> |
|
77 |
<STATUS/> |
|
78 |
<SECURITY_PARAMETERS/> |
|
79 |
</BODY> |
|
80 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/indexFeedJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="1c34963b-75b3-4440-9f42-72445a26c077_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="indexFeedJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that joins the entities on the hbase table and produces a sequence file containing the xml records</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.IndexFeedMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
<!-- JOB GLOBAL --> |
|
24 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
25 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/> |
|
26 |
|
|
27 |
<!-- MISC --> |
|
28 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
29 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
31 |
<PROPERTY key="mapred.fairscheduler.pool" value="solr"/> |
|
32 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
33 |
|
|
34 |
<!-- Uncomment to override the default lib path --> |
|
35 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
36 |
</STATIC_CONFIGURATION> |
|
37 |
<JOB_INTERFACE> |
|
38 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" /> |
|
39 |
<PARAM name="mapred.output.dir" required="true" description="destination path on hdfs for rotten index xml records" /> |
|
40 |
|
|
41 |
<PARAM name="index.solr.url" required="false" description="url used to instantiate the solr client" /> |
|
42 |
<PARAM name="index.solr.collection" required="true" description="target solr collection to be fed" /> |
|
43 |
|
|
44 |
<PARAM name="id" required="true" description="index DS id" /> |
|
45 |
<PARAM name="index.shutdown.wait.time" required="true" description="wait time before shutting down the solr client pool" /> |
|
46 |
<PARAM name="index.buffer.flush.threshold" required="true" description="indexing buffer flush threshold" /> |
|
47 |
<PARAM name="index.feed.timestamp" required="true" description="timestamp used as ds_version" /> |
|
48 |
<PARAM name="index.solr.sim.mode" required="true" description="boolean value, allows to run this job in simulation mode" /> |
|
49 |
<PARAM name="index.xslt" required="true" description="record transformer created by the MSRO service" /> |
|
50 |
</JOB_INTERFACE> |
|
51 |
<SCAN> |
|
52 |
<FILTERS /> |
|
53 |
<FAMILIES /> |
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-openaireplus-profiles-1.0.4