Project

General

Profile

« Previous | Next » 

Revision 36719

[maven-release-plugin] copy for tag dnet-openaireplus-profiles-1.0.4

View differences:

modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_ubiquitypress.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="b7773110-4beb-45f7-adf1-9cb4037fad58_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2014-10-23T09:58:00+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_compliant_ubiquitypress</TITLE>
14
                <CODE>declare_script "dc_cleaning_OpenAIREplus_compliant_ubiquitypress";
15
declare_ns oaf = "http://namespace.openaire.eu/oaf";
16
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
17
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
18
declare_ns dc = "http://purl.org/dc/elements/1.1/";
19
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
20
declare_ns oai = "http://www.openarchives.org/OAI/2.0/";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
$varDummy = "''";
24
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
25
static $varRepoid = xpath:"//dri:repositoryId";
26
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
27
dri:objIdentifier = xpath:"//dri:objIdentifier";
28
dri:repositoryId = $varRepoid;
29
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
30
apply xpath:"//dc:creator" if xpath:"string-length(.) &gt; 0 and contains(., ';')" dc:creator = xpath:"substring-before(normalize-space(.), ';')"; else $varDummy = "''";
31
apply xpath:"//dc:creator" if xpath:"string-length(.) &gt; 0 and not(contains(., ';'))" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
32
apply xpath:"//dc:title" if xpath:"string-length(.) &gt; 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
33
apply xpath:"//dc:subject" if xpath:"string-length(.) &gt; 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
34
apply xpath:"//dc:publisher" if xpath:"string-length(.) &gt; 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:source" if xpath:"string-length(.) &gt; 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
36
// apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'http')" oaf:fulltext = xpath:"replace(normalize-space(.),'article/view', 'article/download')"; else $varDummy = "''";
37
// Full-text link: counts the dc:format siblings that precede the 'application/pdf' dc:format, selects the dc:relation at that position (count + 1) and rewrites 'article/view' to 'article/download' in it.
// NOTE(review): assumes dc:relation and dc:format entries are listed in matching order - confirm against the harvested records.
oaf:fulltext = xpath:"replace(//dc:relation[count(//dc:format[.='application/pdf']/preceding-sibling::dc:format)+1], 'article/view', 'article/download')";
38
// oaf:fulltext = xpath:"count(//dc:format[.='application/pdf']/preceding-sibling::dc:format)";
39
dc:contributor = xpath:"//dc:contributor";
40
dc:description = xpath:"//dc:description";
41
$varHttpTest = "''";
42
// Record-level guard: when no dc:identifier starts with 'http', skipRecord() is called. $varHttpTest is not read anywhere else in this script; it only fills the 'then' branch.
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
43
// Per identifier: values starting with 'http' stay in dc:identifier, every other value is written to dr:CobjIdentifier.
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
44
static dr:dateOfCollection = getValue(CURRENTDATE, []);
45
// dc:type = xpath:"//dc:type";
46
dc:language = Convert(xpath:"//dc:language", Languages);
47
//dc:language = "eng";
48
dc:date = xpath:"//dc:date";
49
dc:format = xpath:"//dc:format";
50
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
51
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
52
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
53
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
54
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
55
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
56
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
57
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
58
//
59
//
60
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
61
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
62
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
63
//
64
apply xpath:"//dc:type" if xpath:". and not(//oai:setSpec[.='up:DP']) and not(//oai:setSpec[.='up:SMP'])" dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
65
if xpath:"//oai:setSpec[.='up:DP']" dr:CobjCategory = "0031"; else $varDummy = "''";
66
if xpath:"//oai:setSpec[.='up:SMP']" dr:CobjCategory = "0032"; else $varDummy = "''";
67
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
68
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
69
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
70
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
71
//
72
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
73
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
74
//
75
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
76
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
77
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
78
// oaf:journal = set($varOfficialname, @issn = xpath:"replace(//oaf:datasourceprefix, 'issn', '')";);
79
oaf:journal = set($varOfficialname, @issn = xpath:"//dc:source[2]";);
80
end</CODE>
81
            </SCRIPT>
82
        </CONFIGURATION>
83
        <STATUS/>
84
        <SECURITY_PARAMETERS/>
85
    </BODY>
86
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/xslt_cleaning_zenodo_datacite.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="33e8cc88-3b4a-4a68-b332-7cae8baad8dc_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2014-07-17T10:41:08+02:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>xslt_cleaning_zenodo_datacite</TITLE>
14
                <CODE><![CDATA[
15
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.1"
16
 		xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
17
                xmlns:oaf="http://namespace.openaire.eu/oaf"
18
                xmlns:dr="http://www.driver-repository.eu/namespace/dr"
19
        	xmlns:TransformationFunction="eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy"
20
                extension-element-prefixes="TransformationFunction"
21
                exclude-result-prefixes="TransformationFunction">
22

  
23
  <xsl:param name="varOfficialName" />
24
  <xsl:param name="varDsType" />
25
  <xsl:param name="varDataSourceId" />
26
  <xsl:param name="varCorda" select="'corda_______::'"/>
27

  
28
  <xsl:param name="index" select="0"/>
29
  <xsl:variable name="tf" select="TransformationFunction:getInstance()"/>
30

  
31

  
32
  <!-- Entry point: matches the document root and delegates the construction of the output record to the named template 'validRecord'. -->
  <xsl:template match="/">
    <xsl:call-template name="validRecord" />
  </xsl:template>
37

  
38

  
39
  <xsl:template name="validRecord">
40
    <record>
41
      <xsl:copy-of select="//*[local-name() = 'header']" />
42

  
43
      <metadata>
44
        <xsl:copy-of select="//*[local-name() = 'metadata']/*[local-name() = 'resource']" />
45

  
46
         <!-- Embargo end date: only evaluated when a date element with dateType='Available' exists. Its value is converted with the 'DateISO8601' rule; a non-empty result is emitted as oaf:embargoenddate, an empty result triggers skipRecord instead. -->
         <xsl:if test="//*[local-name()='date']/@dateType='Available'">
47
            <xsl:variable name='varEmbargoEndDate' 
48
                select="TransformationFunction:convertString($tf, normalize-space(//*[local-name()='date'][@dateType='Available']), 'DateISO8601')"/>
49
            <xsl:choose>
50
              <xsl:when test="string-length($varEmbargoEndDate) > 0">
51
                <oaf:embargoenddate>
52
                  <xsl:value-of select="$varEmbargoEndDate"/>
53
                </oaf:embargoenddate>
54
              </xsl:when>
55
              <xsl:otherwise>
56
               <oaf:skip>
57
                 <xsl:value-of select="TransformationFunction:skipRecord($tf, $index)"/>
58
               </oaf:skip>
59
              </xsl:otherwise>
60
            </xsl:choose>
61
         </xsl:if>
62

  
63
         <dr:CobjCategory><xsl:value-of 
64
                 select="TransformationFunction:convertString($tf, //*[local-name()='resourceType']/@resourceTypeGeneral, 'TextTypologies')" />
65
         </dr:CobjCategory>
66

  
67
         <oaf:dateAccepted>
68
               <xsl:value-of select="TransformationFunction:convertString($tf, normalize-space(//*[local-name()='publicationYear']), 'DateISO8601')"/>           
69
         </oaf:dateAccepted>
70
         <!-- Access rights; the first matching branch wins:
              1. rights text starting with 'info:eu-repo/semantics' is converted with the 'AccessRights' rule;
              2. otherwise a rightsURI attribute with that prefix is converted the same way;
              3. otherwise rights text starting with 'http://creativecommons.org' yields OPEN;
              4. anything else yields CLOSED. -->
         <oaf:accessrights>
           <xsl:choose>
             <xsl:when test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'info:eu-repo/semantics')]">
               <xsl:value-of select="TransformationFunction:convertString($tf, //*[local-name() = 'rights'][starts-with(normalize-space(.), 'info:eu-repo/semantics')], 'AccessRights')"/>
             </xsl:when>
             <xsl:when test="//*[local-name() = 'rights']/@rightsURI[starts-with(normalize-space(.), 'info:eu-repo/semantics')]">
               <xsl:value-of select="TransformationFunction:convertString($tf, //*[local-name() = 'rights']/@rightsURI[starts-with(normalize-space(.), 'info:eu-repo/semantics')], 'AccessRights')"/>
             </xsl:when>
             <xsl:when test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'http://creativecommons.org')]">
               <xsl:text>OPEN</xsl:text>
             </xsl:when>
             <xsl:otherwise>
               <xsl:text>CLOSED</xsl:text>
             </xsl:otherwise>
           </xsl:choose>
         </oaf:accessrights>
97

  
98
         <oaf:language>
99
           <xsl:value-of select="TransformationFunction:convert($tf, //*[local-name()='language'], 'Languages')" />
100
         </oaf:language>
101

  
102
<!--
103
    <xsl:if test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'info:eu-repo/semantics/embargoedAccess')]">
104
        <oaf:embargoenddate>
105
          <xsl:value-of select="//*[local-name()='date']/@dateType='Available'"/>
106
        </oaf:embargoenddate>
107
    </xsl:if>
108
-->
109

  
110
         <xsl:for-each select="//*[local-name()='nameIdentifier']">
111
            <xsl:if test="string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6">
112
                <oaf:projectid>
113
                    <xsl:value-of select="TransformationFunction:regExpr($tf, normalize-space(.), $varCorda, 's/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gm')"/>
114
                </oaf:projectid>
115
            </xsl:if>
116
         </xsl:for-each>
117

  
118

  
119
         <oaf:hostedBy>
120
            <xsl:attribute name="name">
121
               <xsl:value-of select="$varOfficialName"/>
122
            </xsl:attribute>
123
            <xsl:attribute name="id">
124
               <xsl:value-of select="$varDataSourceId"/>
125
            </xsl:attribute>
126
         </oaf:hostedBy>
127
         <oaf:collectedFrom>
128
            <xsl:attribute name="name">
129
               <xsl:value-of select="$varOfficialName"/>
130
            </xsl:attribute>
131
            <xsl:attribute name="id">
132
               <xsl:value-of select="$varDataSourceId"/>
133
            </xsl:attribute>
134
         </oaf:collectedFrom>
135
      </metadata>
136
      <xsl:copy-of select="//*[local-name() = 'about']" />
137
   </record>
138
  </xsl:template>
139

  
140
<!--
141
  <xsl:template match="//*[local-name()='language']">
142
         <oaf:language>
143
           <xsl:value-of select="TransformationFunction:convert($tf, //*[local-name()='language'], 'Languages')" />
144
         </oaf:language>
145
  </xsl:template>
146
-->
147
</xsl:stylesheet>
148
]]></CODE>
149
            </SCRIPT>
150
        </CONFIGURATION>
151
        <STATUS/>
152
        <SECURITY_PARAMETERS/>
153
    </BODY>
154
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_dcidentifier_last.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
    <HEADER>
4
        <RESOURCE_IDENTIFIER value="50cbbb8f-b9d4-486d-9895-a80ca7b963b9_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
5
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
6
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/>
9
    </HEADER>
10
    <BODY>
11
        <CONFIGURATION>
12
            <IMPORTED/>
13
            <SCRIPT>
14
                <TITLE>dc_cleaning_OPENAIREplus_compliant_dcidentifier_last</TITLE>
15
                <CODE><![CDATA[
16
declare_script "dc_cleaning_OpenAIREplus_compliant_dcidentifier_last";
17
declare_ns oaf = "http://namespace.openaire.eu/oaf";
18
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
19
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
20
declare_ns dc = "http://purl.org/dc/elements/1.1/";
21
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
22
$var0 = "''";
23
$var1 = "'corda_______::'";
24
$varDummy = "''";
25
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
26
static $varRepoid = xpath:"//dri:repositoryId";
27
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
28
dri:objIdentifier = xpath:"//dri:objIdentifier";
29
dri:repositoryId = $varRepoid;
30
// this can be made easier
31
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
32
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
33
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
34
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
36
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
37
dc:contributor = xpath:"//dc:contributor";
38
dc:description = xpath:"//dc:description";
39
$varHttpTest = "''";
40
// Record-level guard: skipRecord() is called unless the last dc:identifier starts with 'http'. $varHttpTest is not read anywhere else in this script; it only fills the 'then' branch.
if xpath:"//dc:identifier[last()][starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
41
// Per identifier: only a value that starts with 'http' and is in last position stays in dc:identifier; all others are written to dr:CobjIdentifier.
// NOTE(review): presumably position()/last() are evaluated relative to the applied //dc:identifier node set - confirm with the rule engine.
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http') and position()=last()" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
42
static dr:dateOfCollection = getValue(CURRENTDATE, []);
43
// dc:type = xpath:"//dc:type";
44
dc:language = Convert(xpath:"//dc:language", Languages);
45
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
46
dc:date = xpath:"//dc:date";
47
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
48
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
49
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
50
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
51
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
52
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
53
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
54
//
55
//
56
//
57
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
58
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
59
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
60
//
61
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
62
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
63
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
64
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
65
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
66
//
67
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
68
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
69
//
70
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
71
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
72
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
73
end
74
]]></CODE>
75
            </SCRIPT>
76
        </CONFIGURATION>
77
        <STATUS/>
78
        <SECURITY_PARAMETERS/>
79
    </BODY>
80
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_arxiv.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="18b2d7cb-dd69-4c58-9bad-8fcdb5972d5c_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2014-01-28T13:52:15+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_arxiv</TITLE>
14
                <CODE><![CDATA[
15
declare_script "dc_cleaning_OpenAIREplus_arxiv";
16
declare_ns oaf = "http://namespace.openaire.eu/oaf";
17
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
18
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
19
declare_ns dc = "http://purl.org/dc/elements/1.1/";
20
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
$varDummy = "''";
24
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
25
static $varRepoid = xpath:"//dri:repositoryId";
26
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
27
dri:objIdentifier = xpath:"//dri:objIdentifier";
28
dri:repositoryId = $varRepoid;
29
//dri:repositoryId = xpath:"//dri:repositoryId";
30
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
31
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
32
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
33
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
34
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
36
dc:contributor = xpath:"//dc:contributor";
37
dc:description = xpath:"//dc:description";
38
$varHttpTest = "''";
39
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
40
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
41
static dr:dateOfCollection = getValue(CURRENTDATE, []);
42
// dc:type = xpath:"//dc:type";
43
if xpath:"count(//dc:language) = 0" dc:language = "eng"; else dc:language = Convert(xpath:"//dc:language", Languages);
44
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
45
dc:date = xpath:"//dc:date";
46
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
47
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
48
// Project link: a dc:relation whose text after 'info:eu-repo/grantAgreement/EC/FP7/' is exactly 6 characters long is turned into oaf:projectid (prefix stripped by the regular expression, $var1 passed to RegExpr); every other relation is copied to dc:relation.
// substring-after is used because translate() deletes individual characters (including any '7' inside the grant id) rather than the prefix as a whole.
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
49
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
50
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
51
//
52
//
53
//
54
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
55
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
56
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
57
//
58
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
59
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
60
// Every record is marked OPEN unconditionally.
// NOTE(review): the preceding dc:rights rule can also assign oaf:accessrights for 'info:eu-repo/semantics' values - confirm that emitting both is intended.
oaf:accessrights = "OPEN";
61
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
62
//
63
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
64
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
65
//
66
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
67
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
68
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
69
end
70
]]></CODE>
71
            </SCRIPT>
72
        </CONFIGURATION>
73
        <STATUS/>
74
        <SECURITY_PARAMETERS/>
75
    </BODY>
76
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_pensoft.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="7a0f18a4-7015-45ec-a9e5-1c7368889d7f_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_pensoft</TITLE>
14
                <CODE><![CDATA[
15
declare_script "dc_cleaning_OpenAIREplus_pensoft";
16
declare_ns oaf = "http://namespace.openaire.eu/oaf";
17
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
18
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
19
declare_ns dc = "http://purl.org/dc/elements/1.1/";
20
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
static $varHostedByPrefix = "'openaire____::issn'";
24
$varDummy = "''";
25
// hostedBy name: dc:source with everything from the first run of whitespace followed by digits up to the end of the line removed.
$varHostedByName = RegExpr(xpath:"//dc:source", $var0, "s/(\s+)(\d+)(.*)$//gmi");
26
// hostedBy id (raw): the last dc:relation with its leading part up to the final '/' removed and all hyphens removed.
// NOTE(review): presumably the remainder is an ISSN, given the 'openaire____::issn' prefix - confirm.
$varHostedByIdTemp = RegExpr(xpath:"//dc:relation[last()]", $var0, "s/^(.*)(\/)|(-)//gmi");
27
// hostedBy id: $varHostedByPrefix followed by the raw id.
$varHostedById = xpath:"concat($varHostedByPrefix, $varHostedByIdTemp)";
28
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
29
static $varRepoid = xpath:"//dri:repositoryId";
30
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
31
dri:objIdentifier = xpath:"//dri:objIdentifier";
32
dri:repositoryId = $varRepoid;
33
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
34
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
36
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
37
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
38
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
39
dc:contributor = xpath:"//dc:contributor";
40
dc:description = xpath:"//dc:description";
41
$varHttpTest = "''";
42
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
43
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
44
static dr:dateOfCollection = getValue(CURRENTDATE, []);
45
// dc:type = xpath:"//dc:type";
46
dc:language = Convert(xpath:"//dc:language", Languages);
47
dc:date = xpath:"//dc:date";
48
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
49
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
50
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
51
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
52
//
53
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
54
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
55
// oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
56
//
57
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
58
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
59
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
60
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
61
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
62
//
63
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
64
static oaf:hostedBy = set("''", @name = $varHostedByName; , @id = $varHostedById;);
65
//
66
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
67
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
68
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
69
end
70
]]></CODE>
71
            </SCRIPT>
72
        </CONFIGURATION>
73
        <STATUS/>
74
        <SECURITY_PARAMETERS/>
75
    </BODY>
76
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="d8bf1473-4af6-4c18-8b27-fb59759908ed_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus</TITLE>
14
                <CODE><![CDATA[
15
declare_script "dc_cleaning_OpenAIREplus";
16
declare_ns oaf = "http://namespace.openaire.eu/oaf";
17
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
18
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
19
declare_ns dc = "http://purl.org/dc/elements/1.1/";
20
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
$varDummy = "''";
24
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
25
static $varRepoid = xpath:"//dri:repositoryId";
26
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
27
dri:objIdentifier = xpath:"//dri:objIdentifier";
28
dri:repositoryId = $varRepoid;
29
//dri:repositoryId = xpath:"//dri:repositoryId";
30
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
31
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
32
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
33
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
34
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
36
dc:contributor = xpath:"//dc:contributor";
37
dc:description = xpath:"//dc:description";
38
$varHttpTest = "''";
39
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
40
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
41
static dr:dateOfCollection = getValue(CURRENTDATE, []);
42
// dc:type = xpath:"//dc:type";
43
dc:language = Convert(xpath:"//dc:language", Languages);
44
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
45
dc:date = xpath:"//dc:date";
46
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
47
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
48
// For every dc:relation: if exactly six characters follow the prefix
// 'info:eu-repo/grantAgreement/EC/FP7/', emit oaf:projectid from the value with that
// prefix removed by the regular expression; otherwise copy the whitespace-normalised
// value to dc:relation.
// The test uses substring-after() because translate() removes individual characters
// (every '7', '/', 'e', 'a', ... anywhere in the value), not the prefix as a whole, so a
// translate()-based length check rejects ordinary grant ids and can accept unrelated text.
// NOTE(review): $var1 ("'corda_______::'") is passed to RegExpr; presumably it is
// prepended to the extracted id - confirm against the RegExpr implementation.
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
49
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
50
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
51
//
52
//
53
//
54
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
55
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
56
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
57
//
58
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
59
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
60
oaf:accessrights = "OPEN";
61
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
62
//
63
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
64
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
65
//
66
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
67
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
68
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
69
end
70
]]></CODE>
71
            </SCRIPT>
72
        </CONFIGURATION>
73
        <STATUS/>
74
        <SECURITY_PARAMETERS/>
75
    </BODY>
76
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_1299.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="044e8e7c-0861-42d5-9b4a-15252faa446e_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-11-29T10:05:49+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_compliant_1299</TITLE>
14
                <CODE><![CDATA[
15
declare_script "dc_cleaning_OpenAIREplus_compliant_1299";
16
declare_ns oaf = "http://namespace.openaire.eu/oaf";
17
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
18
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
19
declare_ns dc = "http://purl.org/dc/elements/1.1/";
20
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
$varDummy = "''";
24
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
25
static $varRepoid = xpath:"//dri:repositoryId";
26
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
27
dri:objIdentifier = xpath:"//dri:objIdentifier";
28
dri:repositoryId = $varRepoid;
29
//dri:repositoryId = xpath:"//dri:repositoryId";
30
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
31
// Record-level guard: calls skipRecord() when the record has no dc:creator element at all,
// or when at least one dc:creator value starts with '?????'. Otherwise only the dummy
// variable is assigned and processing continues.
// NOTE(review): the '?????' prefix presumably marks creator names damaged by an encoding
// problem at the data source - confirm with the repository this profile was written for.
if xpath:"count(//dc:creator) = 0 or //dc:creator[starts-with(., '?????')]" dc:creator = skipRecord(); else $varDummy = "''";
32
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
33
// if xpath:"//dc:title[starts-with(., '?????')]" dc:title = skipRecord(); else $varDummy = "''";
34
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
36
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
37
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
38
dc:contributor = xpath:"//dc:contributor";
39
dc:description = xpath:"//dc:description";
40
$varHttpTest = "''";
41
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
42
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
43
static dr:dateOfCollection = getValue(CURRENTDATE, []);
44
// dc:type = xpath:"//dc:type";
45
dc:language = Convert(xpath:"//dc:language", Languages);
46
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
47
dc:date = xpath:"//dc:date";
48
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
49
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
50
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
51
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
52
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
53
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
54
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
55
//
56
//
57
//
58
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
59
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
60
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
61
//
62
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
63
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
64
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
65
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
66
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
67
//
68
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
69
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
70
//
71
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
72
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
73
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
74
end
75
]]></CODE>
76
            </SCRIPT>
77
        </CONFIGURATION>
78
        <STATUS/>
79
        <SECURITY_PARAMETERS/>
80
    </BODY>
81
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_creator_comma.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="bc13c7ee-869f-4b76-81a8-2ee6ffcb81fa_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2014-04-09T08:05:59+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_compliant_creator_comma</TITLE>
14
                <CODE><![CDATA[
15
declare_script "dc_cleaning_OpenAIREplus_compliant_creator_comma";
16
declare_ns oaf = "http://namespace.openaire.eu/oaf";
17
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
18
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
19
declare_ns dc = "http://purl.org/dc/elements/1.1/";
20
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
$varDummy = "''";
24
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
25
static $varRepoid = xpath:"//dri:repositoryId";
26
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
27
dri:objIdentifier = xpath:"//dri:objIdentifier";
28
dri:repositoryId = $varRepoid;
29
//dri:repositoryId = xpath:"//dri:repositoryId";
30
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
31
if xpath:"count(//dc:creator) = 0" dc:creator = skipRecord(); else $varDummy = "''";
32
// Creator / contributor rewriting: insert ', ' after the first whitespace-separated token
// of each non-empty value, e.g. 'Smith John Paul' -> 'Smith, John Paul'.
// A value without any space is passed through unchanged. With a plain
// substring-before()/substring-after() pair both parts are empty for such a value, and the
// output would be the bare separator ', ' with the name lost.
// How the XPath 1.0 expression works:
//   substring-before(concat(n, ' '), ' ')    first token, or all of n if it has no space
//   substring(', ', 1, 2 * number(contains(n, ' ')))    ', ' if n has a space, else ''
//   substring-after(n, ' ')                  remainder after the first space, or ''
// where n is normalize-space(.).
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"concat(substring-before(concat(normalize-space(.), ' '), ' '), substring(', ', 1, 2 * number(contains(normalize-space(.), ' '))), substring-after(normalize-space(.), ' '))"; else $varDummy = "''";
33
apply xpath:"//dc:contributor" if xpath:"string-length(.) > 0" dc:contributor = xpath:"concat(substring-before(concat(normalize-space(.), ' '), ' '), substring(', ', 1, 2 * number(contains(normalize-space(.), ' '))), substring-after(normalize-space(.), ' '))"; else $varDummy = "''";
34
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
36
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
37
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
38
dc:description = xpath:"//dc:description";
39
$varHttpTest = "''";
40
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
41
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
42
static dr:dateOfCollection = getValue(CURRENTDATE, []);
43
// dc:type = xpath:"//dc:type";
44
dc:language = Convert(xpath:"//dc:language", Languages);
45
//dc:language = "eng";
46
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
47
dc:date = xpath:"//dc:date";
48
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
49
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
50
// For every dc:relation: emit oaf:projectid when exactly six characters follow the prefix
// 'info:eu-repo/grantAgreement/EC/FP7/' AND the value does not contain '/12345';
// every other relation is copied, whitespace-normalised, to dc:relation.
// NOTE(review): the '/12345' exclusion presumably filters a placeholder grant number
// delivered by this data source - confirm. Because contains() is used, it also excludes
// any real id that begins with 12345.
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6 and not(contains(normalize-space(.), '/12345'))" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
51
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
52
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
53
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
54
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
55
//
56
//
57
//
58
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
59
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
60
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
61
//
62
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
63
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
64
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
65
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
66
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
67
//
68
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
69
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
70
//
71
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
72
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
73
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
74
end
75
]]></CODE>
76
            </SCRIPT>
77
        </CONFIGURATION>
78
        <STATUS/>
79
        <SECURITY_PARAMETERS/>
80
    </BODY>
81
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_milano.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="c190a6a3-83dd-43a8-b9b2-f4db6a72beca_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_milano</TITLE>
14
                <CODE><![CDATA[
15
declare_script "dc_cleaning_OpenAIREplus_milano";
16
declare_ns oaf = "http://namespace.openaire.eu/oaf";
17
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
18
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
19
declare_ns dc = "http://purl.org/dc/elements/1.1/";
20
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
$varDummy = "''";
24
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
25
static $varRepoid = xpath:"//dri:repositoryId";
26
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
27
dri:objIdentifier = xpath:"//dri:objIdentifier";
28
dri:repositoryId = $varRepoid;
29
//dri:repositoryId = xpath:"//dri:repositoryId";
30
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
31
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
32
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
33
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
34
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
36
dc:contributor = xpath:"//dc:contributor";
37
dc:description = xpath:"//dc:description";
38
$varHttpTest = "''";
39
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
40
// Record-level guard: calls skipRecord() when the record carries a dc:rights value starting
// with 'info:eu-repo/semantics/closedAccess' and has no dc:relation starting with
// 'info:eu-repo/grantAgreement'. Closed-access records that do reference a grant agreement
// are not skipped by this rule.
// NOTE(review): the left-hand target dc:relation appears to serve only as a carrier for the
// skipRecord() call - confirm against the rule-language documentation.
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/closedAccess')] and not(//dc:relation[starts-with(normalize-space(.), 'info:eu-repo/grantAgreement')])" dc:relation = skipRecord(); else $varDummy = "''";
41
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
42
static dr:dateOfCollection = getValue(CURRENTDATE, []);
43
// dc:type = xpath:"//dc:type";
44
dc:language = Convert(xpath:"//dc:language", Languages);
45
//dc:language = "eng";
46
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
47
dc:date = xpath:"//dc:date";
48
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
49
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
50
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
51
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
52
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
53
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
54
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
55
//
56
//
57
//
58
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
59
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
60
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
61
//
62
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
63
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
64
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
65
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
66
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
67
//
68
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
69
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
70
//
71
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
72
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
73
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
74
end
75
]]></CODE>
76
            </SCRIPT>
77
        </CONFIGURATION>
78
        <STATUS/>
79
        <SECURITY_PARAMETERS/>
80
    </BODY>
81
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="b844840b-4efa-40a1-a1ce-90b4b63972c2_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_compliant</TITLE>
14
                <CODE><![CDATA[
15
declare_script "dc_cleaning_OpenAIREplus_compliant";
16
declare_ns oaf = "http://namespace.openaire.eu/oaf";
17
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
18
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
19
declare_ns dc = "http://purl.org/dc/elements/1.1/";
20
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
$varDummy = "''";
24
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
25
static $varRepoid = xpath:"//dri:repositoryId";
26
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
27
dri:objIdentifier = xpath:"//dri:objIdentifier";
28
dri:repositoryId = $varRepoid;
29
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
30
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
31
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
32
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
33
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
34
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
35
dc:contributor = xpath:"//dc:contributor";
36
dc:description = xpath:"//dc:description";
37
$varHttpTest = "''";
38
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
39
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
40
static dr:dateOfCollection = getValue(CURRENTDATE, []);
41
// dc:type = xpath:"//dc:type";
42
dc:language = Convert(xpath:"//dc:language", Languages);
43
//dc:language = "eng";
44
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
45
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
46
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
47
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
48
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
49
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
50
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
51
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
52
//
53
//
54
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
55
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
56
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
57
//
58
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
59
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
60
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
61
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
62
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
63
//
64
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
65
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
66
//
67
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
68
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
69
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
70
end
71
]]></CODE>
72
            </SCRIPT>
73
        </CONFIGURATION>
74
        <STATUS/>
75
        <SECURITY_PARAMETERS/>
76
    </BODY>
77
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="iisPreprocessingJob" type="oozie">
11
 			<DESCRIPTION>IIS preprocessing</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
				<!-- Cluster wide -->
14
                <PROPERTY key="queueName" value="default"/>
15
				<PROPERTY key="user.name" value="dnet.beta" />
16

  
17
				<!-- Runtime -->
18
				<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing"  />
19
				<PROPERTY key="oozie.wf.application.path" value="/tmp/integration/apps/preprocessing"  />
20
				<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"  />
21
				<PROPERTY key="export_action_hbase_table_initialize" value="false"/>
22
				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/>
23
				<PROPERTY key="metadataextraction_excluded_checksums" value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
24
        	</STATIC_CONFIGURATION>
25
        	<JOB_INTERFACE>
26
 		       	<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint" />
27
        		<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" />
28
        		<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records" />
29
        		<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records" />
30
        		<PARAM name="import_database_service_location" required="true" description="database service endpoint" />
31
        		<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction" />
32
        		<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext" />
33
	      		<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" />
34
        		<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" />
35
        		<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" />
36
        		<PARAM name="nameNode" required="true" description="hdfs name node" />
37
        		<PARAM name="jobTracker" required="true" description="job tracker name" />
38
        		<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" />
39
       			<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references" />
40
       			<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references" />
41
       			<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities" />
42
     			<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities" />
43
        	</JOB_INTERFACE>
44
        </HADOOP_JOB>
45
        <STATUS>
46
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
47
            <RUNNING_INSTANCES value="0"/>
48
            <CUMULATIVE_RUN value="0" />
49
        </STATUS>
50
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
51
    </BODY>
52
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_fraunhofer.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="3ff02e38-f3d2-4ecc-9d45-7100256c979d_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2014-04-29T03:51:40+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_compliant_fraunhofer</TITLE>
14
                <CODE><![CDATA[
15
declare_script "dc_cleaning_OpenAIREplus_compliant_fraunhofer";
16
declare_ns oaf = "http://namespace.openaire.eu/oaf";
17
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
18
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
19
declare_ns dc = "http://purl.org/dc/elements/1.1/";
20
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
$varDummy = "''";
24
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
25
static $varRepoid = xpath:"//dri:repositoryId";
26
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
27
dri:objIdentifier = xpath:"//dri:objIdentifier";
28
dri:repositoryId = $varRepoid;
29
//dri:repositoryId = xpath:"//dri:repositoryId";
30
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
31
if xpath:"count(//dc:creator) = 0" dc:creator = skipRecord(); else $varDummy = "''";
32
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
33
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
34
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
36
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
37
dc:contributor = xpath:"//dc:contributor";
38
dc:description = xpath:"//dc:description";
39
$varHttpTest = "''";
40
if xpath:"//dc:rights[starts-with(., 'openAccess')]  or count(//dc:relation[starts-with(normalize-space(.), 'info:eu-repo/grantAgreement')]) > 0" $varHttpTest = "true"; else dc:identifier = skipRecord();
41
//if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
42
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
43
static dr:dateOfCollection = getValue(CURRENTDATE, []);
44
// dc:type = xpath:"//dc:type";
45
dc:language = Convert(xpath:"//dc:language", Languages);
46
//dc:language = "eng";
47
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
48
dc:date = xpath:"//dc:date";
49
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
50
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
51
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6 and not(contains(normalize-space(.), '/12345'))" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
52
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
53
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
54
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
55
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
56
//
57
//
58
//
59
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
60
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
61
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
62
//
63
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
64
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
65
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
66
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
67
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
68
//
69
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
70
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
71
//
72
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
73
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
74
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
75
end
76
]]></CODE>
77
            </SCRIPT>
78
        </CONFIGURATION>
79
        <STATUS/>
80
        <SECURITY_PARAMETERS/>
81
    </BODY>
82
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="f6e4cbcd-b173-4f8d-9205-e64ba15f03ad_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="iisMainJob" type="oozie">
11
            <DESCRIPTION>IIS main workflow</DESCRIPTION>
12
            <STATIC_CONFIGURATION>
13
        	
14
				<!-- Cluster wide -->
15
                <PROPERTY key="queueName" value="default"/>
16
                <PROPERTY key="user.name" value="dnet.beta"/>
17

  
18
				<!-- Runtime -->
19
				<PROPERTY key="active_existence_filter" value="true"/>
20
                <PROPERTY key="import_hbase_approved_datasources_csv" value="$UNDEFINED$"/>
21
                <PROPERTY key="import_hbase_input_table" value="information_space-2014-11-05"/>
22
   				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/>
23
                <PROPERTY key="export_action_hbase_table_initialize" value="true"/>
24
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
25
                <PROPERTY key="workingDir" value="/tmp/integration/working_dir/main"/>
26
				<PROPERTY key="metadataextraction_excluded_checksums" value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
27
            </STATIC_CONFIGURATION>
28
        	<JOB_INTERFACE>
29
        		<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint" />
30
        		<PARAM name="import_content_objectstores_csv" required="true" description="csv list of the available object stores subject to processing" />
31
        		<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" />
32
        		<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records" />
33
	      		<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" />
34
        		<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" />
35
        		<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" />
36
        		<PARAM name="nameNode" required="true" description="hdfs name node" />
37
        		<PARAM name="jobTracker" required="true" description="job tracker name" />
38
        		<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" />
39
                <PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/>
40
                <PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/>
41
                <PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative" required="true"/>
42
                <PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/>
43
                <PARAM description="target action set for document statistics" name="export_action_set_id_document_statistics" required="true"/>
44
                <PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/>
45
                <PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/>
46
                <PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/>
47
       		
48
        		<!-- flags to enable/disable IIS modules -->
49
        		<PARAM name="active_metadataextraction_export" required="true" description="enable/disable the affiliation matching module" />
50
        		<PARAM name="active_citationmatching" required="true" description="enable/disable the citation matching module" />
51
        		<PARAM name="active_documentsclassification" required="true" description="enable/disable the document classification module" />
52
        		<PARAM name="active_documentssimilarity" required="true" description="enable/disable the document similarity module" />
53
        		<PARAM name="active_referenceextraction_dataset" required="true" description="enable/disable the dataset reference extraction module" />
54
        		<PARAM name="active_referenceextraction_project" required="true" description="enable/disable the project reference extraction module" />
55
        		<PARAM name="active_referenceextraction_researchinitiative" required="true" description="enable/disable the research initiative extraction module" />
56
        		<PARAM name="active_statistics" required="true" description="enable/disable the statistics module" />
57
        	</JOB_INTERFACE>
58
        </HADOOP_JOB>
59
        <STATUS>
60
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
61
            <RUNNING_INSTANCES value="0"/>
62
            <CUMULATIVE_RUN value="0"/>
63
        </STATUS>
64
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
65
    </BODY>
66
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupCandidateScanJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="675f1436-205a-4b19-8b6b-35e1c17fb125_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupCandidateScanJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />				
27
				
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.compress.map.output" value="true" />	
30
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
32
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
33
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
34
			
35
				<PROPERTY key="mapred.reduce.tasks" value="1000" />
36
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
37
				
38
		<!--  	Uncomment to override the default lib path -->			
39
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
40
        	</STATIC_CONFIGURATION>
41
        	<JOB_INTERFACE>
42
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
43
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
44
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
45
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
46
        		<PARAM name="dedup.pace.conf" required="true" description="dedup pace configuration" />
47
        		<PARAM name="dedup.wf.conf" required="true" description="dedup workflow configuration" />        		
48
        	</JOB_INTERFACE>
49
        	<SCAN>
50
        		<FILTERS operator="MUST_PASS_ALL">
51
        			<FILTER type="prefix" param="entityTypeId" />
52
        		</FILTERS>
53
        		<FAMILIES>
54
        			<FAMILY param="entityType" />
55
           			<FAMILY value="resultResult_dedup_merges" />
56
        			<FAMILY value="personPerson_dedup_merges" />
57
     				<FAMILY value="organizationOrganization_dedup_merges" />        			
58
        		</FAMILIES>
59
        	</SCAN>
60
        </HADOOP_JOB>
61
        <STATUS>
62
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
63
            <RUNNING_INSTANCES value="0"/>
64
            <CUMULATIVE_RUN value="0" />
65
        </STATUS>
66
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
67
    </BODY>
68
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_dcrelation_last.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="351c08cd-08a7-4a7b-b629-80b828553164_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_compliant_dcrelation_last</TITLE>
14
                <CODE><![CDATA[
15
declare_script "dc_cleaning_OpenAIREplus_compliant_dcrelation_last";
16
declare_ns oaf = "http://namespace.openaire.eu/oaf";
17
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
18
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
19
declare_ns dc = "http://purl.org/dc/elements/1.1/";
20
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
$varDummy = "''";
24
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
25
static $varRepoid = xpath:"//dri:repositoryId";
26
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
27
dri:objIdentifier = xpath:"//dri:objIdentifier";
28
dri:repositoryId = $varRepoid;
29
//dri:repositoryId = xpath:"//dri:repositoryId";
30
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
31
if xpath:"count(//dc:creator) = 0" dc:creator = skipRecord(); else $varDummy = "''";
32
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
33
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
34
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
36
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
37
dc:contributor = xpath:"//dc:contributor";
38
dc:description = xpath:"//dc:description";
39
$varHttpTest = "''";
40
$varHttpLinks = xpath:"//dc:relation[starts-with(., 'http')]";
41
if xpath:"count($varHttpLinks) > 0" $varHttpTest = "true"; else dc:identifier = skipRecord();
42
dc:identifier = xpath:"$varHttpLinks[last()][normalize-space(.)]";
43
dr:CobjIdentifier = xpath:"//dc:identifier[normalize-space(.)]";
44
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'http') and position=last()" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
45
static dr:dateOfCollection = getValue(CURRENTDATE, []);
46
// dc:type = xpath:"//dc:type";
47
dc:language = Convert(xpath:"//dc:language", Languages);
48
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
49
dc:date = xpath:"//dc:date";
50
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
51
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
52
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
53
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
54
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
55
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
56
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
57
//
58
//
59
//
60
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
61
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
62
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
63
//
64
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
65
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
66
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
67
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
68
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
69
//
70
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
71
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
72
//
73
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
74
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
75
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
76
end
77
]]></CODE>
78
            </SCRIPT>
79
        </CONFIGURATION>
80
        <STATUS/>
81
        <SECURITY_PARAMETERS/>
82
    </BODY>
83
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/oaiFeedJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="03d7af20-63bb-4790-a052-6cdbc1e05fce_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2015-02-09T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="oaiFeedJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that feeds the OAI store</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" />	
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.oai.OaiFeedMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable" />				
22
				
23
				<!-- JOB GLOBAL -->		
24
<!--                 <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.NullWritable" /> -->
25
<!--                 <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.NullWritable"/> -->
26
				
27
				<!-- MISC -->					
28
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
29
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32
				
33
				<!--  	Overrides the default lib path; comment out the property below to use the default -->
34
				<PROPERTY key="job.lib" value="/user/dnet/lib/dnet-mapreduce-jobs-assembly-0.0.6.3-SNAPSHOT.jar"/> 
35
        	</STATIC_CONFIGURATION>
36
        	<JOB_INTERFACE>
37
        		<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" />
38
				<PARAM name="services.publisher.oai.collection" required="true" description="target mongodb collection" />
39
				<PARAM name="oaiConfiguration" required="true" description="configuration bean used to guide the OAI feeding" />
40
				<PARAM name="oai.feed.date" required="true" description="timestamp" />				
41
				<PARAM name="services.publisher.oai.host" required="true" description="mongodb host" />
42
				<PARAM name="services.publisher.oai.port" required="true" description="mongodb port" />
43
				<PARAM name="services.publisher.oai.db" required="true" description="mongodb database name" />	
44
				<PARAM name="services.publisher.oai.skipDuplicates" required="true" description="skip duplicated records." />	
45
				<PARAM name="services.publisher.oai.duplicateXPath" required="true" description="records with this xpath are identified as duplicates" />																
46
        	</JOB_INTERFACE>
47
        	<SCAN>
48
        		<FILTERS />
49
        		<FAMILIES />
50
        	</SCAN>
51
        </HADOOP_JOB>
52
        <STATUS>
53
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
54
            <RUNNING_INSTANCES value="0"/>
55
            <CUMULATIVE_RUN value="0" />
56
        </STATUS>
57
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
58
    </BODY>
59
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_compliant_tropmed.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="75b06691-81d5-47c4-9626-bf1dd5b21122_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_compliant_tropmed</TITLE>
14
                <CODE><![CDATA[
15
declare_script "dc_cleaning_OpenAIREplus_compliant_tropmed";
16
declare_ns oaf = "http://namespace.openaire.eu/oaf";
17
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
18
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
19
declare_ns dc = "http://purl.org/dc/elements/1.1/";
20
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
$varDummy = "''";
24
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
25
static $varRepoid = xpath:"//dri:repositoryId";
26
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
27
dri:objIdentifier = xpath:"//dri:objIdentifier";
28
dri:repositoryId = $varRepoid;
29
//dri:repositoryId = xpath:"//dri:repositoryId";
30
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
31
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
32
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
33
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
34
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
36
dc:contributor = xpath:"//dc:contributor";
37
dc:description = xpath:"//dc:description";
38
$varHttpTest = "''";
39
if xpath:"//dc:identifier[starts-with(., 'http')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
40
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http://hdl.handle.net/10390/')" dc:identifier = xpath:"concat('http://dspace.itg.be/handle/10390/', substring-after(normalize-space(.), 'http://hdl.handle.net/10390/'))"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
41
static dr:dateOfCollection = getValue(CURRENTDATE, []);
42
// dc:type = xpath:"//dc:type";
43
dc:language = Convert(xpath:"//dc:language", Languages);
44
//dc:language = "eng";
45
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
46
dc:date = xpath:"//dc:date";
47
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
48
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
49
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
50
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
51
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
52
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
53
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
54
//
55
//
56
//
57
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
58
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
59
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
60
//
61
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
62
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
63
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
64
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
65
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
66
//
67
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
68
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
69
//
70
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
71
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
72
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
73
end
74
]]></CODE>
75
            </SCRIPT>
76
        </CONFIGURATION>
77
        <STATUS/>
78
        <SECURITY_PARAMETERS/>
79
    </BODY>
80
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.4/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/indexFeedJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="1c34963b-75b3-4440-9f42-72445a26c077_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="indexFeedJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that joins the entities on the hbase table and produces a sequence file containing the xml records</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.IndexFeedMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22
				
23
				<!-- JOB GLOBAL -->		
24
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
25
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>
26
				
27
				<!-- MISC -->					
28
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
29
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<PROPERTY key="mapred.fairscheduler.pool" value="solr"/>
32
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
33
				
34
		<!--  	Uncomment to override the default lib path -->			
35
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
36
        	</STATIC_CONFIGURATION>
37
        	<JOB_INTERFACE>
38
        		<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" />
39
        		<PARAM name="mapred.output.dir" required="true" description="destination path on hdfs for rotten index xml records" />
40
        		
41
        		<PARAM name="index.solr.url" required="false" description="url used to instantiate the solr client" /> 
42
       			<PARAM name="index.solr.collection" required="true" description="target solr collection to be fed" />
43

  
44
       			<PARAM name="id" required="true" description="index DS id" />
45
				<PARAM name="index.shutdown.wait.time" required="true" description="wait time before shut down the solr client pool" />
46
       			<PARAM name="index.buffer.flush.threshold" required="true" description="indexing buffer flush threshold" />
47
       			<PARAM name="index.feed.timestamp" required="true" description="timestamp used as ds_version" />
48
				<PARAM name="index.solr.sim.mode" required="true" description="boolean value, allows to run this job in simulation mode" />
49
				<PARAM name="index.xslt" required="true" description="record transformer created by the MSRO service" />
50
        	</JOB_INTERFACE>
51
        	<SCAN>
52
        		<FILTERS />
53
        		<FAMILIES />
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff