Project

General

Profile

« Previous | Next » 

Revision 55291

Added Hadoop jobs for transformation and collection for the new implementation of Dnet Hadoop

View differences:

modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/DnetHadoopTransformationJob.xml
1
<RESOURCE_PROFILE><!-- Resource profile declaring the configuration of one Hadoop job of type "oozie" -->
2
    <HEADER><!-- Profile metadata: identifier, resource type and kind, URI, creation date -->
3
        <RESOURCE_IDENTIFIER value="bf711c9b-8c92-42ad-9fc2-797815e13760_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/><!-- URI is left empty in this profile -->
7
        <DATE_OF_CREATION value="2019-04-11T11:54:24+02:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dnetHadoopTrasnformation" type="oozie"><!-- NOTE(review): "Trasnformation" looks like a misspelling of "Transformation"; the name may be referenced by other profiles, so confirm before renaming -->
11
            <DESCRIPTION>IIS preprocessing</DESCRIPTION><!-- NOTE(review): the description does not mention transformation; presumably copied from another job, confirm the intended text -->
12
            <STATIC_CONFIGURATION><!-- Cluster wide -->
13
                <PROPERTY key="queueName" value="default"/>
14
                <PROPERTY key="user.name" value="dnet"/><!-- Runtime -->
15
                <PROPERTY key="oozie.wf.application.path" value="TOADD"/><!-- NOTE(review): "TOADD" looks like a placeholder; presumably the real workflow path still has to be filled in -->
16
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
17
                <PROPERTY key="oozie.use.system.libpath" value="True"/><!-- NOTE(review): boolean casing is mixed in this block ("false" above, "True"/"False" here and below); verify the consumer accepts both spellings -->
18
                <PROPERTY key="security_enabled" value="False"/>
19
                <PROPERTY key="dryrun" value="True"/>
20
                <PROPERTY key="oozie.action.sharelib.for.spark" value="spark2"/>
21
                <PROPERTY key="metadataEncoding" value="XML"/>
22
            </STATIC_CONFIGURATION>
23
            <JOB_INTERFACE><!-- Parameters exposed by the job; every PARAM listed here is marked required="true" -->
24
                <PARAM description="the path of the input MDStore" name="mdstoreInputPath" required="true"/>
25
                <PARAM description="the path of the cleaned mdstore" name="mdstoreOutputPath" required="true"/>
26
                <PARAM description="The transformation Rule to apply" name="transformationRule" required="true"/>
27
                <PARAM description="The timestamp of the collection date" name="timestamp" required="true"/>
28
                <PARAM description="the Dnet Workflow Identifier" name="workflowId" required="true"/>
29
            </JOB_INTERFACE>
30
        </HADOOP_JOB>
31
        <STATUS><!-- Submission bookkeeping stored with the profile: last submission date, running instances, cumulative runs -->
32
            <LAST_SUBMISSION_DATE value="2019-04-11T15:34:37+02:00"/>
33
            <RUNNING_INSTANCES value="2"/>
34
            <CUMULATIVE_RUN value="71"/>
35
        </STATUS>
36
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS><!-- NOTE(review): the element text equals the element name; presumably a placeholder, confirm -->
37
    </BODY>
38
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dnetHadoopCollectionJob.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE><!-- Resource profile declaring the configuration of the "dnetHadoopCollection" Hadoop job (type "oozie") -->
3
    <HEADER><!-- Profile metadata: identifier, resource type and kind, URI, creation date -->
4
        <RESOURCE_IDENTIFIER value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
        <RESOURCE_URI value=""/><!-- URI is left empty in this profile -->
8
        <DATE_OF_CREATION value="2019-04-11T11:31:58+02:00"/>
9
    </HEADER>
10
    <BODY>
11
        <HADOOP_JOB name="dnetHadoopCollection" type="oozie">
12
            <DESCRIPTION>IIS preprocessing</DESCRIPTION><!-- NOTE(review): the description does not mention collection; presumably copied from another job, confirm the intended text -->
13
            <STATIC_CONFIGURATION><!-- Cluster wide -->
14
                <PROPERTY key="queueName" value="default"/>
15
                <PROPERTY key="user.name" value="dnet"/><!-- Runtime -->
16
                <PROPERTY key="oozie.wf.application.path" value="TOADD"/><!-- NOTE(review): "TOADD" looks like a placeholder; presumably the real workflow path still has to be filled in -->
17
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
18
                <PROPERTY key="oozie.use.system.libpath" value="True"/><!-- NOTE(review): boolean casing is mixed in this block ("false" above, "True"/"False" here and below); verify the consumer accepts both spellings -->
19
                <PROPERTY key="security_enabled" value="False"/>
20
                <PROPERTY key="dryrun" value="True"/>
21
                <PROPERTY key="oozie.action.sharelib.for.spark" value="spark2"/>
22
                <PROPERTY key="metadataEncoding" value="XML"/>
23
            </STATIC_CONFIGURATION>
24
            <JOB_INTERFACE><!-- Parameters exposed by the job; every PARAM listed here is marked required="true" -->
25
                <PARAM description="the Dnet Workflow Identifier" name="workflowId" required="true"/>
26
                <PARAM description="the path to store the sequence file of the native metadata collected" name="sequenceFilePath" required="true"/><!-- commented-out attribute kept for reference: value="/user/sandro.labruzzo/mdstores/oai_1" -->
27
                <PARAM description="the path of the native mdstore" name="mdStorePath" required="true"/>
28
                <PARAM description="A json encoding of the API Description class" name="apiDescription" required="true"/>
29
                <PARAM description="A json encoding of the Datasource Info" name="dataSourceInfo" required="true"/>
30
                <PARAM description="An xpath to retrieve the metadata identifier for the generation of DNet Identifier" name="identifierPath" required="true"/>
31
                <PARAM description="The timestamp of the collection date" name="timestamp" required="true"/>
32
            </JOB_INTERFACE>
33
        </HADOOP_JOB>
34
        <STATUS><!-- Submission bookkeeping stored with the profile: last submission date, running instances, cumulative runs -->
35
            <LAST_SUBMISSION_DATE value="2019-04-11T15:33:02+02:00"/>
36
            <RUNNING_INSTANCES value="2"/>
37
            <CUMULATIVE_RUN value="64"/>
38
        </STATUS>
39
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS><!-- NOTE(review): the element text equals the element name; presumably a placeholder, confirm -->
40
    </BODY>
41
</RESOURCE_PROFILE>

Also available in: Unified diff