Project

General

Profile

« Previous | Next » 

Revision 53709

added workflows for infoSpace counts

View differences:

modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/dm/hbase.infospace.counts.xml
1
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="29b448c9-863b-45a2-b4a3-b694d2254f8a_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2018-11-01T10:13:43+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>Data Provision Counts</WORKFLOW_NAME>
11
        <WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>35</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual"><!-- PREPARE NODES -->
14
            <!-- Start node "fetchRelClasses" (type FetchRelClasses).
                 Takes two required, system-managed string parameters:
                   relClassesProperty = dnet.openaire.model.relclasses.xquery
                   relClassesName     = relClasses
                 The relClassesName value is the same string that the "countInfospace" node
                 lists in its envParams map.
                 NOTE(review): presumably relClassesProperty names a property holding an xquery and
                 the result is stored under relClassesName; confirm against the node implementation.
                 Its only outgoing arc leads to "countInfospace". -->
            <NODE isStart="true" name="fetchRelClasses" type="FetchRelClasses">
15
                <DESCRIPTION/>
16
                <PARAMETERS>
17
                    <PARAM managedBy="system" name="relClassesProperty" required="true" type="string">dnet.openaire.model.relclasses.xquery</PARAM>
18
                    <PARAM managedBy="system" name="relClassesName" required="true" type="string">relClasses</PARAM>
19
                </PARAMETERS>
20
                <ARCS>
21
                    <ARC to="countInfospace"/>
22
                </ARCS>
23
            </NODE>
24
            <!-- Start node "fetchContexts" (type LoadContextsJob).
                 Declares no parameters; its only outgoing arc leads to "countInfospace".
                 NOTE(review): presumably this node provides the 'contextmap' value that
                 "countInfospace" lists in its envParams; confirm against LoadContextsJob. -->
            <NODE isStart="true" name="fetchContexts" type="LoadContextsJob">
25
                <DESCRIPTION/>
26
                <PARAMETERS/>
27
                <ARCS>
28
                    <ARC to="countInfospace"/>
29
                </ARCS>
30
            </NODE>
31
            <!-- Start node "fetchEntityLinks" (type LoadEntityLinksJob).
                 Declares no parameters; its only outgoing arc leads to "countInfospace".
                 NOTE(review): presumably this node provides the 'index.entity.links' value that
                 "countInfospace" lists in its envParams; confirm against LoadEntityLinksJob. -->
            <NODE isStart="true" name="fetchEntityLinks" type="LoadEntityLinksJob">
32
                <DESCRIPTION/>
33
                <PARAMETERS/>
34
                <ARCS>
35
                    <ARC to="countInfospace"/>
36
                </ARCS>
37
            </NODE>
38
            <!-- Join node "countInfospace" (type SubmitHadoopJob).
                 It is the arc target of all three start nodes of this profile:
                 fetchRelClasses, fetchContexts and fetchEntityLinks.
                 Parameters (all required and system-managed):
                   cluster   = DM
                   hadoopJob = countInfospaceJob
                   sysParams = text map; both 'hbase.mapred.inputtable' and
                               'hbase.mapreduce.inputtable' point at 'hbase.mapred.datatable'
                   envParams = text map with the keys 'index.entity.links', 'contextmap' and
                               'relClasses', each mapped to a value of the same name
                 NOTE(review): presumably each map entry reads "job property : name of the system
                 property (sysParams) or workflow env attribute (envParams) to copy the value from";
                 confirm against the SubmitHadoopJob node implementation.
                 The only outgoing arc leads to "success", which is not declared as a NODE in this
                 profile (presumably a terminal state provided by the workflow engine; confirm). -->
            <NODE isJoin="true" name="countInfospace" type="SubmitHadoopJob">
39
                <DESCRIPTION>M/R count entities and relationships</DESCRIPTION>
40
                <PARAMETERS>
41
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
42
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">countInfospaceJob</PARAM>
43
                    <PARAM managedBy="system" name="sysParams" required="true" type="string">
44
                        {
45
                        'hbase.mapred.inputtable' : 'hbase.mapred.datatable',
46
                        'hbase.mapreduce.inputtable' : 'hbase.mapred.datatable'
47
                        }
48
                    </PARAM>
49
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
50
                        {
51
                        'index.entity.links' : 'index.entity.links',
52
                        'contextmap' : 'contextmap',
53
                        'relClasses' : 'relClasses'
54
                        }
55
                    </PARAM>
56
                </PARAMETERS>
57
                <ARCS>
58
                    <ARC to="success"/>
59
                </ARCS>
60
            </NODE>
61
        </CONFIGURATION>
62
        <STATUS>
63
            <LAST_EXECUTION_ID>wf_20181107_103348_265</LAST_EXECUTION_ID>
64
            <LAST_EXECUTION_DATE>2018-11-08T08:48:56+00:00</LAST_EXECUTION_DATE>
65
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
66
            <LAST_EXECUTION_ERROR/>
67
        </STATUS>
68
    </BODY>
69
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/dm/xmlRecords.counts.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="b030513b-69af-4a9f-90c7-90b96bd5eb5c_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2018-11-06T11:27:07+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>XML records counts</WORKFLOW_NAME>
11
        <WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual"><!-- PREPARE NODES -->
14
            <!-- Start node "setInputFilesPath" (type SetEnvParameter); the only start node of this profile.
                 Parameters:
                   parameterName  = inputHdfsPath (required, system-managed)
                   parameterValue = /tmp/indexrecords_db_openaireplus_services_TMF.seq
                                    (optional, user-managed, so it can be changed per run)
                 The name inputHdfsPath is the same string that "countXML" maps to
                 'mapred.input.dir' in its envParams.
                 NOTE(review): presumably the node stores parameterValue in the workflow env under
                 parameterName; confirm against the SetEnvParameter implementation.
                 Its only outgoing arc leads to "countXML". -->
            <NODE isStart="true" name="setInputFilesPath" type="SetEnvParameter">
15
                <DESCRIPTION>set the input path on hdfs to read xml records</DESCRIPTION>
16
                <PARAMETERS>
17
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputHdfsPath</PARAM>
18
                    <PARAM managedBy="user" name="parameterValue" required="false" type="string">/tmp/indexrecords_db_openaireplus_services_TMF.seq</PARAM>
19
                </PARAMETERS>
20
                <ARCS>
21
                    <ARC to="countXML"/>
22
                </ARCS>
23
            </NODE>
24
            <!-- Node "countXML" (type SubmitHadoopJob).
                 Parameters (all required):
                   cluster    = DM (system-managed)
                   hadoopJob  = xmlRecordCounterJob (system-managed)
                   simulation = false (boolean, user-managed)
                   envParams  = text map sending 'mapred.input.dir' to 'inputHdfsPath', the name
                                set by the "setInputFilesPath" node (system-managed)
                 NOTE(review): isJoin="true" is set although only one arc (from "setInputFilesPath")
                 targets this node in this profile; confirm whether the flag is needed here.
                 The only outgoing arc leads to "success", which is not declared as a NODE in this
                 profile (presumably a terminal state provided by the workflow engine; confirm). -->
            <NODE isJoin="true" name="countXML" type="SubmitHadoopJob">
25
                <DESCRIPTION>Run M/R count Job</DESCRIPTION>
26
                <PARAMETERS>
27
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
28
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">xmlRecordCounterJob</PARAM>
29
                    <PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM>
30
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
31
                        {
32
                        'mapred.input.dir' : 'inputHdfsPath'
33
                        }
34
                    </PARAM>
35
                </PARAMETERS>
36
                <ARCS>
37
                    <ARC to="success"/>
38
                </ARCS>
39
            </NODE>
40
        </CONFIGURATION>
41
        <STATUS>
42
            <LAST_EXECUTION_ID>wf_20181022_103455_527</LAST_EXECUTION_ID>
43
            <LAST_EXECUTION_DATE>2018-10-22T12:44:29+00:00</LAST_EXECUTION_DATE>
44
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
45
            <LAST_EXECUTION_ERROR/>
46
        </STATUS>
47
    </BODY>
48
</RESOURCE_PROFILE>

Also available in: Unified diff