Project

General

Profile

« Previous | Next » 

Revision 60995

Added by Alessia Bardi 8 months ago

Workflows that import data by-passing the aggregation system

View differences:

modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/import/import-scholexplorer.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="e03f256e-1e4d-4b3d-9c07-91faf5d25207_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2019-09-26T16:09:43+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>Import ScholExplorer Links and entities</WORKFLOW_NAME>
11
        <WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual">
14
            <!-- Entry node: the user-supplied hdfsPath is published under the name given by
                 hdfsPathParam ("inputPath"); importActionSet uses that name as mapred.input.dir. -->
            <NODE isStart="true" name="setInputPath" type="SetHdfsFile">
15
                <DESCRIPTION>set the hdfs input path</DESCRIPTION>
16
                <PARAMETERS>
17
                    <PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/data/scholixDumpExport_2019_09_26</PARAM>
18
                    <PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM>
19
                </PARAMETERS>
20
                <ARCS>
21
                    <ARC to="prepareActionSets"/>
22
                </ARCS>
23
            </NODE>
24
            <NODE name="prepareActionSets" type="PrepareActionSets">
25
                <DESCRIPTION>prepare action sets</DESCRIPTION>
26
                <PARAMETERS>
27
                    <PARAM managedBy="system" name="sets" required="true" type="string">
28
                        [
29
                        {
30
                        'set' : 'scholexplorer-dump',
31
                        'jobProperty' : 'export_action_set_scholexplorer_dump',
32
                        'enablingProperty' : 'active_scholexplorer_dump',
33
                        'enabled' : 'true'
34
                        }
35
                        ]
36
                    </PARAM>
37
                </PARAMETERS>
38
                <ARCS>
39
                    <ARC to="extractOutputPath"/>
40
                </ARCS>
41
            </NODE>
42
            <NODE name="extractOutputPath" type="ExtractOutputPath">
43
                <DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
44
                <PARAMETERS>
45
                    <PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
46
                </PARAMETERS>
47
                <ARCS>
48
                    <ARC to="importActionSet"/>
49
                </ARCS>
50
            </NODE>
51
            <!-- Submits the importScholexplorerJob hadoop job on cluster DM; envParams maps the
                 job's input/output directories to the env parameters inputPath and outputPath. -->
            <NODE isJoin="true" name="importActionSet" type="SubmitHadoopJob">
52
                <DESCRIPTION>import the ScholExplorer dump from the input path into the action set output path</DESCRIPTION>
53
                <PARAMETERS>
54
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">importScholexplorerJob</PARAM>
55
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
56
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
57
                        {
58
                        'mapred.input.dir':'inputPath',
59
                        'mapred.output.dir':'outputPath'
60
                        }
61
                    </PARAM>
62
                    <PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM>
63
                </PARAMETERS>
64
                <ARCS>
65
                    <ARC to="updateActionSets"/>
66
                </ARCS>
67
            </NODE>
68
            <NODE name="updateActionSets" type="UpdateActionSets">
69
                <DESCRIPTION>update action sets</DESCRIPTION>
70
                <PARAMETERS/>
71
                <ARCS>
72
                    <ARC to="success"/>
73
                </ARCS>
74
            </NODE>
75
        </CONFIGURATION>
76
        <STATUS>
77
            <LAST_EXECUTION_ID>wf_20191022_141741_170</LAST_EXECUTION_ID>
78
            <LAST_EXECUTION_DATE>2019-10-22T14:32:03+00:00</LAST_EXECUTION_DATE>
79
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
80
            <LAST_EXECUTION_ERROR/>
81
        </STATUS>
82
    </BODY>
83
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/import/import-doiboost-beta.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="e03f256e-1e4d-4b3d-9c07-91faf5d25208_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2021-04-19T13:28:18+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>Import DOIboost</WORKFLOW_NAME>
11
        <WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual">
14
            <!-- Sets the env parameter MAGDumpPath (distinct from inputPathMAG, set by setInputPathMAG). -->
            <NODE isStart="true" name="setMAGDumpPath" type="SetEnvParameter">
15
                <DESCRIPTION>set the path of the MAG dump</DESCRIPTION>
16
                <PARAMETERS>
17
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">MAGDumpPath</PARAM>
18
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/mag-2020-11-09</PARAM>
19
                </PARAMETERS>
20
                <ARCS>
21
                    <ARC to="waitConfig"/>
22
                </ARCS>
23
            </NODE>
24
            <NODE isStart="true" name="setInputPathMAG" type="SetEnvParameter">
25
                <DESCRIPTION>set the input path for MAG</DESCRIPTION>
26
                <PARAMETERS>
27
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathMAG</PARAM>
28
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/mag</PARAM>
29
                </PARAMETERS>
30
                <ARCS>
31
                    <ARC to="waitConfig"/>
32
                </ARCS>
33
            </NODE>
34
            <NODE isStart="true" name="setInputPathCrossref" type="SetEnvParameter">
35
                <DESCRIPTION>set the input path for Crossref</DESCRIPTION>
36
                <PARAMETERS>
37
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathCrossref</PARAM>
38
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/crossref</PARAM>
39
                </PARAMETERS>
40
                <ARCS>
41
                    <ARC to="waitConfig"/>
42
                </ARCS>
43
            </NODE>
44
            <NODE isStart="true" name="setCrossrefTimestamp" type="SetEnvParameter">
45
                <DESCRIPTION>set the timestamp for the Crossref incremental harvesting</DESCRIPTION>
46
                <PARAMETERS>
47
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">crossrefTimestamp</PARAM>
48
                    <PARAM managedBy="user" name="parameterValue" required="false" type="string">1607614921429</PARAM>
49
                </PARAMETERS>
50
                <ARCS>
51
                    <ARC to="waitConfig"/>
52
                </ARCS>
53
            </NODE>
54
            <NODE isStart="true" name="setInputPathUnpayWall" type="SetEnvParameter">
55
                <DESCRIPTION>set the input path for UnpayWall</DESCRIPTION>
56
                <PARAMETERS>
57
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathUnpayWall</PARAM>
58
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/unpayWall</PARAM>
59
                </PARAMETERS>
60
                <ARCS>
61
                    <ARC to="waitConfig"/>
62
                </ARCS>
63
            </NODE>
64
            <NODE isStart="true" name="setInputPathOrcid" type="SetEnvParameter">
65
                <DESCRIPTION>set the input path for ORCID</DESCRIPTION>
66
                <PARAMETERS>
67
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathOrcid</PARAM>
68
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/orcid_activities_2020/last_orcid_dataset</PARAM>
69
                </PARAMETERS>
70
                <ARCS>
71
                    <ARC to="waitConfig"/>
72
                </ARCS>
73
            </NODE>
74
            <NODE isStart="true" name="setWorkingPathOrcid" type="SetEnvParameter">
75
                <DESCRIPTION>set the working path for ORCID</DESCRIPTION>
76
                <PARAMETERS>
77
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">workingPathOrcid</PARAM>
78
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/orcid</PARAM>
79
                </PARAMETERS>
80
                <ARCS>
81
                    <ARC to="waitConfig"/>
82
                </ARCS>
83
            </NODE>
84
            <NODE isStart="true" name="setHostedByMapPath" type="SetEnvParameter">
85
                <DESCRIPTION>set the hostedBy map path</DESCRIPTION>
86
                <PARAMETERS>
87
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">hostedByMapPath</PARAM>
88
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/hostedBy/hbMap.gz</PARAM>
89
                </PARAMETERS>
90
                <ARCS>
91
                    <ARC to="waitConfig"/>
92
                </ARCS>
93
            </NODE>
94
            <NODE isStart="true" name="setResumeFrom" type="SetEnvParameter">
95
                <DESCRIPTION>set the oozie workflow name from which the execution will be resumed</DESCRIPTION>
96
                <PARAMETERS>
97
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">resumeFrom</PARAM>
98
                    <PARAM function="validValues(['ConvertCrossrefToOAF','ResetMagWorkingPath', 'PreprocessMag', 'PreprocessUW', 'PreprocessORCID', 'CreateDOIBoost', 'GenerateActionSet'])" managedBy="user" name="parameterValue" required="false" type="string">PreprocessORCID</PARAM>
99
                </PARAMETERS>
100
                <ARCS>
101
                    <ARC to="waitConfig"/>
102
                </ARCS>
103
            </NODE>
104
            <NODE isJoin="true" name="waitConfig">
105
                <DESCRIPTION>wait configurations</DESCRIPTION>
106
                <PARAMETERS/>
107
                <ARCS>
108
                    <ARC to="prepareActionSets"/>
109
                </ARCS>
110
            </NODE>
111
            <NODE name="prepareActionSets" type="PrepareActionSets">
112
                <DESCRIPTION>prepare action sets</DESCRIPTION>
113
                <PARAMETERS>
114
                    <PARAM managedBy="system" name="sets" required="true" type="string">
115
                        [
116
                        {
117
                        'set' : 'doiboost',
118
                        'jobProperty' : 'export_action_set_doiboost',
119
                        'enablingProperty' : 'active_doiboost',
120
                        'enabled' : 'true'
121
                        }
122
                        ]
123
                    </PARAM>
124
                </PARAMETERS>
125
                <ARCS>
126
                    <ARC to="extractOutputPath"/>
127
                </ARCS>
128
            </NODE>
129
            <NODE name="extractOutputPath" type="ExtractOutputPath">
130
                <DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
131
                <PARAMETERS>
132
                    <PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
133
                </PARAMETERS>
134
                <ARCS>
135
                    <ARC to="updateDOIBoost"/>
136
                </ARCS>
137
            </NODE>
138
            <NODE name="updateDOIBoost" type="SubmitHadoopJob">
139
                <DESCRIPTION>prepare a new version of DOIBoost</DESCRIPTION>
140
                <PARAMETERS>
141
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
142
                    <PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
143
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
144
                        {
145
                        'crossrefTimestamp' : 'crossrefTimestamp',
146
                        'hostedByMapPath' : 'hostedByMapPath',
147
                        'MAGDumpPath' :'MAGDumpPath',
148
                        'inputPathMAG' : 'inputPathMAG',
149
                        'inputPathCrossref' : 'inputPathCrossref',
150
                        'inputPathUnpayWall' : 'inputPathUnpayWall',
151
                        'inputPathOrcid' : 'inputPathOrcid',
152
                        'outputPath' : 'outputPath',
153
                        'workingPathOrcid':'workingPathOrcid',
154
                        'resumeFrom' : 'resumeFrom'
155
                        }
156
                    </PARAM>
157
                    <!-- Static properties for the Oozie job. The application path must not carry
                         leading whitespace inside the quotes. -->
                    <PARAM managedBy="system" name="params" required="true" type="string">
158
                        {
159
                        'oozie.wf.application.path' : '/lib/dnet/STABLE_IDS/actionmanager/doiboost/oozie_app',
160
                        'workingPath' : '/data/doiboost/process',
161
                        'sparkExecutorIntersectionMemory' : '9G',
162
                        'sparkExecutorMemory' : '6G',
163
                        'esServer' : 'ip-90-147-167-25.ct1.garrservices.it',
164
                        'esIndex' : 'crossref'
165
                        }
166
                    </PARAM>
167
                    <PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
168
                </PARAMETERS>
169
                <ARCS>
170
                    <ARC to="updateActionSets"/>
171
                </ARCS>
172
            </NODE>
173
            <NODE name="updateActionSets" type="UpdateActionSets">
174
                <DESCRIPTION>update action sets</DESCRIPTION>
175
                <PARAMETERS/>
176
                <ARCS>
177
                    <ARC to="success"/>
178
                </ARCS>
179
            </NODE>
180
        </CONFIGURATION>
181
        <STATUS>
182
            <LAST_EXECUTION_ID>wf_20210416_122501_878</LAST_EXECUTION_ID>
183
            <LAST_EXECUTION_DATE>2021-04-16T13:58:50+00:00</LAST_EXECUTION_DATE>
184
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
185
            <LAST_EXECUTION_ERROR/>
186
        </STATUS>
187
    </BODY>
188
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/import/import-ror-beta.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="7cf3cfed-fbfb-46ca-b4da-aa43beb58f19_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2021-05-06T09:48:33+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>Update ROR actionset</WORKFLOW_NAME>
11
        <!-- Type label spelled exactly as in the other import workflows ("Import InfoSpace"). -->
        <WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual">
14
            <!-- Entry node: sets the env parameter inputPath, which rorUpdate passes to the
                 Oozie job as rorJsonInputPath. -->
            <NODE isStart="true" name="setInputPath" type="SetEnvParameter">
15
                <DESCRIPTION>Set the path of the ROR JSON dump</DESCRIPTION>
16
                <PARAMETERS>
17
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPath</PARAM>
18
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/ror/ror-data-2021-04-06.json</PARAM>
19
                </PARAMETERS>
20
                <ARCS>
21
                    <ARC to="prepareActionSets"/>
22
                </ARCS>
23
            </NODE>
24
            <NODE name="prepareActionSets" type="PrepareActionSets">
25
                <DESCRIPTION>prepare action sets</DESCRIPTION>
26
                <PARAMETERS>
27
                    <PARAM managedBy="system" name="sets" required="true" type="string">
28
                        [
29
                        {
30
                        'set' : 'ror',
31
                        'jobProperty' : 'export_action_set_ror',
32
                        'enablingProperty' : 'active_ror',
33
                        'enabled' : 'true'
34
                        }
35
                        ]
36
                    </PARAM>
37
                </PARAMETERS>
38
                <ARCS>
39
                    <ARC to="extractOutputPath"/>
40
                </ARCS>
41
            </NODE>
42
            <NODE name="extractOutputPath" type="ExtractOutputPath">
43
                <DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
44
                <PARAMETERS>
45
                    <PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
46
                </PARAMETERS>
47
                <ARCS>
48
                    <ARC to="rorUpdate"/>
49
                </ARCS>
50
            </NODE>
51
            <NODE name="rorUpdate" type="SubmitHadoopJob">
52
                <DESCRIPTION>update the ROR actionset</DESCRIPTION>
53
                <PARAMETERS>
54
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
55
                    <PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
56
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
57
                        {
58
                        'rorJsonInputPath' : 'inputPath',
59
                        'rorActionSetPath': 'outputPath'
60
                        }
61
                    </PARAM>
62
                    <PARAM managedBy="system" name="params" required="true" type="string">
63
                        {
64
                        'oozie.wf.application.path' : '/lib/dnet/STABLE_IDS/actionmanager/ror/oozie_app',
65
                        'workingDir': '/tmp/import_ror_actionset'
66
                        }
67
                    </PARAM>
68
                    <PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
69
                </PARAMETERS>
70
                <ARCS>
71
                    <ARC to="updateActionSets"/>
72
                </ARCS>
73
            </NODE>
74
            <NODE name="updateActionSets" type="UpdateActionSets">
75
                <DESCRIPTION>update action sets</DESCRIPTION>
76
                <PARAMETERS/>
77
                <ARCS>
78
                    <ARC to="success"/>
79
                </ARCS>
80
            </NODE>
81
        </CONFIGURATION>
82
        <STATUS>
83
            <LAST_EXECUTION_ID>wf_20210506_135405_174</LAST_EXECUTION_ID>
84
            <LAST_EXECUTION_DATE>2021-05-06T13:55:05+00:00</LAST_EXECUTION_DATE>
85
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
86
            <LAST_EXECUTION_ERROR/>
87
        </STATUS>
88
    </BODY>
89
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/import/import-h2020-classification-beta.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="4bb067d5-a2f2-42b9-844c-4e1d8d71b80f_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2020-12-01T14:33:42+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>Import H2020classification</WORKFLOW_NAME>
11
        <WORKFLOW_TYPE>Import H2020classification</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual">
14
            <NODE isStart="true" name="prepareActionSets" type="PrepareActionSets">
15
                <DESCRIPTION>prepare action sets</DESCRIPTION>
16
                <PARAMETERS>
17
                    <PARAM managedBy="system" name="sets" required="true" type="string">
18
                        [
19
                        {
20
                        'set' : 'h2020classification',
21
                        'jobProperty' : 'export_action_set_h2020classification',
22
                        'enablingProperty' : 'active_h2020classification',
23
                        'enabled' : 'true'
24
                        }
25
                        ]
26
                    </PARAM>
27
                </PARAMETERS>
28
                <ARCS>
29
                    <ARC to="extractOutputPath"/>
30
                </ARCS>
31
            </NODE>
32
            <NODE name="extractOutputPath" type="ExtractOutputPath">
33
                <DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
34
                <PARAMETERS>
35
                    <PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
36
                </PARAMETERS>
37
                <ARCS>
38
                    <ARC to="h2020ClassificationUpdate"/>
39
                </ARCS>
40
            </NODE>
41
            <NODE name="h2020ClassificationUpdate" type="SubmitHadoopJob">
42
                <DESCRIPTION>prepare updates for the H2020 Classification</DESCRIPTION>
43
                <PARAMETERS>
44
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
45
                    <PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
46
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
47
                        {
48
                        'outputPath': 'outputPath'
49
                        }
50
                    </PARAM>
51
                    <!-- Static properties for the Oozie job. The application path must not carry
                         leading whitespace inside the quotes.
                         NOTE(review): postgresUser/postgresPassword are stored here in clear text;
                         consider supplying them from a protected service property instead. -->
                    <PARAM managedBy="system" name="params" required="true" type="string">
52
                        {
53
                        'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/project/oozie_app',
54
                        'workingDir' : '/tmp/beta_provision/working_dir/h2020classification',
55
                        'projectFileURL' : 'https://cordis.europa.eu/data/cordis-h2020projects.csv',
56
                        'programmeFileURL' : 'http://cordis.europa.eu/data/reference/cordisref-H2020programmes.csv',
57
                        'topicFileURL':'http://cordis.europa.eu/data/reference/cordisref-H2020topics.xlsx',
58
                        'postgresURL':'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
59
                        'postgresUser':'dnet',
60
                        'postgresPassword':'dnetPwd'
61
                        }
62
                    </PARAM>
63
                    <PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
64
                </PARAMETERS>
65
                <ARCS>
66
                    <ARC to="updateActionSets"/>
67
                </ARCS>
68
            </NODE>
69
            <NODE name="updateActionSets" type="UpdateActionSets">
70
                <DESCRIPTION>update action sets</DESCRIPTION>
71
                <PARAMETERS/>
72
                <ARCS>
73
                    <ARC to="success"/>
74
                </ARCS>
75
            </NODE>
76
        </CONFIGURATION>
77
        <STATUS>
78
            <LAST_EXECUTION_ID>wf_20201005_124642_515</LAST_EXECUTION_ID>
79
            <LAST_EXECUTION_DATE>2020-10-05T12:57:35+00:00</LAST_EXECUTION_DATE>
80
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
81
            <LAST_EXECUTION_ERROR/>
82
        </STATUS>
83
    </BODY>
84
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/import/import-orcid-nodoi-beta.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="7cf3cfed-fbfb-46ca-b4da-aa43beb58f18_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2021-04-22T09:03:40+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>Update ORCID (no DOI) actionset</WORKFLOW_NAME>
11
        <!-- Type label spelled exactly as in the other import workflows ("Import InfoSpace"). -->
        <WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual">
14
            <NODE isStart="true" name="setInputPath" type="SetEnvParameter">
15
                <DESCRIPTION>Set the base path containing the no_doi_dataset folder</DESCRIPTION>
16
                <PARAMETERS>
17
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPath</PARAM>
18
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/orcid_activities_2020</PARAM>
19
                </PARAMETERS>
20
                <ARCS>
21
                    <ARC to="prepareActionSets"/>
22
                </ARCS>
23
            </NODE>
24
            <NODE name="prepareActionSets" type="PrepareActionSets">
25
                <DESCRIPTION>prepare action sets</DESCRIPTION>
26
                <PARAMETERS>
27
                    <PARAM managedBy="system" name="sets" required="true" type="string">
28
                        [
29
                        {
30
                        'set' : 'orcidworks-no-doi',
31
                        'jobProperty' : 'export_action_set_orcidworks_no_doi',
32
                        'enablingProperty' : 'active_orcidworks_no_doi',
33
                        'enabled' : 'true'
34
                        }
35
                        ]
36
                    </PARAM>
37
                </PARAMETERS>
38
                <ARCS>
39
                    <ARC to="extractOutputPath"/>
40
                </ARCS>
41
            </NODE>
42
            <NODE name="extractOutputPath" type="ExtractOutputPath">
43
                <DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
44
                <PARAMETERS>
45
                    <PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
46
                </PARAMETERS>
47
                <ARCS>
48
                    <ARC to="orcidNoDoiUpdate"/>
49
                </ARCS>
50
            </NODE>
51
            <NODE name="orcidNoDoiUpdate" type="SubmitHadoopJob">
52
                <DESCRIPTION>prepare updates for the Orcid No Doi</DESCRIPTION>
53
                <PARAMETERS>
54
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
55
                    <PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
56
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
57
                        {
58
                        'workingPath' : 'inputPath',
59
                        'outputPath': 'outputPath'
60
                        }
61
                    </PARAM>
62
                    <PARAM managedBy="system" name="params" required="true" type="string">
63
                        {
64
                        'oozie.wf.application.path' : '/lib/dnet/STABLE_IDS/actionmanager/orcidnodoi_actionset/oozie_app',
65
                        'spark2GenNoDoiDatasetMaxExecutors' : '200',
66
                        'spark2GenNoDoiDatasetExecutorMemory' : '2G'
67
                        }
68
                    </PARAM>
69
                    <PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
70
                </PARAMETERS>
71
                <ARCS>
72
                    <ARC to="updateActionSets"/>
73
                </ARCS>
74
            </NODE>
75
            <NODE name="updateActionSets" type="UpdateActionSets">
76
                <DESCRIPTION>update action sets</DESCRIPTION>
77
                <PARAMETERS/>
78
                <ARCS>
79
                    <ARC to="success"/>
80
                </ARCS>
81
            </NODE>
82
        </CONFIGURATION>
83
        <STATUS>
84
            <LAST_EXECUTION_ID>wf_20210505_073553_347</LAST_EXECUTION_ID>
85
            <LAST_EXECUTION_DATE>2021-05-05T07:43:23+00:00</LAST_EXECUTION_DATE>
86
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
87
            <LAST_EXECUTION_ERROR/>
88
        </STATUS>
89
    </BODY>
90
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/import/import-doiboost-prod.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="e03f256e-1e4d-4b3d-9c07-91faf5d25208_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2021-05-07T13:09:35+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>Import DOIboost</WORKFLOW_NAME>
11
        <WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual">
14
            <NODE isStart="true" name="setInputPathMAG" type="SetEnvParameter">
15
                <DESCRIPTION>set the input path for MAG</DESCRIPTION>
16
                <PARAMETERS>
17
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathMAG</PARAM>
18
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/mag</PARAM>
19
                </PARAMETERS>
20
                <ARCS>
21
                    <ARC to="waitConfig"/>
22
                </ARCS>
23
            </NODE>
24
            <NODE isStart="true" name="setInputPathCrossref" type="SetEnvParameter">
25
                <DESCRIPTION>set the input path for Crossref</DESCRIPTION>
26
                <PARAMETERS>
27
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathCrossref</PARAM>
28
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/crossref</PARAM>
29
                </PARAMETERS>
30
                <ARCS>
31
                    <ARC to="waitConfig"/>
32
                </ARCS>
33
            </NODE>
34
            <!-- Start node (isStart="true"). The system-managed parameterName is inputPathUnpayWall;
                 the user-managed parameterValue is pre-filled with /data/doiboost/input/unpayWall.
                 The same name, inputPathUnpayWall, is listed in the envParams of the updateDOIBoost node.
                 Its only arc leads to the waitConfig join node. -->
            <NODE isStart="true" name="setInputPathUnpayWall" type="SetEnvParameter">
35
                <DESCRIPTION>set the input path for UnpayWall</DESCRIPTION>
36
                <PARAMETERS>
37
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathUnpayWall</PARAM>
38
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/unpayWall</PARAM>
39
                </PARAMETERS>
40
                <ARCS>
41
                    <ARC to="waitConfig"/>
42
                </ARCS>
43
            </NODE>
44
            <!-- Start node (isStart="true"). The system-managed parameterName is workingPathOrcid;
                 the user-managed parameterValue is pre-filled with /data/doiboost/input/orcid.
                 The same name, workingPathOrcid, is listed in the envParams of the updateDOIBoost node.
                 NOTE(review): the parameter is called a "working" path but the default sits under
                 /data/doiboost/input; confirm this is intended.
                 Its only arc leads to the waitConfig join node. -->
            <NODE isStart="true" name="setWorkingPathOrcid" type="SetEnvParameter">
45
                <DESCRIPTION>set the working path for ORCID</DESCRIPTION>
46
                <PARAMETERS>
47
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">workingPathOrcid</PARAM>
48
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/orcid</PARAM>
49
                </PARAMETERS>
50
                <ARCS>
51
                    <ARC to="waitConfig"/>
52
                </ARCS>
53
            </NODE>
54
            <!-- Start node (isStart="true"). The system-managed parameterName is hostedByMapPath;
                 the user-managed parameterValue is pre-filled with /data/doiboost/input/hostedBy/hbMap.gz
                 (a single file, unlike the directory defaults of the sibling start nodes).
                 The same name, hostedByMapPath, is listed in the envParams of the updateDOIBoost node.
                 Its only arc leads to the waitConfig join node. -->
            <NODE isStart="true" name="setHostedByMapPath" type="SetEnvParameter">
55
                <DESCRIPTION>set the hostedBy map path</DESCRIPTION>
56
                <PARAMETERS>
57
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">hostedByMapPath</PARAM>
58
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/hostedBy/hbMap.gz</PARAM>
59
                </PARAMETERS>
60
                <ARCS>
61
                    <ARC to="waitConfig"/>
62
                </ARCS>
63
            </NODE>
64
            <!-- Start node (isStart="true"). The system-managed parameterName is resumeFrom.
                 The user-managed parameterValue is optional (required="false"), carries a
                 validValues(...) function listing six step names, and is pre-filled with the first
                 of them, ConvertCrossrefToOAF.
                 The same name, resumeFrom, is listed in the envParams of the updateDOIBoost node.
                 NOTE(review): presumably validValues restricts what the user may enter; confirm
                 against the workflow engine. Its only arc leads to the waitConfig join node. -->
            <NODE isStart="true" name="setResumeFrom" type="SetEnvParameter">
65
                <DESCRIPTION>set the oozie workflow name from which the execution will be resumed</DESCRIPTION>
66
                <PARAMETERS>
67
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">resumeFrom</PARAM>
68
                    <PARAM function="validValues(['ConvertCrossrefToOAF','PreprocessMag','PreprocessUW', 'ProcessORCID', 'CreateDOIBoost', 'GenerateActionSet'])" managedBy="user" name="parameterValue" required="false" type="string">ConvertCrossrefToOAF</PARAM>
69
                </PARAMETERS>
70
                <ARCS>
71
                    <ARC to="waitConfig"/>
72
                </ARCS>
73
            </NODE>
74
            <!-- Join node (isJoin="true") with no type attribute and no parameters. It is the arc
                 target of the six start nodes of this workflow (setInputPathMAG, setInputPathCrossref,
                 setInputPathUnpayWall, setWorkingPathOrcid, setHostedByMapPath, setResumeFrom).
                 Its only arc leads to prepareActionSets. -->
            <NODE isJoin="true" name="waitConfig">
75
                <DESCRIPTION>wait configurations</DESCRIPTION>
76
                <PARAMETERS/>
77
                <ARCS>
78
                    <ARC to="prepareActionSets"/>
79
                </ARCS>
80
            </NODE>
81
            <!-- The system-managed "sets" parameter holds a single-quoted, JSON-like list with one
                 entry: set doiboost, jobProperty export_action_set_doiboost, enablingProperty
                 active_doiboost, enabled true (all given as strings).
                 The list is element text, so its whitespace and quoting are part of the value;
                 keep it as is when editing. Its only arc leads to extractOutputPath. -->
            <NODE name="prepareActionSets" type="PrepareActionSets">
82
                <DESCRIPTION>prepare action sets</DESCRIPTION>
83
                <PARAMETERS>
84
                    <PARAM managedBy="system" name="sets" required="true" type="string">
85
                        [
86
                        {
87
                        'set' : 'doiboost',
88
                        'jobProperty' : 'export_action_set_doiboost',
89
                        'enablingProperty' : 'active_doiboost',
90
                        'enabled' : 'true'
91
                        }
92
                        ]
93
                    </PARAM>
94
                </PARAMETERS>
95
                <ARCS>
96
                    <ARC to="extractOutputPath"/>
97
                </ARCS>
98
            </NODE>
99
            <!-- The system-managed hdfsOutputPathParam is set to outputPath; the same name,
                 outputPath, is listed in the envParams of the updateDOIBoost node.
                 Per the DESCRIPTION, the path itself is the one generated by the previous node
                 (prepareActionSets). Its only arc leads to updateDOIBoost. -->
            <NODE name="extractOutputPath" type="ExtractOutputPath">
100
                <DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
101
                <PARAMETERS>
102
                    <PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
103
                </PARAMETERS>
104
                <ARCS>
105
                    <ARC to="updateDOIBoost"/>
106
                </ARCS>
107
            </NODE>
108
            <!-- Submits the hadoop job named executeOozieJob on the cluster named IIS.
                 envParams: each key is paired with an identically named value; six of those names
                 are the parameterName values of the start nodes and outputPath is the name given
                 to extractOutputPath.
                 params: literal values, namely the Oozie application path, the working path and
                 two Spark executor memory sizes.
                 The application path must carry no leading or trailing whitespace inside the
                 quotes, matching the other import profiles.
                 Both maps are single-quoted, JSON-like element text; their content is part of the
                 value. Its only arc leads to updateActionSets. -->
            <NODE name="updateDOIBoost" type="SubmitHadoopJob">
109
                <DESCRIPTION>prepare a new version of DOIBoost</DESCRIPTION>
110
                <PARAMETERS>
111
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
112
                    <PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
113
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
114
                        {
115
                        'hostedByMapPath' : 'hostedByMapPath',
116
                        'inputPathMAG' : 'inputPathMAG',
117
                        'inputPathCrossref' : 'inputPathCrossref',
118
                        'inputPathUnpayWall' : 'inputPathUnpayWall',
119
                        'outputPath' : 'outputPath',
120
                        'workingPathOrcid':'workingPathOrcid',
121
                        'resumeFrom' : 'resumeFrom'
122
                        }
123
                    </PARAM>
124
                    <PARAM managedBy="system" name="params" required="true" type="string">
125
                        {
126
                        'oozie.wf.application.path' : '/lib/dnet/PROD/doiboost/process/oozie_app',
127
                        'workingPath' : '/data/doiboost/process_p',
128
                        'sparkExecutorIntersectionMemory' : '9G',
129
                        'sparkExecutorMemory' : '6G'
130
                        }
131
                    </PARAM>
132
                    <PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
133
                </PARAMETERS>
134
                <ARCS>
135
                    <ARC to="updateActionSets"/>
136
                </ARCS>
137
            </NODE>
138
            <!-- Last node of this workflow; it takes no parameters. Its only arc targets "success",
                 which is not declared as a NODE in this CONFIGURATION.
                 NOTE(review): presumably "success" is a terminal state built into the workflow
                 engine; confirm. -->
            <NODE name="updateActionSets" type="UpdateActionSets">
139
                <DESCRIPTION>update action sets</DESCRIPTION>
140
                <PARAMETERS/>
141
                <ARCS>
142
                    <ARC to="success"/>
143
                </ARCS>
144
            </NODE>
145
        </CONFIGURATION>
146
        <STATUS>
147
            <LAST_EXECUTION_ID>wf_20210512_082537_192</LAST_EXECUTION_ID>
148
            <LAST_EXECUTION_DATE>2021-05-12T10:19:27+00:00</LAST_EXECUTION_DATE>
149
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
150
            <LAST_EXECUTION_ERROR/>
151
        </STATUS>
152
    </BODY>
153
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/import/import-datacite-beta.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="50593ba8-bb4b-413a-b852-b2c7e4a56093_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2021-04-21T09:11:15+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>Import Datacite ActionSet</WORKFLOW_NAME>
11
        <WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual">
14
            <!-- The only start node (isStart="true") of this workflow. The system-managed
                 parameterName is datacitePath; the user-managed parameterValue is pre-filled with
                 /data/datacite/datacite_oaf. The name datacitePath is the value paired with
                 sourcePath in the envParams of the generateDataciteActionSet node.
                 Its only arc leads to prepareActionSets. -->
            <NODE isStart="true" name="setDatacitePath" type="SetEnvParameter">
15
                <DESCRIPTION>set the input path for Datacite content</DESCRIPTION>
16
                <PARAMETERS>
17
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">datacitePath</PARAM>
18
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/datacite/datacite_oaf</PARAM>
19
                </PARAMETERS>
20
                <ARCS>
21
                    <ARC to="prepareActionSets"/>
22
                </ARCS>
23
            </NODE>
24
            <!-- The system-managed "sets" parameter holds a single-quoted, JSON-like list with one
                 entry: set datacite, jobProperty export_action_set_datacite, enablingProperty
                 active_datacite, enabled true (all given as strings).
                 The list is element text, so its whitespace and quoting are part of the value;
                 keep it as is when editing. Its only arc leads to extractOutputPath. -->
            <NODE name="prepareActionSets" type="PrepareActionSets">
25
                <DESCRIPTION>prepare action sets</DESCRIPTION>
26
                <PARAMETERS>
27
                    <PARAM managedBy="system" name="sets" required="true" type="string">
28
                        [
29
                        {
30
                        'set' : 'datacite',
31
                        'jobProperty' : 'export_action_set_datacite',
32
                        'enablingProperty' : 'active_datacite',
33
                        'enabled' : 'true'
34
                        }
35
                        ]
36
                    </PARAM>
37
                </PARAMETERS>
38
                <ARCS>
39
                    <ARC to="extractOutputPath"/>
40
                </ARCS>
41
            </NODE>
42
            <!-- The system-managed hdfsOutputPathParam is set to outputPath; the same name,
                 outputPath, is listed in the envParams of the generateDataciteActionSet node.
                 Per the DESCRIPTION, the path itself is the one generated by the previous node
                 (prepareActionSets). Its only arc leads to generateDataciteActionSet. -->
            <NODE name="extractOutputPath" type="ExtractOutputPath">
43
                <DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
44
                <PARAMETERS>
45
                    <PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
46
                </PARAMETERS>
47
                <ARCS>
48
                    <ARC to="generateDataciteActionSet"/>
49
                </ARCS>
50
            </NODE>
51
            <!-- Submits the hadoop job named executeOozieJob on the cluster named IIS.
                 envParams: sourcePath is paired with datacitePath (the parameterName of the
                 setDatacitePath node) and outputPath with outputPath (the name given to
                 extractOutputPath).
                 params: literal values, namely the Oozie application path and the Spark executor
                 memory size.
                 NOTE(review): the application path sits under /lib/dnet/STABLE_IDS while this
                 profile's file name ends in "-beta"; confirm the target environment.
                 Both maps are single-quoted, JSON-like element text; their content is part of the
                 value. Its only arc leads to updateActionSets. -->
            <NODE name="generateDataciteActionSet" type="SubmitHadoopJob">
52
                <DESCRIPTION>prepare a new version of Datacite ActionSet</DESCRIPTION>
53
                <PARAMETERS>
54
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
55
                    <PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
56
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
57
                        {
58
                        'sourcePath' : 'datacitePath',
59
                        'outputPath' : 'outputPath'
60
                        }
61
                    </PARAM>
62
                    <PARAM managedBy="system" name="params" required="true" type="string">
63
                        {
64
                        'oozie.wf.application.path' : '/lib/dnet/STABLE_IDS/actionmanager/datacite_actionset/oozie_app',
65
                        'sparkExecutorMemory' : '7G'
66
                        }
67
                    </PARAM>
68
                    <PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
69
                </PARAMETERS>
70
                <ARCS>
71
                    <ARC to="updateActionSets"/>
72
                </ARCS>
73
            </NODE>
74
            <!-- Last node of this workflow; it takes no parameters. Its only arc targets "success",
                 which is not declared as a NODE in this CONFIGURATION.
                 NOTE(review): presumably "success" is a terminal state built into the workflow
                 engine; confirm. -->
            <NODE name="updateActionSets" type="UpdateActionSets">
75
                <DESCRIPTION>update action sets</DESCRIPTION>
76
                <PARAMETERS/>
77
                <ARCS>
78
                    <ARC to="success"/>
79
                </ARCS>
80
            </NODE>
81
        </CONFIGURATION>
82
        <STATUS>
83
            <LAST_EXECUTION_ID>wf_20210421_091120_270</LAST_EXECUTION_ID>
84
            <LAST_EXECUTION_DATE>2021-04-21T09:17:25+00:00</LAST_EXECUTION_DATE>
85
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
86
            <LAST_EXECUTION_ERROR/>
87
        </STATUS>
88
    </BODY>
89
</RESOURCE_PROFILE>

Also available in: Unified diff