<!-- Project / General / Profile; 60995 alessia.ba -->
<RESOURCE_PROFILE>
2
    <!-- Resource header: identifier, resource type and kind, an empty URI and the creation timestamp. -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="e03f256e-1e4d-4b3d-9c07-91faf5d25208_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
        <RESOURCE_KIND value="WorkflowDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2021-04-19T13:28:18+00:00"/>
    </HEADER>
    <BODY>
10
        <!-- Workflow identity: display name, workflow type and numeric priority. -->
        <WORKFLOW_NAME>Import DOIboost</WORKFLOW_NAME>
        <WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
        <CONFIGURATION start="manual">
14
            <!-- Start node of type SetEnvParameter: parameterName is MAGDumpPath and the
                 user-managed parameterValue is the MAG dump location; its single arc leads to waitConfig. -->
            <NODE isStart="true" name="setMAGDumpPath" type="SetEnvParameter">
                <DESCRIPTION>set the path of the MAG dump</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">MAGDumpPath</PARAM>
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/mag-2020-11-09</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>
            <!-- Start node of type SetEnvParameter: parameterName is inputPathMAG and the
                 user-managed parameterValue is the MAG input directory; its single arc leads to waitConfig. -->
            <NODE isStart="true" name="setInputPathMAG" type="SetEnvParameter">
                <DESCRIPTION>set the input path for MAG</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathMAG</PARAM>
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/mag</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>
            <!-- Start node of type SetEnvParameter: parameterName is inputPathCrossref and the
                 user-managed parameterValue is the Crossref input directory; its single arc leads to waitConfig. -->
            <NODE isStart="true" name="setInputPathCrossref" type="SetEnvParameter">
                <DESCRIPTION>set the input path for Crossref</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathCrossref</PARAM>
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/crossref</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>
            <!-- Start node of type SetEnvParameter: parameterName is crossrefTimestamp; the
                 user-managed parameterValue is optional (required="false").
                 NOTE(review): the value looks like an epoch timestamp in milliseconds; confirm the unit. -->
            <NODE isStart="true" name="setCrossrefTimestamp" type="SetEnvParameter">
                <DESCRIPTION>set the timestamp for the Crossref incremental harvesting</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">crossrefTimestamp</PARAM>
                    <PARAM managedBy="user" name="parameterValue" required="false" type="string">1607614921429</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>
            <!-- Start node of type SetEnvParameter: parameterName is inputPathUnpayWall and the
                 user-managed parameterValue is the UnpayWall input directory; its single arc leads to waitConfig. -->
            <NODE isStart="true" name="setInputPathUnpayWall" type="SetEnvParameter">
                <DESCRIPTION>set the input path for UnpayWall</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathUnpayWall</PARAM>
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/unpayWall</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>
            <!-- Start node of type SetEnvParameter: parameterName is inputPathOrcid and the
                 user-managed parameterValue is the ORCID dataset location; its single arc leads to waitConfig. -->
            <NODE isStart="true" name="setInputPathOrcid" type="SetEnvParameter">
                <DESCRIPTION>set the input path for ORCID</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathOrcid</PARAM>
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/orcid_activities_2020/last_orcid_dataset</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>
            <!-- Start node of type SetEnvParameter: parameterName is workingPathOrcid and the
                 user-managed parameterValue is the ORCID working directory; its single arc leads to waitConfig. -->
            <NODE isStart="true" name="setWorkingPathOrcid" type="SetEnvParameter">
                <DESCRIPTION>set the working path for ORCID</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">workingPathOrcid</PARAM>
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/orcid</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>
            <!-- Start node of type SetEnvParameter: parameterName is hostedByMapPath and the
                 user-managed parameterValue points at the gzipped hostedBy map file; its single arc leads to waitConfig. -->
            <NODE isStart="true" name="setHostedByMapPath" type="SetEnvParameter">
                <DESCRIPTION>set the hostedBy map path</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">hostedByMapPath</PARAM>
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/hostedBy/hbMap.gz</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>
            <!-- Start node of type SetEnvParameter: parameterName is resumeFrom; the optional,
                 user-managed parameterValue is constrained by the validValues(...) function attribute
                 to one of the listed step names. Its single arc leads to waitConfig. -->
            <NODE isStart="true" name="setResumeFrom" type="SetEnvParameter">
                <DESCRIPTION>set the oozie workflow name from which the execution will be resumed</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">resumeFrom</PARAM>
                    <PARAM function="validValues(['ConvertCrossrefToOAF','ResetMagWorkingPath', 'PreprocessMag', 'PreprocessUW', 'PreprocessORCID', 'CreateDOIBoost', 'GenerateActionSet'])"
                           managedBy="user"
                           name="parameterValue"
                           required="false"
                           type="string">PreprocessORCID</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>
            <!-- Join node (isJoin="true"): the target of every start node's arc. It takes no
                 parameters and its single arc leads to prepareActionSets. -->
            <NODE isJoin="true" name="waitConfig">
                <DESCRIPTION>wait configurations</DESCRIPTION>
                <PARAMETERS/>
                <ARCS>
                    <ARC to="prepareActionSets"/>
                </ARCS>
            </NODE>
            <!-- Node of type PrepareActionSets: the system-managed "sets" parameter is a list with a
                 single entry for the 'doiboost' set (job property, enabling property, enabled flag).
                 Its single arc leads to extractOutputPath. -->
            <NODE name="prepareActionSets" type="PrepareActionSets">
                <DESCRIPTION>prepare action sets</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="sets" required="true" type="string">
                        [
                        {
                        'set' : 'doiboost',
                        'jobProperty' : 'export_action_set_doiboost',
                        'enablingProperty' : 'active_doiboost',
                        'enabled' : 'true'
                        }
                        ]
                    </PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="extractOutputPath"/>
                </ARCS>
            </NODE>
            <!-- Node of type ExtractOutputPath: hdfsOutputPathParam names the parameter ("outputPath")
                 that receives the extracted HDFS path. Its single arc leads to updateDOIBoost. -->
            <NODE name="extractOutputPath" type="ExtractOutputPath">
                <DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="updateDOIBoost"/>
                </ARCS>
            </NODE>
            <!-- Node of type SubmitHadoopJob: runs the "executeOozieJob" hadoop job on cluster "IIS".
                 envParams maps job property names to identically named parameters
                 (presumably resolved from the workflow environment at submit time; TODO confirm).
                 params carries literal job properties, including the Oozie application path.
                 Its single arc leads to updateActionSets. -->
            <NODE name="updateDOIBoost" type="SubmitHadoopJob">
                <DESCRIPTION>prepare a new version of DOIBoost</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
                    <PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
                        {
                        'crossrefTimestamp' : 'crossrefTimestamp',
                        'hostedByMapPath' : 'hostedByMapPath',
                        'MAGDumpPath' :'MAGDumpPath',
                        'inputPathMAG' : 'inputPathMAG',
                        'inputPathCrossref' : 'inputPathCrossref',
                        'inputPathUnpayWall' : 'inputPathUnpayWall',
                        'inputPathOrcid' : 'inputPathOrcid',
                        'outputPath' : 'outputPath',
                        'workingPathOrcid':'workingPathOrcid',
                        'resumeFrom' : 'resumeFrom'
                        }
                    </PARAM>
                    <PARAM managedBy="system" name="params" required="true" type="string">
                        {
                        'oozie.wf.application.path' : '/lib/dnet/STABLE_IDS/actionmanager/doiboost/oozie_app',
                        'workingPath' : '/data/doiboost/process',
                        'sparkExecutorIntersectionMemory' : '9G',
                        'sparkExecutorMemory' : '6G',
                        'esServer' : 'ip-90-147-167-25.ct1.garrservices.it',
                        'esIndex' : 'crossref'
                        }
                    </PARAM>
                    <PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="updateActionSets"/>
                </ARCS>
            </NODE>
            <!-- Node of type UpdateActionSets: takes no parameters; its single arc targets "success",
                 which is not declared as a NODE in this configuration
                 (presumably a built-in terminal state; TODO confirm). -->
            <NODE name="updateActionSets" type="UpdateActionSets">
                <DESCRIPTION>update action sets</DESCRIPTION>
                <PARAMETERS/>
                <ARCS>
                    <ARC to="success"/>
                </ARCS>
            </NODE>
        </CONFIGURATION>
181
        <!-- Record of the last execution: id, date, status and an empty error element. -->
        <STATUS>
            <LAST_EXECUTION_ID>wf_20210416_122501_878</LAST_EXECUTION_ID>
            <LAST_EXECUTION_DATE>2021-04-16T13:58:50+00:00</LAST_EXECUTION_DATE>
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
            <LAST_EXECUTION_ERROR/>
        </STATUS>
    </BODY>
188
</RESOURCE_PROFILE>