Revision 52913
Added by Sandro La Bruzzo over 5 years ago
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/actions/ExtractOutputPathJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.actions; |
|
2 |
|
|
3 |
import com.google.gson.Gson; |
|
4 |
import com.googlecode.sarasvati.Arc; |
|
5 |
import com.googlecode.sarasvati.NodeToken; |
|
6 |
import eu.dnetlib.msro.rmi.MSROException; |
|
7 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
8 |
import org.apache.commons.lang3.StringUtils; |
|
9 |
|
|
10 |
import java.util.List; |
|
11 |
import java.util.Map; |
|
12 |
|
|
13 |
public class ExtractOutputPathJobNode extends SimpleJobNode { |
|
14 |
|
|
15 |
private String hdfsOutputPathParam; |
|
16 |
|
|
17 |
|
|
18 |
@Override |
|
19 |
protected String execute(NodeToken token) throws Exception { |
|
20 |
final String sets = token.getEnv().getAttribute("sets"); |
|
21 |
if (StringUtils.isBlank(sets)) |
|
22 |
throw new MSROException("¯\\\\_(ツ)_/¯ cannot find sets on env"); |
|
23 |
|
|
24 |
final List<Map<String, String>> setsMap = new Gson().fromJson(sets, List.class); |
|
25 |
if (setsMap== null || setsMap.size() !=1) { |
|
26 |
throw new MSROException("¯\\\\_(ツ)_/¯ Sets map from json is wrong!"); |
|
27 |
} |
|
28 |
|
|
29 |
|
|
30 |
final String path = setsMap.get(0).get("path"); |
|
31 |
if (StringUtils.isEmpty(path)){ |
|
32 |
throw new MSROException("Path is empty"); |
|
33 |
} |
|
34 |
token.getEnv().setAttribute(getHdfsOutputPathParam(), path); |
|
35 |
return Arc.DEFAULT_ARC; |
|
36 |
} |
|
37 |
|
|
38 |
|
|
39 |
public String getHdfsOutputPathParam() { |
|
40 |
return hdfsOutputPathParam; |
|
41 |
} |
|
42 |
|
|
43 |
public void setHdfsOutputPathParam(String hdfsOutputPathParam) { |
|
44 |
this.hdfsOutputPathParam = hdfsOutputPathParam; |
|
45 |
} |
|
46 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importScholexplorer.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER |
|
5 |
value="e03f256e-1e4d-4b3d-9c07-91faf5d25207_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
6 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
7 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
8 |
<RESOURCE_URI value=""/> |
|
9 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<WORKFLOW_NAME>Import ScholExplorer Links and entities</WORKFLOW_NAME> |
|
13 |
<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE> |
|
14 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
15 |
<CONFIGURATION start="manual"> |
|
16 |
<NODE name="setInputPath" isStart="true" type="SetHdfsFile"> |
|
17 |
<DESCRIPTION>set the hdfs input path</DESCRIPTION> |
|
18 |
<PARAMETERS> |
|
19 |
<PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/dli/export/scolixDumpExport</PARAM> |
|
20 |
<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM> |
|
21 |
</PARAMETERS> |
|
22 |
<ARCS> |
|
23 |
<ARC to="prepareActionSets"/> |
|
24 |
</ARCS> |
|
25 |
</NODE> |
|
26 |
<NODE name="prepareActionSets" type="PrepareActionSets"> |
|
27 |
<DESCRIPTION>prepare action sets</DESCRIPTION> |
|
28 |
<PARAMETERS> |
|
29 |
<PARAM required="true" type="string" name="sets" managedBy="system"> |
|
30 |
[ |
|
31 |
{ |
|
32 |
'set' : 'scholexplorer-dump', |
|
33 |
'jobProperty' : 'export_action_set_scholexplorer_dump', |
|
34 |
'enablingProperty' : 'active_scholexplorer_dump', |
|
35 |
'enabled' : 'true' |
|
36 |
} |
|
37 |
] |
|
38 |
</PARAM> |
|
39 |
</PARAMETERS> |
|
40 |
<ARCS> |
|
41 |
<ARC to="extractOutputPath"/> |
|
42 |
</ARCS> |
|
43 |
</NODE> |
|
44 |
|
|
45 |
<NODE name="extractOutputPath" type="ExtractOutputPath"> |
|
46 |
<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION> |
|
47 |
<PARAMETERS> |
|
48 |
<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM> |
|
49 |
|
|
50 |
</PARAMETERS> |
|
51 |
<ARCS> |
|
52 |
<ARC to="importActionSet"/> |
|
53 |
</ARCS> |
|
54 |
</NODE> |
|
55 |
|
|
56 |
<NODE name="importActionSet" type="SubmitHadoopJob" isJoin="true"> |
|
57 |
<DESCRIPTION>IIS main</DESCRIPTION> |
|
58 |
<PARAMETERS> |
|
59 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">importScholexplorer</PARAM> |
|
60 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
61 |
{ |
|
62 |
'cluster' : 'cluster', |
|
63 |
'mapred.input.dir':'inputPath', |
|
64 |
'mapred.output.dir':'outputPath' |
|
65 |
} |
|
66 |
</PARAM> |
|
67 |
<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM> |
|
68 |
</PARAMETERS> |
|
69 |
<ARCS> |
|
70 |
<ARC to="updateActionSets"/> |
|
71 |
</ARCS> |
|
72 |
</NODE> |
|
73 |
<NODE name="updateActionSets" type="UpdateActionSets"> |
|
74 |
<DESCRIPTION>update action sets</DESCRIPTION> |
|
75 |
<PARAMETERS/> |
|
76 |
<ARCS> |
|
77 |
<ARC to="success"/> |
|
78 |
</ARCS> |
|
79 |
</NODE> |
|
80 |
</CONFIGURATION> |
|
81 |
<STATUS/> |
|
82 |
</BODY> |
|
83 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/applicationContext-msro-openaire-nodes.xml | ||
---|---|---|
307 | 307 |
class="eu.dnetlib.msro.openaireplus.workflows.nodes.FilterManagedDatasourcesJobNode" |
308 | 308 |
scope="prototype"/> |
309 | 309 |
|
310 |
|
|
311 |
<bean id="wfNodeExtractOutputPath" |
|
312 |
class="eu.dnetlib.msro.openaireplus.workflows.nodes.actions.ExtractOutputPathJobNode" |
|
313 |
scope="prototype"/> |
|
314 |
|
|
310 | 315 |
</beans> |
Also available in: Unified diff
added WF to transform scholexplorer links into actionsets