Project

General

Profile

« Previous | Next » 

Revision 52913

added WF to transform scholexplorer links into actionsets

View differences:

modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/actions/ExtractOutputPathJobNode.java
1
package eu.dnetlib.msro.openaireplus.workflows.nodes.actions;
2

  
3
import com.google.gson.Gson;
4
import com.googlecode.sarasvati.Arc;
5
import com.googlecode.sarasvati.NodeToken;
6
import eu.dnetlib.msro.rmi.MSROException;
7
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
8
import org.apache.commons.lang3.StringUtils;
9

  
10
import java.util.List;
11
import java.util.Map;
12

  
13
public class ExtractOutputPathJobNode extends SimpleJobNode {
14

  
15
    private String hdfsOutputPathParam;
16

  
17

  
18
    @Override
19
    protected String execute(NodeToken token) throws Exception {
20
        final String sets = token.getEnv().getAttribute("sets");
21
        if (StringUtils.isBlank(sets))
22
            throw  new MSROException("¯\\\\_(ツ)_/¯ cannot find sets on env");
23

  
24
        final List<Map<String, String>> setsMap = new Gson().fromJson(sets, List.class);
25
        if (setsMap== null || setsMap.size() !=1)  {
26
            throw  new MSROException("¯\\\\_(ツ)_/¯ Sets map from json is wrong!");
27
        }
28

  
29

  
30
        final String path = setsMap.get(0).get("path");
31
        if (StringUtils.isEmpty(path)){
32
            throw new MSROException("Path is empty");
33
        }
34
        token.getEnv().setAttribute(getHdfsOutputPathParam(), path);
35
        return Arc.DEFAULT_ARC;
36
    }
37

  
38

  
39
    public String getHdfsOutputPathParam() {
40
        return hdfsOutputPathParam;
41
    }
42

  
43
    public void setHdfsOutputPathParam(String hdfsOutputPathParam) {
44
        this.hdfsOutputPathParam = hdfsOutputPathParam;
45
    }
46
}
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importScholexplorer.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER
5
				value="e03f256e-1e4d-4b3d-9c07-91faf5d25207_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
6
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
7
		<RESOURCE_KIND value="WorkflowDSResources"/>
8
		<RESOURCE_URI value=""/>
9
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
10
	</HEADER>
11
	<BODY>
12
		<WORKFLOW_NAME>Import ScholExplorer Links and entities</WORKFLOW_NAME>
13
		<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
14
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
15
		<CONFIGURATION start="manual">
16
            <NODE name="setInputPath" isStart="true" type="SetHdfsFile">
17
                <DESCRIPTION>set the hdfs input path</DESCRIPTION>
18
                <PARAMETERS>
19
                    <PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/dli/export/scolixDumpExport</PARAM>
20
                    <PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM>
21
                </PARAMETERS>
22
                <ARCS>
23
                    <ARC to="prepareActionSets"/>
24
                </ARCS>
25
            </NODE>
26
			<NODE name="prepareActionSets" type="PrepareActionSets">
27
				<DESCRIPTION>prepare action sets</DESCRIPTION>
28
				<PARAMETERS>
29
					<PARAM required="true" type="string" name="sets" managedBy="system">
30
						[
31
						{
32
						'set' : 'scholexplorer-dump',
33
						'jobProperty' : 'export_action_set_scholexplorer_dump',
34
						'enablingProperty' : 'active_scholexplorer_dump',
35
						'enabled' : 'true'
36
						}
37
						]
38
					</PARAM>
39
				</PARAMETERS>
40
				<ARCS>
41
					<ARC to="extractOutputPath"/>
42
				</ARCS>
43
			</NODE>
44

  
45
            <NODE name="extractOutputPath"  type="ExtractOutputPath">
46
                <DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
47
                <PARAMETERS>
48
                    <PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
49

  
50
                </PARAMETERS>
51
                <ARCS>
52
                    <ARC to="importActionSet"/>
53
                </ARCS>
54
            </NODE>
55

  
56
			<NODE name="importActionSet" type="SubmitHadoopJob" isJoin="true">
57
				<DESCRIPTION>IIS main</DESCRIPTION>
58
				<PARAMETERS>
59
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">importScholexplorer</PARAM>
60
					<PARAM required="true" type="string" name="envParams" managedBy="system">
61
						{
62
						'cluster' : 'cluster',
63
						'mapred.input.dir':'inputPath',
64
                        'mapred.output.dir':'outputPath'
65
						}
66
					</PARAM>
67
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
68
				</PARAMETERS>
69
				<ARCS>
70
					<ARC to="updateActionSets"/>
71
				</ARCS>
72
			</NODE>
73
			<NODE name="updateActionSets" type="UpdateActionSets">
74
				<DESCRIPTION>update action sets</DESCRIPTION>
75
				<PARAMETERS/>
76
				<ARCS>
77
					<ARC to="success"/>
78
				</ARCS>
79
			</NODE>
80
		</CONFIGURATION>
81
		<STATUS/>
82
	</BODY>
83
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/applicationContext-msro-openaire-nodes.xml
307 307
	      class="eu.dnetlib.msro.openaireplus.workflows.nodes.FilterManagedDatasourcesJobNode"
308 308
	      scope="prototype"/>
309 309

  
310

  
311
    <bean id="wfNodeExtractOutputPath"
312
          class="eu.dnetlib.msro.openaireplus.workflows.nodes.actions.ExtractOutputPathJobNode"
313
          scope="prototype"/>
314

  
310 315
</beans>

Also available in: Unified diff