Project

General

Profile

« Previous | Next » 

Revision 28797

added wf to index an hdfs file

View differences:

modules/dnet-openaireplus-workflows/branches/2.0.0/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/index/SetHdfsFileForIndexingJobNode.java
1
package eu.dnetlib.msro.openaireplus.workflows.nodes.index;
2

  
3
import org.apache.commons.logging.Log;
4
import org.apache.commons.logging.LogFactory;
5

  
6
import com.googlecode.sarasvati.Arc;
7
import com.googlecode.sarasvati.NodeToken;
8

  
9
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
10

  
11
public class SetHdfsFileForIndexingJobNode extends SimpleJobNode {
12

  
13
	/**
14
	 * logger.
15
	 */
16
	private static final Log log = LogFactory.getLog(SetHdfsFileForIndexingJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
17

  
18
	private String inputRecordsPath;
19

  
20
	private String inputRecordsPathParam;
21

  
22
	private String format;
23

  
24
	private String layout;
25

  
26
	private String interpretation;
27

  
28
	/**
29
	 * {@inheritDoc}
30
	 * 
31
	 * @see com.googlecode.sarasvati.mem.MemNode#execute(com.googlecode.sarasvati.Engine, com.googlecode.sarasvati.NodeToken)
32
	 */
33
	@Override
34
	public String execute(final NodeToken token) {
35

  
36
		log.info("hdfs path for indexing: " + getInputRecordsPath());
37

  
38
		token.getEnv().setAttribute(getInputRecordsPathParam(), getInputRecordsPath());
39
		token.getEnv().setAttribute("format", getFormat());
40
		token.getEnv().setAttribute("layout", getLayout());
41
		token.getEnv().setAttribute("interpretation", getInterpretation());
42

  
43
		return Arc.DEFAULT_ARC;
44
	}
45

  
46
	public String getInputRecordsPath() {
47
		return inputRecordsPath;
48
	}
49

  
50
	public void setInputRecordsPath(final String inputRecordsPath) {
51
		this.inputRecordsPath = inputRecordsPath;
52
	}
53

  
54
	public String getInputRecordsPathParam() {
55
		return inputRecordsPathParam;
56
	}
57

  
58
	public void setInputRecordsPathParam(final String inputRecordsPathParam) {
59
		this.inputRecordsPathParam = inputRecordsPathParam;
60
	}
61

  
62
	public String getFormat() {
63
		return format;
64
	}
65

  
66
	public void setFormat(final String format) {
67
		this.format = format;
68
	}
69

  
70
	public String getLayout() {
71
		return layout;
72
	}
73

  
74
	public void setLayout(final String layout) {
75
		this.layout = layout;
76
	}
77

  
78
	public String getInterpretation() {
79
		return interpretation;
80
	}
81

  
82
	public void setInterpretation(final String interpretation) {
83
		this.interpretation = interpretation;
84
	}
85

  
86
}
modules/dnet-openaireplus-workflows/branches/2.0.0/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/provision/index.hdfs.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="0ef16778-4667-44b9-b453-3386f445beb1_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
5
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
6
		<RESOURCE_KIND value="WorkflowDSResources" />
7
		<RESOURCE_URI value="" />
8
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" />
9
	</HEADER>
10
	<BODY>
11
		<WORKFLOW_NAME>Index from HDFS</WORKFLOW_NAME>
12
		<WORKFLOW_TYPE>Provision</WORKFLOW_TYPE>
13
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
		<CONFIGURATION start="manual">
15
			<NODE name="setHdfs" type="SetHdfsFile" isStart="true">
16
				<DESCRIPTION />
17
				<PARAMETERS>
18
					<PARAM name="inputRecordsPath" type="string" managedBy="user" required="true">/tmp/indexrecords_db_openaireplus_node6_t_DMF.seq</PARAM>
19
					<PARAM name="inputRecordsPathParam" type="string" managedBy="system" required="true">inputRecordsPath</PARAM>
20
					<PARAM name="format" type="string" managedBy="user" required="true">DMF</PARAM>
21
					<PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM>
22
					<PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM>													
23
				</PARAMETERS>
24
				<ARCS>
25
					<ARC to="prepare" />
26
				</ARCS>
27
			</NODE>
28
			<NODE name="prepare" type="PrepareIndexJob">
29
				<DESCRIPTION>Prepare indexing</DESCRIPTION>
30
				<PARAMETERS>
31
					<PARAM name="outputRecordsPathParam"  type="string" managedBy="system" required="true">outputRecordsPath</PARAM>
32
					<PARAM name="rottenRecordsPathParam"  type="string" managedBy="system" required="true">rottenRecordsPath</PARAM>
33
					<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM>
34
					<PARAM name="oafSchemaLocationProperty"  type="string" managedBy="system" required="true">oaf.schema.location</PARAM>
35
				</PARAMETERS>
36
				<ARCS>
37
					<ARC to="cleanupRotten" />
38
				</ARCS>
39
			</NODE>	
40
			<NODE name="cleanupRotten" type="DeleteHdfsPathJob">
41
				<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION>
42
				<PARAMETERS>
43
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
44
					<PARAM required="true" type="string" name="envParams" managedBy="system">
45
						{ 	
46
							'path' : 'rottenRecordsPath'
47
						}
48
					</PARAM>					
49
				</PARAMETERS>
50
				<ARCS>
51
					<ARC to="updateIndex" />
52
				</ARCS>
53
			</NODE>			
54
			<NODE name="updateIndex" type="SubmitHadoopJob">
55
				<DESCRIPTION>M/O index records</DESCRIPTION>
56
				<PARAMETERS>
57
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
58
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM>					
59
					<PARAM required="true" type="string" name="envParams" managedBy="system">
60
						{ 	
61
							'mapred.input.dir' : 'inputRecordsPath',
62
							'mapred.output.dir' : 'rottenRecordsPath',
63
							'index.xslt' : 'index.xslt',
64
							'index.solr.url.list' : 'index.solr.url.list',
65
							'index.solr.url.local' : 'index.solr.url.local',
66
							'index.solr.collection' : 'index.solr.collection',
67
							'index.buffer.flush.threshold' : 'index.buffer.flush.threshold',
68
							'index.shutdown.wait.time' : 'index.shutdown.wait.time',
69
							'index.solr.local.feeding' : 'index.solr.local.feeding',
70
							'index.solr.sim.mode' : 'index.solr.sim.mode',
71
							'index.feed.timestamp' : 'index.feed.timestamp'
72
						}
73
					</PARAM>
74
				</PARAMETERS>
75
				<ARCS>
76
					<ARC to="finalize" />
77
				</ARCS>
78
			</NODE>
79
			<NODE name="finalize" type="FinalizeIndexFeeding">
80
				<DESCRIPTION>commit changes</DESCRIPTION>
81
				<PARAMETERS />
82
				<ARCS>
83
					<ARC to="success" />
84
				</ARCS>
85
			</NODE>
86
		</CONFIGURATION>
87
		<STATUS />
88
	</BODY>
89
</RESOURCE_PROFILE>
90

  
91

  
modules/dnet-openaireplus-workflows/branches/2.0.0/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/provision/index.hdfs.meta.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="7c582595-4f0e-4523-99fd-fb8729815f46_TWV0YVdvcmtmbG93RFNSZXNvdXJjZXMvTWV0YVdvcmtmbG93RFNSZXNvdXJjZVR5cGU=" />
5
		<RESOURCE_TYPE value="MetaWorkflowDSResourceType" />
6
		<RESOURCE_KIND value="MetaWorkflowDSResources" />
7
		<RESOURCE_URI value="" />
8
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" />
9
	</HEADER>
10
	<BODY>
11
		<METAWORKFLOW_NAME family="index_hbase">Index (from HDFS)</METAWORKFLOW_NAME>
12
		<METAWORKFLOW_DESCRIPTION></METAWORKFLOW_DESCRIPTION>
13
		<METAWORKFLOW_SECTION>InfoSpace Provision</METAWORKFLOW_SECTION>
14
		<ADMIN_EMAIL />
15
		<CONFIGURATION status="EXECUTABLE">
16
			<WORKFLOW id="0ef16778-4667-44b9-b453-3386f445beb1_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="IndexHDFS" />
17
		</CONFIGURATION>
18
		<SCHEDULING enabled="false">
19
			<CRON>29 5 22 ? * *</CRON>
20
			<MININTERVAL>10080</MININTERVAL>
21
		</SCHEDULING>
22
	</BODY>
23
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/branches/2.0.0/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/applicationContext-msro-openaire-nodes.xml
105 105
<!-- 		p:targetTable="${hbase.mapred.datatable}" p:hFileOutput="${services.mapreduce.mdrecords.hdsf.hfile.output}" -->
106 106
<!-- 		p:xslt="classpath:/eu/dnetlib/data/transform/dmf_2_hbase.xsl" />	 -->
107 107

  
108
	<bean id="wfNodeSetHdfsFile" class="eu.dnetlib.msro.openaireplus.workflows.nodes.index.SetHdfsFileForIndexingJobNode"
109
		scope="prototype" />
108 110

  
109

  
110 111
	<bean id="wfNodeFindIndex" class="eu.dnetlib.msro.openaireplus.workflows.nodes.index.FindIndexJobNode"
111 112
		scope="prototype" />
112 113

  

Also available in: Unified diff