Revision 28797
Added by Claudio Atzori over 10 years ago
modules/dnet-openaireplus-workflows/branches/2.0.0/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/index/SetHdfsFileForIndexingJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.index; |
|
2 |
|
|
3 |
import org.apache.commons.logging.Log; |
|
4 |
import org.apache.commons.logging.LogFactory; |
|
5 |
|
|
6 |
import com.googlecode.sarasvati.Arc; |
|
7 |
import com.googlecode.sarasvati.NodeToken; |
|
8 |
|
|
9 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
10 |
|
|
11 |
public class SetHdfsFileForIndexingJobNode extends SimpleJobNode { |
|
12 |
|
|
13 |
/** |
|
14 |
* logger. |
|
15 |
*/ |
|
16 |
private static final Log log = LogFactory.getLog(SetHdfsFileForIndexingJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
17 |
|
|
18 |
private String inputRecordsPath; |
|
19 |
|
|
20 |
private String inputRecordsPathParam; |
|
21 |
|
|
22 |
private String format; |
|
23 |
|
|
24 |
private String layout; |
|
25 |
|
|
26 |
private String interpretation; |
|
27 |
|
|
28 |
/** |
|
29 |
* {@inheritDoc} |
|
30 |
* |
|
31 |
* @see com.googlecode.sarasvati.mem.MemNode#execute(com.googlecode.sarasvati.Engine, com.googlecode.sarasvati.NodeToken) |
|
32 |
*/ |
|
33 |
@Override |
|
34 |
public String execute(final NodeToken token) { |
|
35 |
|
|
36 |
log.info("hdfs path for indexing: " + getInputRecordsPath()); |
|
37 |
|
|
38 |
token.getEnv().setAttribute(getInputRecordsPathParam(), getInputRecordsPath()); |
|
39 |
token.getEnv().setAttribute("format", getFormat()); |
|
40 |
token.getEnv().setAttribute("layout", getLayout()); |
|
41 |
token.getEnv().setAttribute("interpretation", getInterpretation()); |
|
42 |
|
|
43 |
return Arc.DEFAULT_ARC; |
|
44 |
} |
|
45 |
|
|
46 |
public String getInputRecordsPath() { |
|
47 |
return inputRecordsPath; |
|
48 |
} |
|
49 |
|
|
50 |
public void setInputRecordsPath(final String inputRecordsPath) { |
|
51 |
this.inputRecordsPath = inputRecordsPath; |
|
52 |
} |
|
53 |
|
|
54 |
public String getInputRecordsPathParam() { |
|
55 |
return inputRecordsPathParam; |
|
56 |
} |
|
57 |
|
|
58 |
public void setInputRecordsPathParam(final String inputRecordsPathParam) { |
|
59 |
this.inputRecordsPathParam = inputRecordsPathParam; |
|
60 |
} |
|
61 |
|
|
62 |
public String getFormat() { |
|
63 |
return format; |
|
64 |
} |
|
65 |
|
|
66 |
public void setFormat(final String format) { |
|
67 |
this.format = format; |
|
68 |
} |
|
69 |
|
|
70 |
public String getLayout() { |
|
71 |
return layout; |
|
72 |
} |
|
73 |
|
|
74 |
public void setLayout(final String layout) { |
|
75 |
this.layout = layout; |
|
76 |
} |
|
77 |
|
|
78 |
public String getInterpretation() { |
|
79 |
return interpretation; |
|
80 |
} |
|
81 |
|
|
82 |
public void setInterpretation(final String interpretation) { |
|
83 |
this.interpretation = interpretation; |
|
84 |
} |
|
85 |
|
|
86 |
} |
modules/dnet-openaireplus-workflows/branches/2.0.0/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/provision/index.hdfs.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value="0ef16778-4667-44b9-b453-3386f445beb1_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" /> |
|
5 |
<RESOURCE_TYPE value="WorkflowDSResourceType" /> |
|
6 |
<RESOURCE_KIND value="WorkflowDSResources" /> |
|
7 |
<RESOURCE_URI value="" /> |
|
8 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" /> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<WORKFLOW_NAME>Index from HDFS</WORKFLOW_NAME> |
|
12 |
<WORKFLOW_TYPE>Provision</WORKFLOW_TYPE> |
|
13 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
14 |
<CONFIGURATION start="manual"> |
|
15 |
<NODE name="setHdfs" type="SetHdfsFile" isStart="true"> |
|
16 |
<DESCRIPTION /> |
|
17 |
<PARAMETERS> |
|
18 |
<PARAM name="inputRecordsPath" type="string" managedBy="user" required="true">/tmp/indexrecords_db_openaireplus_node6_t_DMF.seq</PARAM> |
|
19 |
<PARAM name="inputRecordsPathParam" type="string" managedBy="system" required="true">inputRecordsPath</PARAM> |
|
20 |
<PARAM name="format" type="string" managedBy="user" required="true">DMF</PARAM> |
|
21 |
<PARAM name="layout" type="string" managedBy="user" required="true">index</PARAM> |
|
22 |
<PARAM name="interpretation" type="string" managedBy="user" required="true">openaire</PARAM> |
|
23 |
</PARAMETERS> |
|
24 |
<ARCS> |
|
25 |
<ARC to="prepare" /> |
|
26 |
</ARCS> |
|
27 |
</NODE> |
|
28 |
<NODE name="prepare" type="PrepareIndexJob"> |
|
29 |
<DESCRIPTION>Prepare indexing</DESCRIPTION> |
|
30 |
<PARAMETERS> |
|
31 |
<PARAM name="outputRecordsPathParam" type="string" managedBy="system" required="true">outputRecordsPath</PARAM> |
|
32 |
<PARAM name="rottenRecordsPathParam" type="string" managedBy="system" required="true">rottenRecordsPath</PARAM> |
|
33 |
<PARAM name="layoutToRecordStylesheet" type="string" managedBy="system" required="true">/eu/dnetlib/msro/openaireplus/workflows/index/openaireLayoutToRecordStylesheet.xsl</PARAM> |
|
34 |
<PARAM name="oafSchemaLocationProperty" type="string" managedBy="system" required="true">oaf.schema.location</PARAM> |
|
35 |
</PARAMETERS> |
|
36 |
<ARCS> |
|
37 |
<ARC to="cleanupRotten" /> |
|
38 |
</ARCS> |
|
39 |
</NODE> |
|
40 |
<NODE name="cleanupRotten" type="DeleteHdfsPathJob"> |
|
41 |
<DESCRIPTION>hdfs cleanup (rotten)</DESCRIPTION> |
|
42 |
<PARAMETERS> |
|
43 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
|
44 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
45 |
{ |
|
46 |
'path' : 'rottenRecordsPath' |
|
47 |
} |
|
48 |
</PARAM> |
|
49 |
</PARAMETERS> |
|
50 |
<ARCS> |
|
51 |
<ARC to="updateIndex" /> |
|
52 |
</ARCS> |
|
53 |
</NODE> |
|
54 |
<NODE name="updateIndex" type="SubmitHadoopJob"> |
|
55 |
<DESCRIPTION>M/O index records</DESCRIPTION> |
|
56 |
<PARAMETERS> |
|
57 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
|
58 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">indexFeedJob</PARAM> |
|
59 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
60 |
{ |
|
61 |
'mapred.input.dir' : 'inputRecordsPath', |
|
62 |
'mapred.output.dir' : 'rottenRecordsPath', |
|
63 |
'index.xslt' : 'index.xslt', |
|
64 |
'index.solr.url.list' : 'index.solr.url.list', |
|
65 |
'index.solr.url.local' : 'index.solr.url.local', |
|
66 |
'index.solr.collection' : 'index.solr.collection', |
|
67 |
'index.buffer.flush.threshold' : 'index.buffer.flush.threshold', |
|
68 |
'index.shutdown.wait.time' : 'index.shutdown.wait.time', |
|
69 |
'index.solr.local.feeding' : 'index.solr.local.feeding', |
|
70 |
'index.solr.sim.mode' : 'index.solr.sim.mode', |
|
71 |
'index.feed.timestamp' : 'index.feed.timestamp' |
|
72 |
} |
|
73 |
</PARAM> |
|
74 |
</PARAMETERS> |
|
75 |
<ARCS> |
|
76 |
<ARC to="finalize" /> |
|
77 |
</ARCS> |
|
78 |
</NODE> |
|
79 |
<NODE name="finalize" type="FinalizeIndexFeeding"> |
|
80 |
<DESCRIPTION>commit changes</DESCRIPTION> |
|
81 |
<PARAMETERS /> |
|
82 |
<ARCS> |
|
83 |
<ARC to="success" /> |
|
84 |
</ARCS> |
|
85 |
</NODE> |
|
86 |
</CONFIGURATION> |
|
87 |
<STATUS /> |
|
88 |
</BODY> |
|
89 |
</RESOURCE_PROFILE> |
|
90 |
|
|
91 |
|
modules/dnet-openaireplus-workflows/branches/2.0.0/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/provision/index.hdfs.meta.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value="7c582595-4f0e-4523-99fd-fb8729815f46_TWV0YVdvcmtmbG93RFNSZXNvdXJjZXMvTWV0YVdvcmtmbG93RFNSZXNvdXJjZVR5cGU=" /> |
|
5 |
<RESOURCE_TYPE value="MetaWorkflowDSResourceType" /> |
|
6 |
<RESOURCE_KIND value="MetaWorkflowDSResources" /> |
|
7 |
<RESOURCE_URI value="" /> |
|
8 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" /> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<METAWORKFLOW_NAME family="index_hbase">Index (from HDFS)</METAWORKFLOW_NAME> |
|
12 |
<METAWORKFLOW_DESCRIPTION></METAWORKFLOW_DESCRIPTION> |
|
13 |
<METAWORKFLOW_SECTION>InfoSpace Provision</METAWORKFLOW_SECTION> |
|
14 |
<ADMIN_EMAIL /> |
|
15 |
<CONFIGURATION status="EXECUTABLE"> |
|
16 |
<WORKFLOW id="0ef16778-4667-44b9-b453-3386f445beb1_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="IndexHDFS" /> |
|
17 |
</CONFIGURATION> |
|
18 |
<SCHEDULING enabled="false"> |
|
19 |
<CRON>29 5 22 ? * *</CRON> |
|
20 |
<MININTERVAL>10080</MININTERVAL> |
|
21 |
</SCHEDULING> |
|
22 |
</BODY> |
|
23 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/branches/2.0.0/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/applicationContext-msro-openaire-nodes.xml | ||
---|---|---|
105 | 105 |
<!-- p:targetTable="${hbase.mapred.datatable}" p:hFileOutput="${services.mapreduce.mdrecords.hdsf.hfile.output}" --> |
106 | 106 |
<!-- p:xslt="classpath:/eu/dnetlib/data/transform/dmf_2_hbase.xsl" /> --> |
107 | 107 |
|
108 |
<bean id="wfNodeSetHdfsFile" class="eu.dnetlib.msro.openaireplus.workflows.nodes.index.SetHdfsFileForIndexingJobNode" |
|
109 |
scope="prototype" /> |
|
108 | 110 |
|
109 |
|
|
110 | 111 |
<bean id="wfNodeFindIndex" class="eu.dnetlib.msro.openaireplus.workflows.nodes.index.FindIndexJobNode" |
111 | 112 |
scope="prototype" /> |
112 | 113 |
|
Also available in: Unified diff
added wf to index an hdfs file