1 |
36274
|
alessia.ba
|
package eu.dnetlib.msro.openaireplus.workflows.nodes;
|
2 |
|
|
|
3 |
|
|
import com.googlecode.sarasvati.Arc;
|
4 |
|
|
import com.googlecode.sarasvati.NodeToken;
|
5 |
|
|
import eu.dnetlib.msro.rmi.MSROException;
|
6 |
|
|
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
|
7 |
|
|
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
|
8 |
48139
|
alessia.ba
|
import org.apache.commons.logging.Log;
|
9 |
|
|
import org.apache.commons.logging.LogFactory;
|
10 |
36274
|
alessia.ba
|
|
11 |
|
|
public class CheckHDFSCountJobNode extends SimpleJobNode {
|
12 |
|
|
|
13 |
|
|
private static final Log log = LogFactory.getLog(CheckHDFSCountJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
|
14 |
|
|
|
15 |
48139
|
alessia.ba
|
/**
|
16 |
|
|
* Name of the env param where to find the number to check.
|
17 |
|
|
**/
|
18 |
36274
|
alessia.ba
|
private String numberToVerifyParamName;
|
19 |
48139
|
alessia.ba
|
/**
|
20 |
|
|
* Name of the enc param where to find the number of records stored to hdfs.
|
21 |
|
|
**/
|
22 |
36274
|
alessia.ba
|
private String hdfsCounterParamName = "mainlog:storeHdfsRecords:count";
|
23 |
|
|
|
24 |
|
|
/**
|
25 |
|
|
* {@inheritDoc}
|
26 |
48139
|
alessia.ba
|
*
|
27 |
36274
|
alessia.ba
|
* @throws MSROException
|
28 |
|
|
* @see com.googlecode.sarasvati.mem.MemNode#execute(com.googlecode.sarasvati.Engine, com.googlecode.sarasvati.NodeToken)
|
29 |
|
|
*/
|
30 |
|
|
@Override
|
31 |
|
|
public String execute(final NodeToken token) throws MSROException {
|
32 |
|
|
int hdfsCount = 0;
|
33 |
|
|
int hbaseCount = 0;
|
34 |
|
|
String numberToVerifyInEnvParam = WorkflowsConstants.BLACKBOARD_PARAM_PREFIX + numberToVerifyParamName;
|
35 |
|
|
hbaseCount = getValueFromEnv(token, numberToVerifyInEnvParam);
|
36 |
|
|
hdfsCount = getValueFromEnv(token, hdfsCounterParamName);
|
37 |
|
|
|
38 |
|
|
if (hdfsCount != hbaseCount) {
|
39 |
|
|
log.warn("Numbers are not the same. Number to verify (hbase count): " + hbaseCount + ". From hdfs: " + hdfsCount);
|
40 |
|
|
log.warn((hdfsCount - hbaseCount) + " records come without a 'metadata' field");
|
41 |
|
|
token.getEnv().setAttribute("noMetadataRecords", hdfsCount - hbaseCount);
|
42 |
|
|
}
|
43 |
|
|
return Arc.DEFAULT_ARC;
|
44 |
|
|
|
45 |
|
|
}
|
46 |
|
|
|
47 |
|
|
private int getValueFromEnv(final NodeToken token, final String paramName) throws MSROException {
|
48 |
|
|
String valueString = token.getEnv().getAttribute(paramName);
|
49 |
|
|
int number = 0;
|
50 |
|
|
try {
|
51 |
|
|
number = Integer.parseInt(valueString);
|
52 |
|
|
} catch (NumberFormatException e) {
|
53 |
|
|
log.error("Env param name: " + paramName + " is " + valueString + " and cannot be parsed as integer");
|
54 |
|
|
throw new MSROException(e);
|
55 |
|
|
}
|
56 |
|
|
return number;
|
57 |
|
|
}
|
58 |
|
|
|
59 |
|
|
public String getNumberToVerifyParamName() {
|
60 |
|
|
return numberToVerifyParamName;
|
61 |
|
|
}
|
62 |
|
|
|
63 |
|
|
public void setNumberToVerifyParamName(final String numberToVerifyParamName) {
|
64 |
|
|
this.numberToVerifyParamName = numberToVerifyParamName;
|
65 |
|
|
}
|
66 |
|
|
|
67 |
|
|
public String getHdfsCounterParamName() {
|
68 |
|
|
return hdfsCounterParamName;
|
69 |
|
|
}
|
70 |
|
|
|
71 |
|
|
public void setHdfsCounterParamName(final String hdfsCounterParamName) {
|
72 |
|
|
this.hdfsCounterParamName = hdfsCounterParamName;
|
73 |
|
|
}
|
74 |
|
|
|
75 |
|
|
}
|