1
|
package eu.dnetlib.data.hadoop.oozie;
|
2
|
|
3
|
import java.io.IOException;
|
4
|
import java.util.*;
|
5
|
|
6
|
import com.google.common.collect.Maps;
|
7
|
import com.google.common.collect.Sets;
|
8
|
import org.apache.commons.io.IOUtils;
|
9
|
import org.apache.commons.lang.StringUtils;
|
10
|
import org.apache.commons.logging.Log;
|
11
|
import org.apache.commons.logging.LogFactory;
|
12
|
import org.apache.oozie.client.OozieClient;
|
13
|
import org.apache.oozie.client.OozieClientException;
|
14
|
import org.apache.oozie.client.WorkflowAction;
|
15
|
import org.apache.oozie.client.WorkflowJob;
|
16
|
import org.apache.oozie.client.WorkflowJob.Status;
|
17
|
|
18
|
import eu.dnetlib.data.hadoop.action.JobCompletion;
|
19
|
import eu.dnetlib.data.hadoop.action.JobMonitor;
|
20
|
import eu.dnetlib.data.hadoop.rmi.HadoopServiceException;
|
21
|
|
22
|
import static java.lang.String.format;
|
23
|
|
24
|
public class OozieJobMonitor extends JobMonitor {
|
25
|
|
26
|
private static final Log log = LogFactory.getLog(JobMonitor.class); // NOPMD by marko on 11/24/08 5:02 PM
|
27
|
|
28
|
private final OozieClient oozieClient;
|
29
|
|
30
|
private final String jobId;
|
31
|
|
32
|
public static final String ACTION_TYPE_SUBWORKFLOW = "sub-workflow";
|
33
|
|
34
|
private Set<String> workflowActions = Sets.newHashSet();
|
35
|
|
36
|
@Deprecated
|
37
|
public OozieJobMonitor(final OozieClient oozieClient, String jobId, final JobCompletion callback) {
|
38
|
super(callback);
|
39
|
this.oozieClient = oozieClient;
|
40
|
this.jobId = jobId;
|
41
|
}
|
42
|
|
43
|
public OozieJobMonitor(final OozieClient oozieClient, String jobId, final JobCompletion callback, final Set<String> workflowActions) {
|
44
|
super(callback);
|
45
|
this.oozieClient = oozieClient;
|
46
|
this.jobId = jobId;
|
47
|
this.workflowActions = workflowActions;
|
48
|
}
|
49
|
|
50
|
@Override
|
51
|
public void run() {
|
52
|
try {
|
53
|
log.info("waiting for oozie job completion: " + getHadoopId());
|
54
|
|
55
|
Status status = Status.PREP;
|
56
|
while (status.equals(Status.PREP) || status.equals(Status.RUNNING) || status.equals(Status.SUSPENDED)) {
|
57
|
Thread.sleep(monitorSleepTimeSecs * 1000);
|
58
|
|
59
|
try {
|
60
|
final Status currentStatus = doGetStatus();
|
61
|
|
62
|
if (!currentStatus.equals(status)) {
|
63
|
log.debug(format("jobId %s status changed from %s to %s", jobId, status.toString(), currentStatus.toString()));
|
64
|
status = currentStatus;
|
65
|
lastActivity = new Date();
|
66
|
}
|
67
|
} catch (Throwable e) {
|
68
|
log.warn(format("error polling status for job %s", jobId), e);
|
69
|
}
|
70
|
}
|
71
|
|
72
|
log.debug(format("looking for oozie job(%s) output values: %s", getHadoopId(), workflowActions));
|
73
|
Map<String, String> report = getReport(getOozieClient(), getHadoopId(), workflowActions);
|
74
|
|
75
|
log.info(format("job %s finished with status: %s", jobId, status));
|
76
|
if (Status.SUCCEEDED.equals(status)) {
|
77
|
getCallback().done(report);
|
78
|
|
79
|
} else {
|
80
|
// TODO retrieve some failure information from the oozie client.
|
81
|
String msg = format("hadoop job: %s failed with status: %s, oozie log:\n %s\n", getHadoopId(), getStatus(), getOozieClient().getJobLog(getHadoopId()));
|
82
|
getCallback().failed(report, msg, new HadoopServiceException(msg));
|
83
|
}
|
84
|
} catch (Throwable e) {
|
85
|
getCallback().failed(Maps.newHashMap(), getHadoopId(), e);
|
86
|
}
|
87
|
}
|
88
|
|
89
|
/**
|
90
|
* Provides report entries when found for given oozie job identifier. Returns null when report not found.
|
91
|
*/
|
92
|
private static Map<String, String> getReport(final OozieClient oozieClient, final String oozieJobId, final Set<String> workflowActions) throws OozieClientException, IOException {
|
93
|
Map<String, String> map = Maps.newHashMap();
|
94
|
WorkflowJob oozieJob = oozieClient.getJobInfo(oozieJobId);
|
95
|
for (WorkflowAction currentAction : oozieJob.getActions()) {
|
96
|
log.info(String.format("looking for workflow actions to report, current: '%s'", currentAction.getName()));
|
97
|
if (workflowActions.contains(currentAction.getName())) {
|
98
|
log.info(String.format("found workflow action %s", currentAction.getName()));
|
99
|
if (ACTION_TYPE_SUBWORKFLOW.equals(currentAction.getType())) {
|
100
|
log.info(String.format("looking for sub-workflow actions external id: %s", currentAction.getExternalId()));
|
101
|
Map<String, String> subworkflowProperties = getReport(oozieClient, currentAction.getExternalId(), workflowActions);
|
102
|
if (subworkflowProperties != null) {
|
103
|
return subworkflowProperties;
|
104
|
}
|
105
|
} else if (StringUtils.isNotBlank(currentAction.getData())) {
|
106
|
Properties properties = new Properties();
|
107
|
properties.load(IOUtils.toInputStream(currentAction.getData()));
|
108
|
properties.forEach((k, v) -> map.put(currentAction.getName() + ":" + k.toString(), v.toString()));
|
109
|
log.info(String.format("found workflow action(%s) properties size %s", currentAction.getName(), properties.values().size()));
|
110
|
}
|
111
|
} else {
|
112
|
log.info(String.format("cannot find workflow action(%s) properties", currentAction.getName()));
|
113
|
}
|
114
|
}
|
115
|
log.info(String.format("found workflow (%s) properties size %s", oozieJob.getAppName(), map.values().size()));
|
116
|
map.entrySet().forEach(e -> log.info(e.getKey() + " - " + e.getValue()));
|
117
|
return map;
|
118
|
}
|
119
|
|
120
|
@Override
|
121
|
public String getHadoopId() {
|
122
|
return jobId;
|
123
|
}
|
124
|
|
125
|
public OozieClient getOozieClient() {
|
126
|
return oozieClient;
|
127
|
}
|
128
|
|
129
|
@Override
|
130
|
public String getStatus() {
|
131
|
try {
|
132
|
return doGetStatus().toString();
|
133
|
} catch (OozieClientException e) {
|
134
|
log.error("error accessing job status", e);
|
135
|
return "UNKNOWN";
|
136
|
}
|
137
|
}
|
138
|
|
139
|
private Status doGetStatus() throws OozieClientException {
|
140
|
return getOozieClient().getJobInfo(getHadoopId()).getStatus();
|
141
|
}
|
142
|
|
143
|
@Override
|
144
|
public Date getLastActivity() {
|
145
|
return lastActivity;
|
146
|
}
|
147
|
|
148
|
@Override
|
149
|
public Date getStartTime() throws HadoopServiceException {
|
150
|
try {
|
151
|
return getOozieClient().getJobInfo(getHadoopId()).getStartTime();
|
152
|
} catch (OozieClientException e) {
|
153
|
throw new HadoopServiceException("unable to read job start time", e);
|
154
|
}
|
155
|
}
|
156
|
|
157
|
@Override
|
158
|
public String getTrackerUrl() {
|
159
|
return getOozieClient().getOozieUrl();
|
160
|
}
|
161
|
|
162
|
@Override
|
163
|
public void kill() {
|
164
|
try {
|
165
|
getOozieClient().kill(getHadoopId());
|
166
|
} catch (OozieClientException e) {
|
167
|
log.error("unable to kill job: " + getHadoopId(), e);
|
168
|
}
|
169
|
}
|
170
|
|
171
|
}
|