Project

General

Profile

1
package eu.dnetlib.data.hadoop.oozie;
2

    
3
import java.io.IOException;
4
import java.util.*;
5

    
6
import com.google.common.collect.Maps;
7
import com.google.common.collect.Sets;
8
import org.apache.commons.io.IOUtils;
9
import org.apache.commons.lang.StringUtils;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.apache.oozie.client.OozieClient;
13
import org.apache.oozie.client.OozieClientException;
14
import org.apache.oozie.client.WorkflowAction;
15
import org.apache.oozie.client.WorkflowJob;
16
import org.apache.oozie.client.WorkflowJob.Status;
17

    
18
import eu.dnetlib.data.hadoop.action.JobCompletion;
19
import eu.dnetlib.data.hadoop.action.JobMonitor;
20
import eu.dnetlib.data.hadoop.rmi.HadoopServiceException;
21

    
22
import static java.lang.String.format;
23

    
24
public class OozieJobMonitor extends JobMonitor {
25

    
26
	private static final Log log = LogFactory.getLog(JobMonitor.class); // NOPMD by marko on 11/24/08 5:02 PM
27

    
28
	private final OozieClient oozieClient;
29

    
30
	private final String jobId;
31

    
32
	public static final String ACTION_TYPE_SUBWORKFLOW = "sub-workflow";
33

    
34
	private Set<String> workflowActions = Sets.newHashSet();
35

    
36
	@Deprecated
37
	public OozieJobMonitor(final OozieClient oozieClient, String jobId, final JobCompletion callback) {
38
		super(callback);
39
		this.oozieClient = oozieClient;
40
		this.jobId = jobId;
41
	}
42

    
43
	public OozieJobMonitor(final OozieClient oozieClient, String jobId, final JobCompletion callback, final Set<String> workflowActions) {
44
		super(callback);
45
		this.oozieClient = oozieClient;
46
		this.jobId = jobId;
47
		this.workflowActions = workflowActions;
48
	}
49

    
50
	@Override
51
	public void run() {
52
		try {
53
			log.info("waiting for oozie job completion: " + getHadoopId());
54

    
55
			Status status = Status.PREP;
56
			while (status.equals(Status.PREP) || status.equals(Status.RUNNING)) {
57
				Thread.sleep(monitorSleepTimeSecs * 1000);
58

    
59
				try {
60
					final Status currentStatus = doGetStatus();
61

    
62
					if (!currentStatus.equals(status)) {
63
						status = currentStatus;
64
						lastActivity = new Date();
65
					}
66
				} catch (Throwable e) {
67
					log.warn(format("error polling status for job %s", jobId), e);
68
				}
69
			}
70

    
71
			log.debug(format("job %s finihsed with status: %s", jobId, status));
72
			if (Status.SUCCEEDED.equals(status)) {
73
				// TODO set some content to return to the blackboard msg.
74

    
75
				log.info(format("looking for oozie job(%s) output values: %s", getHadoopId(), workflowActions));
76
				final Properties report = getReport(getOozieClient(), getHadoopId(), workflowActions);
77
				if (report != null) {
78
					final Map<String, String> map = Maps.newHashMap();
79
					report.forEach((k, v) -> map.put(k.toString(), v.toString()));
80
					log.info("found oozie job report, size: " + map.size());
81
					map.entrySet().forEach(e -> log.info(e.getKey() + " - " + e.getValue()));
82

    
83
					getCallback().done(map);
84
				} else {
85
					log.warn("cannot find oozie job report!");
86
					getCallback().done(new HashMap<>());
87
				}
88
            } else {
89
				// TODO retrieve some failure information from the oozie client.
90
				String msg = format("hadoop job: %s failed with status: %s, oozie log:\n %s\n", getHadoopId(), getStatus(), getOozieClient().getJobLog(getHadoopId()));
91
				getCallback().failed(msg, new HadoopServiceException(msg));
92
            }
93
		} catch (Throwable e) {
94
			getCallback().failed(getHadoopId(), e);
95
		}
96
	}
97

    
98
	/**
99
	 * Provides report entries when found for given oozie job identifier. Returns null when report not found.
100
	 */
101
	private static Properties getReport(final OozieClient oozieClient, final String oozieJobId, final Set<String> workflowActions) throws OozieClientException, IOException {
102
		Properties properties = new Properties();
103
		WorkflowJob oozieJob = oozieClient.getJobInfo(oozieJobId);
104
		for (WorkflowAction currentAction : oozieJob.getActions()) {
105
			log.info(String.format("looking for workflow actions to report, current: '%s'", currentAction.getName()));
106
			if (workflowActions.contains(currentAction.getName())) {
107
				log.info(String.format("found workflow action %s", currentAction.getName()));
108
				if (ACTION_TYPE_SUBWORKFLOW.equals(currentAction.getType())) {
109
					log.info(String.format("looking for sub-workflow actions external id: %s", currentAction.getExternalId()));
110
					Properties subworkflowProperties = getReport(oozieClient, currentAction.getExternalId(), workflowActions);
111
					if (subworkflowProperties != null) {
112
						return subworkflowProperties;
113
					}
114
				} else if (StringUtils.isNotBlank(currentAction.getData())) {
115
					properties.load(IOUtils.toInputStream(currentAction.getData()));
116
					log.info(String.format("found workflow action(%s) properties size %s", currentAction.getName(), properties.values().size()));
117
				}
118
			} else {
119
				log.info(String.format("cannot find workflow action(%s) properties", currentAction.getName()));
120
			}
121
		}
122
		log.info(String.format("found workflow (%s) properties size %s", oozieJob.getAppName(), properties.values().size()));
123
		return properties;
124
	}
125

    
126
	@Override
127
	public String getHadoopId() {
128
		return jobId;
129
	}
130

    
131
	public OozieClient getOozieClient() {
132
		return oozieClient;
133
	}
134

    
135
	@Override
136
	public String getStatus() {
137
		try {
138
			return doGetStatus().toString();
139
		} catch (OozieClientException e) {
140
			log.error("error accessing job status", e);
141
			return "UNKNOWN";
142
		}
143
	}
144

    
145
	private Status doGetStatus() throws OozieClientException {
146
		return getOozieClient().getJobInfo(getHadoopId()).getStatus();
147
	}
148

    
149
	@Override
150
	public Date getLastActivity() {
151
		return lastActivity;
152
	}
153

    
154
	@Override
155
	public Date getStartTime() throws HadoopServiceException {
156
		try {
157
			return getOozieClient().getJobInfo(getHadoopId()).getStartTime();
158
		} catch (OozieClientException e) {
159
			throw new HadoopServiceException("unable to read job start time", e);
160
		}
161
	}
162

    
163
	@Override
164
	public String getTrackerUrl() {
165
		return getOozieClient().getOozieUrl();
166
	}
167

    
168
	@Override
169
	public void kill() {
170
		try {
171
			getOozieClient().kill(getHadoopId());
172
		} catch (OozieClientException e) {
173
			log.error("unable to kill job: " + getHadoopId(), e);
174
		}
175
	}
176

    
177
}
(2-2/2)