![JAR search and dependency download from the Maven repository](/logo.png)
org.apache.oozie.service.StatusTransitService Maven / Gradle / Ivy
The newest version!
/**
* Copyright (c) 2010 Yahoo! Inc. All rights reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. See accompanying LICENSE file.
*/
package org.apache.oozie.service;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.oozie.BundleActionBean;
import org.apache.oozie.BundleJobBean;
import org.apache.oozie.CoordinatorActionBean;
import org.apache.oozie.CoordinatorJobBean;
import org.apache.oozie.client.CoordinatorAction;
import org.apache.oozie.client.Job;
import org.apache.oozie.command.CommandException;
import org.apache.oozie.command.bundle.BundleKillXCommand;
import org.apache.oozie.command.bundle.BundleStatusUpdateXCommand;
import org.apache.oozie.executor.jpa.BundleActionsGetByLastModifiedTimeJPAExecutor;
import org.apache.oozie.executor.jpa.BundleActionsGetJPAExecutor;
import org.apache.oozie.executor.jpa.BundleJobGetJPAExecutor;
import org.apache.oozie.executor.jpa.BundleJobUpdateJPAExecutor;
import org.apache.oozie.executor.jpa.BundleJobsGetPendingJPAExecutor;
import org.apache.oozie.executor.jpa.BundleJobsGetRunningJPAExecutor;
import org.apache.oozie.executor.jpa.CoordActionsGetByLastModifiedTimeJPAExecutor;
import org.apache.oozie.executor.jpa.CoordJobGetActionsJPAExecutor;
import org.apache.oozie.executor.jpa.CoordJobGetJPAExecutor;
import org.apache.oozie.executor.jpa.CoordJobUpdateJPAExecutor;
import org.apache.oozie.executor.jpa.CoordJobsGetPendingJPAExecutor;
import org.apache.oozie.executor.jpa.JPAExecutorException;
import org.apache.oozie.service.SchedulerService;
import org.apache.oozie.service.Service;
import org.apache.oozie.service.Services;
import org.apache.oozie.util.DateUtils;
import org.apache.oozie.util.MemoryLocks;
import org.apache.oozie.util.XLog;
/**
* StateTransitService is scheduled to run at the configured interval.
*
* It is to update job's status according to its child actions' status. If all child actions' pending flag equals 0 (job
* done), we reset the job's pending flag to 0. If all child actions are succeeded, we set the job's status to
* SUCCEEDED.
*/
public class StatusTransitService implements Service {
public static final String CONF_PREFIX = Service.CONF_PREFIX + "StatusTransitService.";
public static final String CONF_STATUSTRANSIT_INTERVAL = CONF_PREFIX + "statusTransit.interval";
private static int limit = -1;
private static Date lastInstanceStartTime = null;
private final static XLog LOG = XLog.getLog(StatusTransitRunnable.class);
/**
* StateTransitRunnable is the runnable which is scheduled to run at the configured interval.
*
* It is to update job's status according to its child actions' status. If all child actions' pending flag equals 0
* (job done), we reset the job's pending flag to 0. If all child actions are succeeded, we set the job's status to
* SUCCEEDED.
*/
static class StatusTransitRunnable implements Runnable {
private JPAService jpaService = null;
private MemoryLocks.LockToken lock;
public StatusTransitRunnable() {
jpaService = Services.get().get(JPAService.class);
if (jpaService == null) {
LOG.error("Missing JPAService");
}
}
public void run() {
try {
Date curDate = new Date(); // records the start time of this service run;
// first check if there is some other instance running;
lock = Services.get().get(MemoryLocksService.class).getWriteLock(StatusTransitService.class.getName(),
lockTimeout);
if (lock == null) {
LOG.info("This StatusTransitService instance"
+ " will not run since there is already an instance running");
}
else {
LOG.info("Acquired lock for [{0}]", StatusTransitService.class.getName());
// running coord jobs transit service
coordTransit();
// running bundle jobs transit service
bundleTransit();
lastInstanceStartTime = curDate;
}
}
catch (Exception ex) {
LOG.warn("Exception happened during bundle job status transition", ex);
}
finally {
// release lock;
if (lock != null) {
lock.release();
LOG.info("Released lock for [{0}]", StatusTransitService.class.getName());
}
}
}
/**
* Aggregate bundle actions' status to bundle jobs
*
* @throws JPAExecutorException thrown if failed in db updates or retrievals
* @throws CommandException thrown if failed to run commands
*/
private void bundleTransit() throws JPAExecutorException, CommandException {
List pendingJobCheckList = null;
List runningJobCheckList = null;
List> bundleLists = new ArrayList>();
if (lastInstanceStartTime == null) {
LOG.info("Running bundle status service first instance");
// this is the first instance, we need to check for all pending jobs;
pendingJobCheckList = jpaService.execute(new BundleJobsGetPendingJPAExecutor(limit));
runningJobCheckList = jpaService.execute(new BundleJobsGetRunningJPAExecutor(limit));
bundleLists.add(pendingJobCheckList);
bundleLists.add(runningJobCheckList);
}
else {
LOG.info("Running bundle status service from last instance time = "
+ DateUtils.convertDateToString(lastInstanceStartTime));
// this is not the first instance, we should only check jobs that have actions been
// updated >= start time of last service run;
List actionList = jpaService
.execute(new BundleActionsGetByLastModifiedTimeJPAExecutor(lastInstanceStartTime));
Set bundleIds = new HashSet();
for (BundleActionBean action : actionList) {
bundleIds.add(action.getBundleId());
}
pendingJobCheckList = new ArrayList();
for (String bundleId : bundleIds.toArray(new String[bundleIds.size()])) {
BundleJobBean bundle = jpaService.execute(new BundleJobGetJPAExecutor(bundleId));
// Running bundle job might have pending false
if (bundle.isPending() || bundle.getStatus().equals(Job.Status.RUNNING)) {
pendingJobCheckList.add(bundle);
}
}
runningJobCheckList = pendingJobCheckList;
bundleLists.add(pendingJobCheckList);
}
updateBundleJobStatus(bundleLists);
}
private void updateBundleJobStatus(List> bundleLists) throws JPAExecutorException,
CommandException {
if (bundleLists != null) {
for (List listBundleBean : bundleLists) {
for (BundleJobBean bundleJob : listBundleBean) {
String jobId = bundleJob.getId();
Job.Status[] bundleStatus = new Job.Status[1];
bundleStatus[0] = bundleJob.getStatus();
List bundleActions = jpaService
.execute(new BundleActionsGetJPAExecutor(jobId));
HashMap bundleActionStatus = new HashMap();
boolean foundPending = false;
for (BundleActionBean bAction : bundleActions) {
if (!bAction.isPending()) {
int counter = 0;
if (bundleActionStatus.containsKey(bAction.getStatus())) {
counter = bundleActionStatus.get(bAction.getStatus()) + 1;
}
else {
++counter;
}
bundleActionStatus.put(bAction.getStatus(), counter);
if (bAction.getCoordId() == null
&& (bAction.getStatus() == Job.Status.FAILED || bAction.getStatus() == Job.Status.KILLED)) {
(new BundleKillXCommand(jobId)).call();
LOG.info("Bundle job [" + jobId
+ "] has been killed since one of its coordinator job failed submission.");
}
}
else {
foundPending = true;
break;
}
}
if(foundPending){
continue;
}
if (checkTerminalStatus(bundleActionStatus, bundleActions, bundleStatus)) {
LOG.info("Bundle job [" + jobId + "] Status set to " + bundleStatus[0].toString());
updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]);
continue;
}
else if (checkPrepStatus(bundleActionStatus, bundleActions, bundleStatus)) {
LOG.info("Bundle job [" + jobId + "] Status set to " + bundleStatus[0].toString());
updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]);
continue;
}
else if (checkPausedStatus(bundleActionStatus, bundleActions, bundleStatus)) {
LOG.info("Bundle job [" + jobId + "] Status set to " + bundleStatus[0].toString());
updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]);
continue;
}
else if (checkSuspendStatus(bundleActionStatus, bundleActions, bundleStatus)) {
LOG.info("Bundle job [" + jobId + "] Status set to " + bundleStatus[0].toString());
updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]);
continue;
}
else if (checkRunningStatus(bundleActionStatus, bundleActions, bundleStatus)) {
LOG.info("Bundle job [" + jobId + "] Status set to " + bundleStatus[0].toString());
updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]);
continue;
}
}
}
}
}
private void updateCoordJobStatus(List CoordList) throws JPAExecutorException,
CommandException {
if (CoordList != null) {
for (CoordinatorJobBean coordJob : CoordList) {
String jobId = coordJob.getId();
Job.Status[] coordStatus = new Job.Status[1];
coordStatus[0] = coordJob.getStatus();
List coordActions = jpaService.execute(new CoordJobGetActionsJPAExecutor(
jobId));
HashMap coordActionStatus = new HashMap();
boolean foundPending = false;
for (CoordinatorActionBean cAction : coordActions) {
if (!cAction.isPending()) {
int counter = 0;
if (coordActionStatus.containsKey(cAction.getStatus())) {
counter = coordActionStatus.get(cAction.getStatus()) + 1;
}
else {
++counter;
}
coordActionStatus.put(cAction.getStatus(), counter);
}
else {
foundPending = true;
break;
}
}
if (foundPending) {
continue;
}
if (checkCoordTerminalStatus(coordActionStatus, coordActions, coordStatus)) {
LOG.info("Coord job [" + jobId + "] Status set to " + coordStatus[0].toString());
updateCoordJob(coordActionStatus,coordActions,coordJob, coordStatus[0]);
continue;
}
else if (checkCoordSuspendStatus(coordActionStatus, coordActions, coordStatus)) {
LOG.info("Coord job [" + jobId + "] Status set to " + coordStatus[0].toString());
updateCoordJob(coordActionStatus,coordActions,coordJob, coordStatus[0]);
continue;
}
}
}
}
private boolean checkTerminalStatus(HashMap bundleActionStatus,
List bundleActions, Job.Status[] bundleStatus) {
boolean ret = false;
int totalValuesSucceed = 0;
if (bundleActionStatus.containsKey(Job.Status.SUCCEEDED)) {
totalValuesSucceed = bundleActionStatus.get(Job.Status.SUCCEEDED);
}
int totalValuesFailed = 0;
if (bundleActionStatus.containsKey(Job.Status.FAILED)) {
totalValuesFailed = bundleActionStatus.get(Job.Status.FAILED);
}
int totalValuesKilled = 0;
if (bundleActionStatus.containsKey(Job.Status.KILLED)) {
totalValuesKilled = bundleActionStatus.get(Job.Status.KILLED);
}
int totalValuesDoneWithError = 0;
if (bundleActionStatus.containsKey(Job.Status.DONEWITHERROR)) {
totalValuesDoneWithError = bundleActionStatus.get(Job.Status.DONEWITHERROR);
}
if (bundleActions.size() == (totalValuesSucceed + totalValuesFailed + totalValuesKilled + totalValuesDoneWithError)) {
// If all the bundle actions are succeeded then bundle job should be succeeded.
if (bundleActions.size() == totalValuesSucceed) {
bundleStatus[0] = Job.Status.SUCCEEDED;
ret = true;
}
else if (bundleActions.size() == totalValuesKilled) {
// If all the bundle actions are KILLED then bundle job should be KILLED.
bundleStatus[0] = Job.Status.KILLED;
ret = true;
}
else if (bundleActions.size() == totalValuesFailed) {
// If all the bundle actions are FAILED then bundle job should be FAILED.
bundleStatus[0] = Job.Status.FAILED;
ret = true;
}
else {
bundleStatus[0] = Job.Status.DONEWITHERROR;
ret = true;
}
}
return ret;
}
private boolean checkCoordTerminalStatus(HashMap coordActionStatus,
List coordActions, Job.Status[] coordStatus) {
boolean ret = false;
int totalValuesSucceed = 0;
if (coordActionStatus.containsKey(CoordinatorAction.Status.SUCCEEDED)) {
totalValuesSucceed = coordActionStatus.get(CoordinatorAction.Status.SUCCEEDED);
}
int totalValuesFailed = 0;
if (coordActionStatus.containsKey(CoordinatorAction.Status.FAILED)) {
totalValuesFailed = coordActionStatus.get(CoordinatorAction.Status.FAILED);
}
int totalValuesKilled = 0;
if (coordActionStatus.containsKey(CoordinatorAction.Status.KILLED)) {
totalValuesKilled = coordActionStatus.get(CoordinatorAction.Status.KILLED);
}
int totalValuesTimeOut = 0;
if (coordActionStatus.containsKey(CoordinatorAction.Status.TIMEDOUT)) {
totalValuesTimeOut = coordActionStatus.get(CoordinatorAction.Status.TIMEDOUT);
}
if (coordActions.size() == (totalValuesSucceed + totalValuesFailed + totalValuesKilled + totalValuesTimeOut)) {
// If all the bundle actions are succeeded then bundle job should be succeeded.
if (coordActions.size() == totalValuesSucceed) {
coordStatus[0] = Job.Status.SUCCEEDED;
ret = true;
}
else if (coordActions.size() == totalValuesKilled) {
// If all the bundle actions are KILLED then bundle job should be KILLED.
coordStatus[0] = Job.Status.KILLED;
ret = true;
}
else if (coordActions.size() == totalValuesFailed) {
// If all the bundle actions are FAILED then bundle job should be FAILED.
coordStatus[0] = Job.Status.FAILED;
ret = true;
}
else if (coordActions.size() == totalValuesTimeOut) {
// If all the bundle actions are TIMEOUT then bundle job should be FAILED.
coordStatus[0] = Job.Status.FAILED;
ret = true;
}
else {
coordStatus[0] = Job.Status.DONEWITHERROR;
ret = true;
}
}
return ret;
}
private boolean checkPrepStatus(HashMap bundleActionStatus,
List bundleActions, Job.Status[] bundleStatus) {
boolean ret = false;
if (bundleActionStatus.containsKey(Job.Status.PREP)) {
// If all the bundle actions are PREP then bundle job should be RUNNING.
if (bundleActions.size() > bundleActionStatus.get(Job.Status.PREP)) {
bundleStatus[0] = Job.Status.RUNNING;
ret = true;
}
}
return ret;
}
private boolean checkPausedStatus(HashMap bundleActionStatus,
List bundleActions, Job.Status[] bundleStatus) {
boolean ret = false;
if (bundleActionStatus.containsKey(Job.Status.PAUSED)) {
if (bundleActions.size() == bundleActionStatus.get(Job.Status.PAUSED)) {
bundleStatus[0] = Job.Status.PAUSED;
ret = true;
}
else if (bundleActionStatus.containsKey(Job.Status.PAUSEDWITHERROR)
&& (bundleActions.size() == bundleActionStatus.get(Job.Status.PAUSED)
+ bundleActionStatus.get(Job.Status.PAUSEDWITHERROR))) {
// bundleStatus = Job.Status.PAUSEDWITHERROR;
// We need to change this to PAUSEDWITHERROR in future when we add this to coordinator
bundleStatus[0] = Job.Status.PAUSED;
ret = true;
}
}
return ret;
}
private boolean checkSuspendStatus(HashMap bundleActionStatus,
List bundleActions, Job.Status[] bundleStatus) {
boolean ret = false;
if (bundleActionStatus.containsKey(Job.Status.SUSPENDED)) {
if (bundleActions.size() == bundleActionStatus.get(Job.Status.SUSPENDED)) {
bundleStatus[0] = Job.Status.SUSPENDED;
ret = true;
}
else if (bundleActionStatus.containsKey(Job.Status.SUSPENDEDWITHERROR)
&& (bundleActions.size() == bundleActionStatus.get(Job.Status.SUSPENDED)
+ bundleActionStatus.get(Job.Status.SUSPENDEDWITHERROR))) {
// bundleStatus = Job.Status.SUSPENDEDWITHERROR;
// We need to change this to SUSPENDEDWITHERROR in future when we add this to coordinator
bundleStatus[0] = Job.Status.SUSPENDED;
ret = true;
}
}
return ret;
}
private boolean checkCoordSuspendStatus(HashMap coordActionStatus,
List coordActions, Job.Status[] coordStatus) {
boolean ret = false;
if (coordActionStatus.containsKey(CoordinatorAction.Status.SUSPENDED)) {
if (coordActions.size() == coordActionStatus.get(CoordinatorAction.Status.SUSPENDED)) {
coordStatus[0] = Job.Status.SUSPENDED;
ret = true;
}
}
return ret;
}
private boolean checkRunningStatus(HashMap bundleActionStatus,
List bundleActions, Job.Status[] bundleStatus) {
boolean ret = false;
if (bundleActionStatus.containsKey(Job.Status.RUNNING)) {
// If all the bundle actions are succeeded then bundle job should be succeeded.
if (bundleActions.size() == bundleActionStatus.get(Job.Status.RUNNING)) {
bundleStatus[0] = Job.Status.RUNNING;
ret = true;
}
else if (bundleActionStatus.get(Job.Status.RUNNING) > 0) {
if ((bundleActionStatus.containsKey(Job.Status.FAILED) && bundleActionStatus.get(Job.Status.FAILED) > 0)
|| (bundleActionStatus.containsKey(Job.Status.KILLED) && bundleActionStatus
.get(Job.Status.KILLED) > 0)
|| (bundleActionStatus.containsKey(Job.Status.DONEWITHERROR) && bundleActionStatus
.get(Job.Status.DONEWITHERROR) > 0)
|| (bundleActionStatus.containsKey(Job.Status.RUNNINGWITHERROR) && bundleActionStatus
.get(Job.Status.RUNNINGWITHERROR) > 0)) {
// bundleStatus = Job.Status.RUNNINGWITHERROR;
// We need to change this to RUNNINGWIHERROR in future when we add this to coordinator
bundleStatus[0] = Job.Status.RUNNING;
ret = true;
}
}
}
return ret;
}
private void updateBundleJob(HashMap bundleActionStatus,
List bundleActions, BundleJobBean bundleJob, Job.Status bundleStatus)
throws JPAExecutorException {
String jobId = bundleJob.getId();
boolean pendingBundleJob = bundleJob.isPending();
// Checking the bundle pending should be updated or not
int totalNonPendingActions = 0;
for (Job.Status js : bundleActionStatus.keySet()) {
totalNonPendingActions += bundleActionStatus.get(js);
}
if (totalNonPendingActions == bundleActions.size()) {
pendingBundleJob = false;
}
// Update the Bundle Job
bundleJob.setStatus(bundleStatus);
if (pendingBundleJob) {
bundleJob.setPending();
LOG.info("Bundle job [" + jobId + "] Pending set to TRUE");
}
else {
bundleJob.resetPending();
LOG.info("Bundle job [" + jobId + "] Pending set to FALSE");
}
jpaService.execute(new BundleJobUpdateJPAExecutor(bundleJob));
}
private void updateCoordJob(HashMap coordActionStatus,
List coordActions, CoordinatorJobBean coordJob, Job.Status coordStatus)
throws JPAExecutorException, CommandException {
Job.Status prevStatus = coordJob.getStatus();
// Update the Coord Job
if (coordJob.getStatus() == Job.Status.SUCCEEDED || coordJob.getStatus() == Job.Status.FAILED
|| coordJob.getStatus() == Job.Status.KILLED || coordJob.getStatus() == Job.Status.DONEWITHERROR) {
if (coordStatus == Job.Status.SUSPENDED) {
LOG.info("Coord Job [" + coordJob.getId()
+ "] status can not be updated as its already in Terminal state");
return;
}
}
String jobId = coordJob.getId();
boolean pendingCoordJob = coordJob.isPending();
// Checking the bundle pending should be updated or not
int totalNonPendingActions = 0;
for (CoordinatorAction.Status js : coordActionStatus.keySet()) {
totalNonPendingActions += coordActionStatus.get(js);
}
if (totalNonPendingActions == coordActions.size()) {
pendingCoordJob = false;
}
if (pendingCoordJob) {
coordJob.setPending();
LOG.info("Coord job [" + jobId + "] Pending set to TRUE");
}
else {
coordJob.resetPending();
LOG.info("Coord job [" + jobId + "] Pending set to FALSE");
}
coordJob.setStatus(coordStatus);
jpaService.execute(new CoordJobUpdateJPAExecutor(coordJob));
// update bundle action only when status changes in coord job
if (coordJob.getBundleId() != null) {
if (!prevStatus.equals(coordJob.getStatus())) {
BundleStatusUpdateXCommand bundleStatusUpdate = new BundleStatusUpdateXCommand(coordJob, prevStatus);
bundleStatusUpdate.call();
}
}
}
/**
* Aggregate coordinator actions' status to coordinator jobs
*
* @throws JPAExecutorException thrown if failed in db updates or retrievals
* @throws CommandException thrown if failed to run commands
*/
private void coordTransit() throws JPAExecutorException, CommandException {
List pendingJobCheckList = null;
if (lastInstanceStartTime == null) {
LOG.info("Running coordinator status service first instance");
// this is the first instance, we need to check for all pending jobs;
pendingJobCheckList = jpaService.execute(new CoordJobsGetPendingJPAExecutor(limit));
}
else {
LOG.info("Running coordinator status service from last instance time = "
+ DateUtils.convertDateToString(lastInstanceStartTime));
// this is not the first instance, we should only check jobs that have actions been
// updated >= start time of last service run;
List actionList = jpaService
.execute(new CoordActionsGetByLastModifiedTimeJPAExecutor(lastInstanceStartTime));
Set coordIds = new HashSet();
for (CoordinatorActionBean action : actionList) {
coordIds.add(action.getJobId());
}
pendingJobCheckList = new ArrayList();
for (String coordId : coordIds.toArray(new String[coordIds.size()])) {
CoordinatorJobBean coordJob = jpaService.execute(new CoordJobGetJPAExecutor(coordId));
if (coordJob.isPending()) {
pendingJobCheckList.add(coordJob);
}
}
}
updateCoordJobStatus(pendingJobCheckList);
}
}
/**
* Initializes the {@link StatusTransitService}.
*
* @param services services instance.
*/
@Override
public void init(Services services) {
Configuration conf = services.getConf();
Runnable stateTransitRunnable = new StatusTransitRunnable();
services.get(SchedulerService.class).schedule(stateTransitRunnable, 10,
conf.getInt(CONF_STATUSTRANSIT_INTERVAL, 60), SchedulerService.Unit.SEC);
}
/**
* Destroy the StateTransit Jobs Service.
*/
@Override
public void destroy() {
}
/**
* Return the public interface for the purge jobs service.
*
* @return {@link StatusTransitService}.
*/
@Override
public Class extends Service> getInterface() {
return StatusTransitService.class;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy