/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hyracks.control.cc.scheduler;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Random;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.json.JSONException;
import org.json.JSONObject;

import org.apache.hyracks.api.comm.NetworkAddress;
import org.apache.hyracks.api.constraints.Constraint;
import org.apache.hyracks.api.constraints.expressions.LValueConstraintExpression;
import org.apache.hyracks.api.constraints.expressions.PartitionLocationExpression;
import org.apache.hyracks.api.dataflow.ActivityId;
import org.apache.hyracks.api.dataflow.ConnectorDescriptorId;
import org.apache.hyracks.api.dataflow.IConnectorDescriptor;
import org.apache.hyracks.api.dataflow.OperatorDescriptorId;
import org.apache.hyracks.api.dataflow.TaskAttemptId;
import org.apache.hyracks.api.dataflow.TaskId;
import org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy;
import org.apache.hyracks.api.deployment.DeploymentId;
import org.apache.hyracks.api.exceptions.HyracksException;
import org.apache.hyracks.api.job.ActivityCluster;
import org.apache.hyracks.api.job.ActivityClusterGraph;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.api.job.JobStatus;
import org.apache.hyracks.api.partitions.PartitionId;
import org.apache.hyracks.api.util.JavaSerializationUtils;
import org.apache.hyracks.control.cc.ClusterControllerService;
import org.apache.hyracks.control.cc.NodeControllerState;
import org.apache.hyracks.control.cc.application.CCApplicationContext;
import org.apache.hyracks.control.cc.job.ActivityClusterPlan;
import org.apache.hyracks.control.cc.job.JobRun;
import org.apache.hyracks.control.cc.job.Task;
import org.apache.hyracks.control.cc.job.TaskAttempt;
import org.apache.hyracks.control.cc.job.TaskCluster;
import org.apache.hyracks.control.cc.job.TaskClusterAttempt;
import org.apache.hyracks.control.cc.partitions.PartitionMatchMaker;
import org.apache.hyracks.control.cc.work.JobCleanupWork;
import org.apache.hyracks.control.common.job.PartitionState;
import org.apache.hyracks.control.common.job.TaskAttemptDescriptor;

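/**
 * Schedules a single job run on the cluster controller: plans activity
 * clusters into task clusters, ranks and launches runnable task clusters on
 * the node controllers, and reacts to task-complete, task-failure, and
 * node-failure notifications by re-planning or aborting work as needed.
 */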
public class JobScheduler {
    private static final Logger LOGGER = Logger.getLogger(JobScheduler.class.getName());

    private final ClusterControllerService ccs;

    private final JobRun jobRun;

    private final PartitionConstraintSolver solver;

    private final Map<PartitionId, TaskCluster> partitionProducingTaskClusterMap;

    private final Set<TaskCluster> inProgressTaskClusters;

    public JobScheduler(ClusterControllerService ccs, JobRun jobRun, Collection<Constraint> constraints) {
        this.ccs = ccs;
        this.jobRun = jobRun;
        solver = new PartitionConstraintSolver();
        partitionProducingTaskClusterMap = new HashMap<PartitionId, TaskCluster>();
        inProgressTaskClusters = new HashSet<TaskCluster>();
        solver.addConstraints(constraints);
    }

    public JobRun getJobRun() {
        return jobRun;
    }

    public PartitionConstraintSolver getSolver() {
        return solver;
    }

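    /**
     * Starts the job: plans and launches the initially runnable task
     * clusters, then notifies the application context that the job has
     * started.
     */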
    public void startJob() throws HyracksException {
        startRunnableActivityClusters();
        ccs.getApplicationContext().notifyJobStart(jobRun.getJobId());
    }

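    /**
     * Walks the ActivityCluster dependency graph depth-first, planning each
     * cluster on demand (via ActivityClusterPlanner) once all of its
     * dependencies have completed, and collects into the frontier every sink
     * TaskCluster (one that produces no partitions) whose latest attempt has
     * not completed.
     */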
    private void findRunnableTaskClusterRoots(Set<TaskCluster> frontier, Collection<ActivityCluster> roots)
            throws HyracksException {
        for (ActivityCluster root : roots) {
            findRunnableTaskClusterRoots(frontier, root);
        }
    }

    private void findRunnableTaskClusterRoots(Set<TaskCluster> frontier, ActivityCluster candidate)
            throws HyracksException {
        boolean depsComplete = true;
        for (ActivityCluster depAC : candidate.getDependencies()) {
            if (!isPlanned(depAC)) {
                depsComplete = false;
                findRunnableTaskClusterRoots(frontier, depAC);
            } else {
                boolean tcRootsComplete = true;
                for (TaskCluster tc : getActivityClusterPlan(depAC).getTaskClusters()) {
                    if (tc.getProducedPartitions().isEmpty()) {
                        TaskClusterAttempt tca = findLastTaskClusterAttempt(tc);
                        if (tca == null || tca.getStatus() != TaskClusterAttempt.TaskClusterStatus.COMPLETED) {
                            tcRootsComplete = false;
                            break;
                        }
                    }
                }
                if (!tcRootsComplete) {
                    depsComplete = false;
                    findRunnableTaskClusterRoots(frontier, depAC);
                }
            }
        }
        if (depsComplete) {
            if (!isPlanned(candidate)) {
                ActivityClusterPlanner acp = new ActivityClusterPlanner(this);
                ActivityClusterPlan acPlan = acp.planActivityCluster(candidate);
                jobRun.getActivityClusterPlanMap().put(candidate.getId(), acPlan);
                partitionProducingTaskClusterMap.putAll(acp.getPartitionProducingTaskClusterMap());
            }
            for (TaskCluster tc : getActivityClusterPlan(candidate).getTaskClusters()) {
                if (tc.getProducedPartitions().isEmpty()) {
                    TaskClusterAttempt tca = findLastTaskClusterAttempt(tc);
                    if (tca == null || tca.getStatus() != TaskClusterAttempt.TaskClusterStatus.COMPLETED) {
                        frontier.add(tc);
                    }
                }
            }
        }
    }

    private ActivityClusterPlan getActivityClusterPlan(ActivityCluster ac) {
        return jobRun.getActivityClusterPlanMap().get(ac.getId());
    }

    private boolean isPlanned(ActivityCluster ac) {
        return jobRun.getActivityClusterPlanMap().get(ac.getId()) != null;
    }

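    /**
     * Recomputes the set of runnable TaskCluster roots. If nothing is
     * runnable and nothing is in progress, the job has quiesced and a
     * JobCleanupWork with status TERMINATED is scheduled; otherwise the
     * runnable roots are started.
     */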
    private void startRunnableActivityClusters() throws HyracksException {
        Set<TaskCluster> taskClusterRoots = new HashSet<TaskCluster>();
        findRunnableTaskClusterRoots(taskClusterRoots, jobRun.getActivityClusterGraph().getActivityClusterMap()
                .values());
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Runnable TC roots: " + taskClusterRoots + ", inProgressTaskClusters: "
                    + inProgressTaskClusters);
        }
        if (taskClusterRoots.isEmpty() && inProgressTaskClusters.isEmpty()) {
            ccs.getWorkQueue().schedule(new JobCleanupWork(ccs, jobRun.getJobId(), JobStatus.TERMINATED, null));
            return;
        }
        startRunnableTaskClusters(taskClusterRoots);
    }

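    /**
     * Ranks the given TaskCluster roots (and, transitively, the producer
     * clusters they require) with assignRunnabilityRank, collects the
     * RUNNABLE ones with a finite priority into a queue, and assigns
     * locations and task attempts for each before shipping them to the node
     * controllers. Note that iterating a PriorityQueue with for-each visits
     * entries in heap order, not strict priority order; since every queued
     * cluster is started in this same call, only the start order is
     * affected.
     */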
    private void startRunnableTaskClusters(Set<TaskCluster> tcRoots) throws HyracksException {
        Map<TaskCluster, Runnability> runnabilityMap = new HashMap<TaskCluster, Runnability>();
        for (TaskCluster tc : tcRoots) {
            assignRunnabilityRank(tc, runnabilityMap);
        }

        PriorityQueue<RankedRunnableTaskCluster> queue = new PriorityQueue<RankedRunnableTaskCluster>();
        for (Map.Entry<TaskCluster, Runnability> e : runnabilityMap.entrySet()) {
            TaskCluster tc = e.getKey();
            Runnability runnability = e.getValue();
            if (runnability.getTag() != Runnability.Tag.RUNNABLE) {
                continue;
            }
            int priority = runnability.getPriority();
            if (priority >= 0 && priority < Integer.MAX_VALUE) {
                queue.add(new RankedRunnableTaskCluster(priority, tc));
            }
        }
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Ranked TCs: " + queue);
        }

        Map<String, List<TaskAttemptDescriptor>> taskAttemptMap = new HashMap<String, List<TaskAttemptDescriptor>>();
        for (RankedRunnableTaskCluster rrtc : queue) {
            TaskCluster tc = rrtc.getTaskCluster();
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("Found runnable TC: " + tc);
                List<TaskClusterAttempt> attempts = tc.getAttempts();
                LOGGER.fine("Attempts so far: " + attempts.size());
                for (TaskClusterAttempt tcAttempt : attempts) {
                    LOGGER.fine("Status: " + tcAttempt.getStatus());
                }
            }
            assignTaskLocations(tc, taskAttemptMap);
        }

        if (taskAttemptMap.isEmpty()) {
            return;
        }

        startTasks(taskAttemptMap);
    }

    /*
     * Runnability rank has the following semantics
     * Runnability(Runnable TaskCluster depending on completed TaskClusters) = {RUNNABLE, 0}
     * Runnability(Runnable TaskCluster) = max(Rank(Dependent TaskClusters)) + 1
     * Runnability(Non-schedulable TaskCluster) = {NOT_RUNNABLE, _} 
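     *
     * For example, following assignRunnabilityRank below: if TaskCluster A
     * is {RUNNABLE, 0} and TaskCluster B consumes A's output through a
     * connector whose policy has consumerWaitsForProducerToFinish() ==
     * false, B is {RUNNABLE, 1}; with a blocking policy, B stays
     * {NOT_RUNNABLE, _} until A completes.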
     */
    private Runnability assignRunnabilityRank(TaskCluster goal, Map<TaskCluster, Runnability> runnabilityMap) {
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Computing runnability: " + goal);
        }
        if (runnabilityMap.containsKey(goal)) {
            return runnabilityMap.get(goal);
        }
        TaskClusterAttempt lastAttempt = findLastTaskClusterAttempt(goal);
        if (lastAttempt != null) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("Last Attempt Status: " + lastAttempt.getStatus());
            }
            if (lastAttempt.getStatus() == TaskClusterAttempt.TaskClusterStatus.COMPLETED) {
                Runnability runnability = new Runnability(Runnability.Tag.COMPLETED, Integer.MIN_VALUE);
                runnabilityMap.put(goal, runnability);
                return runnability;
            }
            if (lastAttempt.getStatus() == TaskClusterAttempt.TaskClusterStatus.RUNNING) {
                Runnability runnability = new Runnability(Runnability.Tag.RUNNING, Integer.MIN_VALUE);
                runnabilityMap.put(goal, runnability);
                return runnability;
            }
        }
        Map<ConnectorDescriptorId, IConnectorPolicy> connectorPolicyMap = jobRun.getConnectorPolicyMap();
        PartitionMatchMaker pmm = jobRun.getPartitionMatchMaker();
        Runnability aggregateRunnability = new Runnability(Runnability.Tag.RUNNABLE, 0);
        for (PartitionId pid : goal.getRequiredPartitions()) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("Inspecting required partition: " + pid);
            }
            Runnability runnability;
            ConnectorDescriptorId cdId = pid.getConnectorDescriptorId();
            IConnectorPolicy cPolicy = connectorPolicyMap.get(cdId);
            PartitionState maxState = pmm.getMaximumAvailableState(pid);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("Policy: " + cPolicy + " maxState: " + maxState);
            }
            if (PartitionState.COMMITTED.equals(maxState)) {
                runnability = new Runnability(Runnability.Tag.RUNNABLE, 0);
            } else if (PartitionState.STARTED.equals(maxState) && !cPolicy.consumerWaitsForProducerToFinish()) {
                runnability = new Runnability(Runnability.Tag.RUNNABLE, 1);
            } else {
                runnability = assignRunnabilityRank(partitionProducingTaskClusterMap.get(pid), runnabilityMap);
                switch (runnability.getTag()) {
                    case RUNNABLE:
                        if (cPolicy.consumerWaitsForProducerToFinish()) {
                            runnability = new Runnability(Runnability.Tag.NOT_RUNNABLE, Integer.MAX_VALUE);
                        } else {
                            runnability = new Runnability(Runnability.Tag.RUNNABLE, runnability.getPriority() + 1);
                        }
                        break;

                    case NOT_RUNNABLE:
                        break;

                    case RUNNING:
                        if (cPolicy.consumerWaitsForProducerToFinish()) {
                            runnability = new Runnability(Runnability.Tag.NOT_RUNNABLE, Integer.MAX_VALUE);
                        } else {
                            runnability = new Runnability(Runnability.Tag.RUNNABLE, 1);
                        }
                        break;
                }
            }
            aggregateRunnability = Runnability.getWorstCase(aggregateRunnability, runnability);
            if (aggregateRunnability.getTag() == Runnability.Tag.NOT_RUNNABLE) {
                // already not runnable -- cannot get better. bail.
                break;
            }
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("aggregateRunnability: " + aggregateRunnability);
            }
        }
        runnabilityMap.put(goal, aggregateRunnability);
        return aggregateRunnability;
    }

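    /**
     * Creates the next TaskClusterAttempt for the given TaskCluster: builds
     * one TaskAttempt per task, solves the partition-location constraints,
     * records the chosen node for each attempt, and groups the resulting
     * TaskAttemptDescriptors by node id into taskAttemptMap.
     */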
    private void assignTaskLocations(TaskCluster tc, Map<String, List<TaskAttemptDescriptor>> taskAttemptMap)
            throws HyracksException {
        ActivityClusterGraph acg = jobRun.getActivityClusterGraph();
        Task[] tasks = tc.getTasks();
        List<TaskClusterAttempt> tcAttempts = tc.getAttempts();
        int attempts = tcAttempts.size();
        TaskClusterAttempt tcAttempt = new TaskClusterAttempt(tc, attempts);
        Map<TaskId, TaskAttempt> taskAttempts = new HashMap<TaskId, TaskAttempt>();
        Map<TaskId, LValueConstraintExpression> locationMap = new HashMap<TaskId, LValueConstraintExpression>();
        for (int i = 0; i < tasks.length; ++i) {
            Task ts = tasks[i];
            TaskId tid = ts.getTaskId();
            TaskAttempt taskAttempt = new TaskAttempt(tcAttempt, new TaskAttemptId(new TaskId(tid.getActivityId(),
                    tid.getPartition()), attempts), ts);
            taskAttempt.setStatus(TaskAttempt.TaskStatus.INITIALIZED, null);
            locationMap.put(tid,
                    new PartitionLocationExpression(tid.getActivityId().getOperatorDescriptorId(), tid.getPartition()));
            taskAttempts.put(tid, taskAttempt);
        }
        tcAttempt.setTaskAttempts(taskAttempts);
        solver.solve(locationMap.values());
        for (int i = 0; i < tasks.length; ++i) {
            Task ts = tasks[i];
            TaskId tid = ts.getTaskId();
            TaskAttempt taskAttempt = taskAttempts.get(tid);
            String nodeId = assignLocation(acg, locationMap, tid, taskAttempt);
            taskAttempt.setNodeId(nodeId);
            taskAttempt.setStatus(TaskAttempt.TaskStatus.RUNNING, null);
            taskAttempt.setStartTime(System.currentTimeMillis());
            List<TaskAttemptDescriptor> tads = taskAttemptMap.get(nodeId);
            if (tads == null) {
                tads = new ArrayList<TaskAttemptDescriptor>();
                taskAttemptMap.put(nodeId, tads);
            }
            OperatorDescriptorId opId = tid.getActivityId().getOperatorDescriptorId();
            jobRun.registerOperatorLocation(opId, tid.getPartition(), nodeId);
            ActivityPartitionDetails apd = ts.getActivityPlan().getActivityPartitionDetails();
            TaskAttemptDescriptor tad = new TaskAttemptDescriptor(taskAttempt.getTaskAttemptId(),
                    apd.getPartitionCount(), apd.getInputPartitionCounts(), apd.getOutputPartitionCounts());
            tads.add(tad);
        }
        tcAttempt.initializePendingTaskCounter();
        tcAttempts.add(tcAttempt);

        /**
         * To reduce master/slave message traffic, for each TaskAttemptDescriptor
         * we pre-populate the NetworkAddress[][] partitionLocations, in which each
         * row corresponds to an incoming connector descriptor and each column to
         * an input channel of that connector.
         */
        for (Map.Entry<String, List<TaskAttemptDescriptor>> e : taskAttemptMap.entrySet()) {
            List<TaskAttemptDescriptor> tads = e.getValue();
            for (TaskAttemptDescriptor tad : tads) {
                TaskAttemptId taid = tad.getTaskAttemptId();
                int attempt = taid.getAttempt();
                TaskId tid = taid.getTaskId();
                ActivityId aid = tid.getActivityId();
                List<IConnectorDescriptor> inConnectors = acg.getActivityInputs(aid);
                int[] inPartitionCounts = tad.getInputPartitionCounts();
                if (inPartitionCounts != null) {
                    NetworkAddress[][] partitionLocations = new NetworkAddress[inPartitionCounts.length][];
                    for (int i = 0; i < inPartitionCounts.length; ++i) {
                        ConnectorDescriptorId cdId = inConnectors.get(i).getConnectorId();
                        IConnectorPolicy policy = jobRun.getConnectorPolicyMap().get(cdId);
                        /**
                         * Carry sender location information into a task except
                         * when this is a re-attempt and the send side is
                         * materialized and blocking.
                         */
                        if (!(attempt > 0 && policy.materializeOnSendSide() && policy
                                .consumerWaitsForProducerToFinish())) {
                            ActivityId producerAid = acg.getProducerActivity(cdId);
                            partitionLocations[i] = new NetworkAddress[inPartitionCounts[i]];
                            for (int j = 0; j < inPartitionCounts[i]; ++j) {
                                TaskId producerTaskId = new TaskId(producerAid, j);
                                String nodeId = findTaskLocation(producerTaskId);
                                partitionLocations[i][j] = ccs.getNodeMap().get(nodeId).getDataPort();
                            }
                        }
                    }
                    tad.setInputPartitionLocations(partitionLocations);
                }
            }
        }

        tcAttempt.setStatus(TaskClusterAttempt.TaskClusterStatus.RUNNING);
        tcAttempt.setStartTime(System.currentTimeMillis());
        inProgressTaskClusters.add(tc);
    }

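    /**
     * Resolves the node for a task attempt, in order: co-locate with an
     * already-placed blocking activity; otherwise use the constraint
     * solver's value (a node id String, or a String[] of candidates from
     * which the first live node is taken); otherwise pick a random live
     * node. Fails if the chosen node is not live.
     */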
    private String assignLocation(ActivityClusterGraph acg, Map<TaskId, LValueConstraintExpression> locationMap,
            TaskId tid, TaskAttempt taskAttempt) throws HyracksException {
        ActivityId aid = tid.getActivityId();
        ActivityCluster ac = acg.getActivityMap().get(aid);
        Set<ActivityId> blockers = ac.getBlocked2BlockerMap().get(aid);
        String nodeId = null;
        if (blockers != null) {
            for (ActivityId blocker : blockers) {
                nodeId = findTaskLocation(new TaskId(blocker, tid.getPartition()));
                if (nodeId != null) {
                    break;
                }
            }
        }
        Set<String> liveNodes = ccs.getNodeMap().keySet();
        if (nodeId == null) {
            LValueConstraintExpression pLocationExpr = locationMap.get(tid);
            Object location = solver.getValue(pLocationExpr);
            if (location == null) {
                // pick any live node at random; Random.nextInt(bound) avoids the
                // overflow of Math.abs(Integer.MIN_VALUE) in abs-then-modulo indexing
                nodeId = liveNodes.toArray(new String[liveNodes.size()])[new Random().nextInt(liveNodes.size())];
            } else if (location instanceof String) {
                nodeId = (String) location;
            } else if (location instanceof String[]) {
                for (String choice : (String[]) location) {
                    if (liveNodes.contains(choice)) {
                        nodeId = choice;
                        break;
                    }
                }
                if (nodeId == null) {
                    throw new HyracksException("No satisfiable location found for " + taskAttempt.getTaskAttemptId());
                }
            } else {
                throw new HyracksException("Unknown type of value for " + pLocationExpr + ": " + location + "("
                        + location.getClass() + ")");
            }
        }
        if (nodeId == null) {
            throw new HyracksException("No satisfiable location found for " + taskAttempt.getTaskAttemptId());
        }
        if (!liveNodes.contains(nodeId)) {
            throw new HyracksException("Node " + nodeId + " not live");
        }
        return nodeId;
    }

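    /**
     * Returns the node id recorded for the given task in the latest attempt
     * of its TaskCluster, or null if no attempt has placed it yet.
     */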
    private String findTaskLocation(TaskId tid) {
        ActivityId aid = tid.getActivityId();
        ActivityCluster ac = jobRun.getActivityClusterGraph().getActivityMap().get(aid);
        Task[] tasks = getActivityClusterPlan(ac).getActivityPlanMap().get(aid).getTasks();
        List<TaskClusterAttempt> tcAttempts = tasks[tid.getPartition()].getTaskCluster().getAttempts();
        if (tcAttempts == null || tcAttempts.isEmpty()) {
            return null;
        }
        TaskClusterAttempt lastTCA = tcAttempts.get(tcAttempts.size() - 1);
        TaskAttempt ta = lastTCA.getTaskAttempts().get(tid);
        return ta == null ? null : ta.getNodeId();
    }

    private static TaskClusterAttempt findLastTaskClusterAttempt(TaskCluster tc) {
        List<TaskClusterAttempt> attempts = tc.getAttempts();
        if (!attempts.isEmpty()) {
            return attempts.get(attempts.size() - 1);
        }
        return null;
    }

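    /**
     * Ships the per-node task attempt descriptors to their node controllers.
     * The serialized ActivityClusterGraph is sent only the first time a node
     * joins this job's participating set; nodes that already have it receive
     * null for the graph bytes.
     */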
    private void startTasks(Map<String, List<TaskAttemptDescriptor>> taskAttemptMap) throws HyracksException {
        final DeploymentId deploymentId = jobRun.getDeploymentId();
        final JobId jobId = jobRun.getJobId();
        final ActivityClusterGraph acg = jobRun.getActivityClusterGraph();
        final Map<ConnectorDescriptorId, IConnectorPolicy> connectorPolicies = new HashMap<ConnectorDescriptorId, IConnectorPolicy>(
                jobRun.getConnectorPolicyMap());
        try {
            byte[] acgBytes = JavaSerializationUtils.serialize(acg);
            for (Map.Entry<String, List<TaskAttemptDescriptor>> entry : taskAttemptMap.entrySet()) {
                String nodeId = entry.getKey();
                final List<TaskAttemptDescriptor> taskDescriptors = entry.getValue();
                final NodeControllerState node = ccs.getNodeMap().get(nodeId);
                if (node != null) {
                    node.getActiveJobIds().add(jobRun.getJobId());
                    boolean changed = jobRun.getParticipatingNodeIds().add(nodeId);
                    if (LOGGER.isLoggable(Level.FINE)) {
                        LOGGER.fine("Starting: " + taskDescriptors + " at " + entry.getKey());
                    }
                    byte[] jagBytes = changed ? acgBytes : null;
                    node.getNodeController().startTasks(deploymentId, jobId, jagBytes, taskDescriptors,
                            connectorPolicies, jobRun.getFlags());
                }
            }
        } catch (Exception e) {
            throw new HyracksException(e);
        }
    }

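    /**
     * Aborts the latest attempt of every in-progress TaskCluster and
     * schedules a JobCleanupWork with status FAILURE.
     */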
    private void abortJob(List<Exception> exceptions) {
        Set<TaskCluster> inProgressTaskClustersCopy = new HashSet<TaskCluster>(inProgressTaskClusters);
        for (TaskCluster tc : inProgressTaskClustersCopy) {
            abortTaskCluster(findLastTaskClusterAttempt(tc), TaskClusterAttempt.TaskClusterStatus.ABORTED);
        }
        assert inProgressTaskClusters.isEmpty();
        ccs.getWorkQueue().schedule(new JobCleanupWork(ccs, jobRun.getJobId(), JobStatus.FAILURE, exceptions));
    }

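    /**
     * Marks every RUNNING or COMPLETED task attempt in the given
     * TaskClusterAttempt as ABORTED, asks the node controllers to abort the
     * still-RUNNING ones, and withdraws the cluster's uncommitted partitions
     * and partition requests from the PartitionMatchMaker before recording
     * the failed-or-aborted status.
     */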
    private void abortTaskCluster(TaskClusterAttempt tcAttempt,
            TaskClusterAttempt.TaskClusterStatus failedOrAbortedStatus) {
        LOGGER.fine("Aborting task cluster: " + tcAttempt.getAttempt());
        Set<TaskAttemptId> abortTaskIds = new HashSet<TaskAttemptId>();
        Map<String, List<TaskAttemptId>> abortTaskAttemptMap = new HashMap<String, List<TaskAttemptId>>();
        for (TaskAttempt ta : tcAttempt.getTaskAttempts().values()) {
            TaskAttemptId taId = ta.getTaskAttemptId();
            TaskAttempt.TaskStatus status = ta.getStatus();
            abortTaskIds.add(taId);
            LOGGER.fine("Checking " + taId + ": " + ta.getStatus());
            if (status == TaskAttempt.TaskStatus.RUNNING || status == TaskAttempt.TaskStatus.COMPLETED) {
                ta.setStatus(TaskAttempt.TaskStatus.ABORTED, null);
                ta.setEndTime(System.currentTimeMillis());
                List<TaskAttemptId> abortTaskAttempts = abortTaskAttemptMap.get(ta.getNodeId());
                if (status == TaskAttempt.TaskStatus.RUNNING && abortTaskAttempts == null) {
                    abortTaskAttempts = new ArrayList<TaskAttemptId>();
                    abortTaskAttemptMap.put(ta.getNodeId(), abortTaskAttempts);
                }
                if (status == TaskAttempt.TaskStatus.RUNNING) {
                    abortTaskAttempts.add(taId);
                }
            }
        }
        final JobId jobId = jobRun.getJobId();
        LOGGER.fine("Abort map for job: " + jobId + ": " + abortTaskAttemptMap);
        for (Map.Entry<String, List<TaskAttemptId>> entry : abortTaskAttemptMap.entrySet()) {
            final NodeControllerState node = ccs.getNodeMap().get(entry.getKey());
            final List<TaskAttemptId> abortTaskAttempts = entry.getValue();
            if (node != null) {
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Aborting: " + abortTaskAttempts + " at " + entry.getKey());
                }
                try {
                    node.getNodeController().abortTasks(jobId, abortTaskAttempts);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        inProgressTaskClusters.remove(tcAttempt.getTaskCluster());
        TaskCluster tc = tcAttempt.getTaskCluster();
        PartitionMatchMaker pmm = jobRun.getPartitionMatchMaker();
        pmm.removeUncommittedPartitions(tc.getProducedPartitions(), abortTaskIds);
        pmm.removePartitionRequests(tc.getRequiredPartitions(), abortTaskIds);

        tcAttempt.setStatus(failedOrAbortedStatus);
        tcAttempt.setEndTime(System.currentTimeMillis());
    }

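    /**
     * Finds and aborts "doomed" TaskClusters: starting from the in-progress
     * sinks, a cluster is doomed if its latest attempt failed or was
     * aborted, or if it transitively requires a partition that cannot be
     * satisfied under its connector policy because the producer is doomed.
     */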
    private void abortDoomedTaskClusters() throws HyracksException {
        Set<TaskCluster> doomedTaskClusters = new HashSet<TaskCluster>();
        for (TaskCluster tc : inProgressTaskClusters) {
            // Start search at TCs that produce no outputs (sinks)
            if (tc.getProducedPartitions().isEmpty()) {
                findDoomedTaskClusters(tc, doomedTaskClusters);
            }
        }

        for (TaskCluster tc : doomedTaskClusters) {
            TaskClusterAttempt tca = findLastTaskClusterAttempt(tc);
            if (tca != null) {
                abortTaskCluster(tca, TaskClusterAttempt.TaskClusterStatus.ABORTED);
            }
        }
    }

    private boolean findDoomedTaskClusters(TaskCluster tc, Set<TaskCluster> doomedTaskClusters) {
        if (doomedTaskClusters.contains(tc)) {
            return true;
        }
        TaskClusterAttempt lastAttempt = findLastTaskClusterAttempt(tc);
        if (lastAttempt != null) {
            switch (lastAttempt.getStatus()) {
                case ABORTED:
                case FAILED:
                    return true;

                case COMPLETED:
                    return false;
            }
        }
        Map<ConnectorDescriptorId, IConnectorPolicy> connectorPolicyMap = jobRun.getConnectorPolicyMap();
        PartitionMatchMaker pmm = jobRun.getPartitionMatchMaker();
        boolean doomed = false;
        for (TaskCluster depTC : tc.getDependencyTaskClusters()) {
            if (findDoomedTaskClusters(depTC, doomedTaskClusters)) {
                doomed = true;
            }
        }
        for (PartitionId pid : tc.getRequiredPartitions()) {
            ConnectorDescriptorId cdId = pid.getConnectorDescriptorId();
            IConnectorPolicy cPolicy = connectorPolicyMap.get(cdId);
            PartitionState maxState = pmm.getMaximumAvailableState(pid);
            if (maxState == null
                    || (cPolicy.consumerWaitsForProducerToFinish() && maxState != PartitionState.COMMITTED)) {
                if (findDoomedTaskClusters(partitionProducingTaskClusterMap.get(pid), doomedTaskClusters)) {
                    doomed = true;
                }
            }
        }
        if (doomed) {
            doomedTaskClusters.add(tc);
        }
        return doomed;
    }

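    /**
     * Records the completion of a task attempt. When the last pending task
     * of the current TaskClusterAttempt completes, the attempt is marked
     * COMPLETED, the cluster leaves the in-progress set, and scheduling is
     * re-run to release any newly runnable clusters. Notifications for stale
     * attempts are logged and ignored.
     */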
    public void notifyTaskComplete(TaskAttempt ta) throws HyracksException {
        TaskAttemptId taId = ta.getTaskAttemptId();
        TaskCluster tc = ta.getTask().getTaskCluster();
        TaskClusterAttempt lastAttempt = findLastTaskClusterAttempt(tc);
        if (lastAttempt != null && taId.getAttempt() == lastAttempt.getAttempt()) {
            TaskAttempt.TaskStatus taStatus = ta.getStatus();
            if (taStatus == TaskAttempt.TaskStatus.RUNNING) {
                ta.setStatus(TaskAttempt.TaskStatus.COMPLETED, null);
                ta.setEndTime(System.currentTimeMillis());
                if (lastAttempt.decrementPendingTasksCounter() == 0) {
                    lastAttempt.setStatus(TaskClusterAttempt.TaskClusterStatus.COMPLETED);
                    lastAttempt.setEndTime(System.currentTimeMillis());
                    inProgressTaskClusters.remove(tc);
                    startRunnableActivityClusters();
                }
            } else {
                LOGGER.warning("Spurious task complete notification: " + taId + " Current state = " + taStatus);
            }
        } else {
            LOGGER.warning("Ignoring task complete notification: " + taId + " -- Current last attempt = " + lastAttempt);
        }
    }

    /**
     * Indicates that a single task attempt has encountered a failure.
     *
     * @param ta
     *            - Failed Task Attempt
     * @param ac
     *            - Activity Cluster that owns this Task
     * @param exceptions
     *            - Cause(s) of the failure
     */
    public void notifyTaskFailure(TaskAttempt ta, ActivityCluster ac, List<Exception> exceptions) {
        try {
            LOGGER.fine("Received failure notification for TaskAttempt " + ta.getTaskAttemptId());
            TaskAttemptId taId = ta.getTaskAttemptId();
            TaskCluster tc = ta.getTask().getTaskCluster();
            TaskClusterAttempt lastAttempt = findLastTaskClusterAttempt(tc);
            if (lastAttempt != null && taId.getAttempt() == lastAttempt.getAttempt()) {
                LOGGER.fine("Marking TaskAttempt " + ta.getTaskAttemptId() + " as failed");
                ta.setStatus(TaskAttempt.TaskStatus.FAILED, exceptions);
                abortTaskCluster(lastAttempt, TaskClusterAttempt.TaskClusterStatus.FAILED);
                abortDoomedTaskClusters();
                if (lastAttempt.getAttempt() >= jobRun.getActivityClusterGraph().getMaxReattempts()) {
                    abortJob(exceptions);
                    return;
                }
                startRunnableActivityClusters();
            } else {
                LOGGER.warning("Ignoring task failure notification: " + taId + " -- Current last attempt = "
                        + lastAttempt);
            }
        } catch (Exception e) {
            abortJob(Collections.singletonList(e));
        }
    }

    /**
     * Indicates that the provided set of nodes has left the cluster.
     *
     * @param deadNodes
     *            - Ids of the failed nodes
     */
    public void notifyNodeFailures(Set<String> deadNodes) {
        try {
            jobRun.getPartitionMatchMaker().notifyNodeFailures(deadNodes);
            jobRun.getParticipatingNodeIds().removeAll(deadNodes);
            jobRun.getCleanupPendingNodeIds().removeAll(deadNodes);
            if (jobRun.getPendingStatus() != null && jobRun.getCleanupPendingNodeIds().isEmpty()) {
                finishJob(jobRun);
                return;
            }
            for (ActivityCluster ac : jobRun.getActivityClusterGraph().getActivityClusterMap().values()) {
                if (isPlanned(ac)) {
                    TaskCluster[] taskClusters = getActivityClusterPlan(ac).getTaskClusters();
                    if (taskClusters != null) {
                        for (TaskCluster tc : taskClusters) {
                            TaskClusterAttempt lastTaskClusterAttempt = findLastTaskClusterAttempt(tc);
                            if (lastTaskClusterAttempt != null
                                    && (lastTaskClusterAttempt.getStatus() == TaskClusterAttempt.TaskClusterStatus.COMPLETED || lastTaskClusterAttempt
                                            .getStatus() == TaskClusterAttempt.TaskClusterStatus.RUNNING)) {
                                boolean abort = false;
                                for (TaskAttempt ta : lastTaskClusterAttempt.getTaskAttempts().values()) {
                                    assert (ta.getStatus() == TaskAttempt.TaskStatus.COMPLETED || ta.getStatus() == TaskAttempt.TaskStatus.RUNNING);
                                    if (deadNodes.contains(ta.getNodeId())) {
                                        ta.setStatus(
                                                TaskAttempt.TaskStatus.FAILED,
                                                Collections.singletonList(new Exception("Node " + ta.getNodeId()
                                                        + " failed")));
                                        ta.setEndTime(System.currentTimeMillis());
                                        abort = true;
                                    }
                                }
                                if (abort) {
                                    abortTaskCluster(lastTaskClusterAttempt,
                                            TaskClusterAttempt.TaskClusterStatus.ABORTED);
                                }
                            }
                        }
                        abortDoomedTaskClusters();
                    }
                }
            }
            startRunnableActivityClusters();
        } catch (Exception e) {
            abortJob(Collections.singletonList(e));
        }
    }

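    /**
     * Finalizes a job run: notifies the application context, applies the
     * pending status, moves the run from the active map into the archive and
     * run history, and, when task profiling is enabled, logs the job details
     * as JSON.
     */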
    private void finishJob(final JobRun run) {
        JobId jobId = run.getJobId();
        CCApplicationContext appCtx = ccs.getApplicationContext();
        if (appCtx != null) {
            try {
                appCtx.notifyJobFinish(jobId);
            } catch (HyracksException e) {
                e.printStackTrace();
            }
        }
        run.setStatus(run.getPendingStatus(), run.getPendingExceptions());
        ccs.getActiveRunMap().remove(jobId);
        ccs.getRunMapArchive().put(jobId, run);
        ccs.getRunHistory().put(jobId, run.getExceptions());

        if (run.getActivityClusterGraph().isReportTaskDetails()) {
            /**
             * log job details when task-profiling is enabled
             */
            try {
                ccs.getJobLogFile().log(createJobLogObject(run));
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    }

    private JSONObject createJobLogObject(final JobRun run) {
        JSONObject jobLogObject = new JSONObject();
        try {
            ActivityClusterGraph acg = run.getActivityClusterGraph();
            jobLogObject.put("activity-cluster-graph", acg.toJSON());
            jobLogObject.put("job-run", run.toJSON());
        } catch (JSONException e) {
            throw new RuntimeException(e);
        }
        return jobLogObject;
    }
}