// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// com.netflix.fenzo.samples.SampleFramework Maven / Gradle / Ivy

/*
 * Copyright 2015 Netflix, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.netflix.fenzo.samples;

import com.netflix.fenzo.SchedulingResult;
import com.netflix.fenzo.TaskAssignmentResult;
import com.netflix.fenzo.TaskRequest;
import com.netflix.fenzo.TaskScheduler;
import com.netflix.fenzo.VMAssignmentResult;
import com.netflix.fenzo.VirtualMachineLease;
import com.netflix.fenzo.functions.Action1;
import com.netflix.fenzo.functions.Func1;
import com.netflix.fenzo.plugins.VMLeaseObject;
import org.apache.mesos.MesosSchedulerDriver;
import org.apache.mesos.Protos;
import org.apache.mesos.Scheduler;
import org.apache.mesos.SchedulerDriver;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;

/**
 * A sample Mesos framework that shows a sample usage of Fenzo {@link TaskScheduler}.
 */
public class SampleFramework {

    /**
     * A sample mesos scheduler that shows how mesos callbacks can be setup for use with Fenzo TaskScheduler.
     *
     * The callbacks are invoked by the Mesos driver, not by the scheduling loop in
     * {@link SampleFramework#runAll()}, so anything they share with that loop goes through thread-safe
     * containers ({@code leasesQueue}, {@code launchedTasks}).
     */
    public class MesosScheduler implements Scheduler {

        /**
         * When we register successfully with mesos, any previous resource offers are invalid. Tell Fenzo scheduler
         * to expire all leases (aka offers) right away.
         */
        @Override
        public void registered(SchedulerDriver driver, Protos.FrameworkID frameworkId, Protos.MasterInfo masterInfo) {
            System.out.println("Registered! ID = " + frameworkId.getValue());
            scheduler.expireAllLeases();
        }

        /**
         * Similar to {@code registered()} method, expire any previously known resource offers by asking Fenzo
         * scheduler to expire all leases right away.
         */
        @Override
        public void reregistered(SchedulerDriver driver, Protos.MasterInfo masterInfo) {
            System.out.println("Re-registered " + masterInfo.getId());
            scheduler.expireAllLeases();
        }

        /**
         * Add the received Mesos resource offers to the lease queue. Fenzo scheduler is used by calling its main
         * allocation routine in a loop, see {@link SampleFramework#runAll()}. Collect offers from mesos into a queue
         * so the next call to Fenzo's allocation routine can pick them up.
         *
         * @param driver The driver that delivered the offers (unused).
         * @param offers The resource offers received from mesos; each is wrapped in a {@link VMLeaseObject}.
         */
        @Override
        public void resourceOffers(SchedulerDriver driver, List<Protos.Offer> offers) {
            for (Protos.Offer offer : offers) {
                System.out.println("Adding offer " + offer.getId() + " from host " + offer.getHostname());
                leasesQueue.offer(new VMLeaseObject(offer));
            }
        }

        /**
         * Tell Fenzo scheduler that a resource offer should be expired immediately.
         */
        @Override
        public void offerRescinded(SchedulerDriver driver, Protos.OfferID offerId) {
            scheduler.expireLease(offerId.getValue());
        }

        /**
         * Update Fenzo scheduler of task completion if received status indicates a terminal state. There is no need
         * to tell Fenzo scheduler of task started because that is supposed to have been already done before launching
         * the task in Mesos.
         *
         * The task's entry is removed from the launched tasks map so the map does not grow without bound. If no
         * host is recorded for the task (for example, a repeated terminal update), the completion callback is still
         * invoked but Fenzo is not asked to un-assign, since there is no host to un-assign from.
         *
         * In a real world framework, this state change would also be persisted with a state machine of choice.
         */
        @Override
        public void statusUpdate(SchedulerDriver driver, Protos.TaskStatus status) {
            final String taskId = status.getTaskId().getValue();
            System.out.println("Task Update: " + taskId + " in state " + status.getState());
            switch (status.getState()) {
                case TASK_FAILED:
                case TASK_LOST:
                case TASK_KILLED:
                case TASK_FINISHED: {
                    // Take the host out of the map before notifying, so a task re-submitted by the
                    // completion callback under the same ID is less likely to be confused with this one.
                    final String hostname = launchedTasks.remove(taskId);
                    onTaskComplete.call(taskId);
                    if (hostname != null) {
                        scheduler.getTaskUnAssigner().call(taskId, hostname);
                    }
                    break;
                }
                default:
                    // Non-terminal states need no action; Fenzo was told of the assignment at launch time.
                    break;
            }
        }

        @Override
        public void frameworkMessage(SchedulerDriver driver, Protos.ExecutorID executorId, Protos.SlaveID slaveId, byte[] data) {}

        @Override
        public void disconnected(SchedulerDriver driver) {}

        /**
         * Upon slave lost notification, tell Fenzo scheduler to expire all leases with the given slave ID. Note, however,
         * that if there was no offer received from that slave prior to this call, Fenzo would not have a mapping from
         * the slave ID to hostname (Fenzo maintains slaves state by hostname). This is OK since there would be no offers
         * to expire. However, any tasks running on the lost slave will not be removed by this call to Fenzo. Task lost
         * status updates would ensure that.
         */
        @Override
        public void slaveLost(SchedulerDriver driver, Protos.SlaveID slaveId) {
            scheduler.expireAllLeasesByVMId(slaveId.getValue());
        }

        /**
         * Do nothing, instead, rely on task lost status updates to inform Fenzo of task completions.
         */
        @Override
        public void executorLost(SchedulerDriver driver, Protos.ExecutorID executorId, Protos.SlaveID slaveId, int status) {
            System.out.println("Executor " + executorId.getValue() + " lost, status=" + status);
        }

        /**
         * Report errors signalled by the driver instead of dropping them silently.
         */
        @Override
        public void error(SchedulerDriver driver, String message) {
            System.err.println("Mesos driver error: " + message);
        }
    }

    /** Source of tasks to run; supplied by the creator of this framework and consumed by {@link #runAll()}. */
    private final BlockingQueue<TaskRequest> taskQueue;
    /** Offers received in mesos callbacks, waiting to be handed to Fenzo by {@link #runAll()}. */
    private final BlockingQueue<VirtualMachineLease> leasesQueue;
    /**
     * Task ID to hostname for tasks that have been launched. Written by the scheduling loop and consumed by the
     * mesos status update callback, hence a concurrent map.
     */
    private final Map<String, String> launchedTasks;
    private final TaskScheduler scheduler;
    private final MesosSchedulerDriver mesosSchedulerDriver;
    /** Lets the lease reject action, created before the driver exists, reach the driver once it is set. */
    private final AtomicReference<MesosSchedulerDriver> ref = new AtomicReference<>();
    /** Task ID to request for tasks not yet launched; only accessed from {@link #runAll()}. */
    private final Map<String, TaskRequest> pendingTasksMap = new HashMap<>();
    private final Action1<String> onTaskComplete;
    private final Func1<String, String> taskCmdGetter;
    private final AtomicBoolean isShutdown = new AtomicBoolean(false);

    /**
     * Create a sample mesos framework with the given task queue and mesos master connection string. As would be typical
     * for frameworks that wish to use Fenzo task scheduler, a lease queue is created for mesos scheduler callback to
     * insert offers received from mesos. This sample implementation obtains the tasks to run via a task queue. The
     * {@link SampleFramework#runAll()} method implements the scheduling loop that continuously takes pending tasks from
     * the queue and uses Fenzo's task scheduler to assign resources to them.
     *
     * The task scheduler created in this sample is a rather simple one, with no advanced features.
     *
     * Note that the constructor also starts a thread that runs the mesos driver.
     *
     * @param taskQueue The task queue.
     * @param mesosMaster Connection string for mesos master.
     * @param onTaskComplete A single argument action trigger to invoke upon task completion, with task ID is the argument.
     * @param taskCmdGetter A single argument function to invoke to get the command line to execute for a given task ID,
     *                      passed as the only argument.
     */
    public SampleFramework(BlockingQueue<TaskRequest> taskQueue, String mesosMaster, Action1<String> onTaskComplete,
                           Func1<String, String> taskCmdGetter) {
        this.taskQueue = taskQueue;
        this.leasesQueue = new LinkedBlockingQueue<>();
        this.onTaskComplete = onTaskComplete;
        this.taskCmdGetter = taskCmdGetter;
        launchedTasks = new ConcurrentHashMap<>();
        scheduler = new TaskScheduler.Builder()
                // a very large lease expiry value (1e9 seconds)
                .withLeaseOfferExpirySecs(1000000000)
                .withLeaseRejectAction(new Action1<VirtualMachineLease>() {
                    @Override
                    public void call(VirtualMachineLease lease) {
                        System.out.println("Declining offer on " + lease.hostname());
                        ref.get().declineOffer(lease.getOffer().getId());
                    }
                })
                .build();
        Protos.FrameworkInfo framework = Protos.FrameworkInfo.newBuilder()
                .setName("Sample Fenzo Framework")
                .setUser("")
                .build();
        Scheduler mesosScheduler = new MesosScheduler();
        mesosSchedulerDriver = new MesosSchedulerDriver(mesosScheduler, framework, mesosMaster);
        ref.set(mesosSchedulerDriver);
        new Thread() {
            @Override
            public void run() {
                mesosSchedulerDriver.run();
            }
        }.start();
    }

    /**
     * Shuts down the Mesos driver and flags the scheduling loop in {@link #runAll()} to exit.
     */
    public void shutdown() {
        System.out.println("Stopping down mesos driver");
        mesosSchedulerDriver.stop();
        isShutdown.set(true);
    }

    /**
     * Starts the scheduling loop, {@link #runAll()}, on a new thread and returns immediately.
     */
    public void start() {
        new Thread(new Runnable() {
            @Override
            public void run() {
                runAll();
            }
        }).start();
    }

    /**
     * Run scheduling loop until shutdown is called or the calling thread is interrupted.
     * This sample implementation shows the general pattern of using Fenzo's task scheduler. Scheduling occurs in a
     * continuous loop, iteratively calling {@link TaskScheduler#scheduleOnce(List, List)} method, passing in the
     * list of tasks currently pending launch, and a list of any new resource offers obtained from mesos since the last
     * time the lease queue was drained. The call returns an assignments result object from which any tasks assigned
     * can be launched via the mesos driver. The result contains a map with hostname as the key and assignment result
     * as the value. The assignment result contains the resource offers of the host that were used for the assignments
     * and a list of tasks assigned resources from the host. The resource offers were removed from the internal state in
     * Fenzo. However, the task assignments are not updated, yet. If the returned assignments are being used to launch
     * the tasks in mesos, call Fenzo's task assigner for each task launched to indicate that the task is being launched.
     * If the returned assignments are not being used, the resource offers in the assignment results must either be
     * rejected in mesos, or added back into Fenzo explicitly.
     */
    void runAll() {
        System.out.println("Running all");
        List<VirtualMachineLease> newLeases = new ArrayList<>();
        while (!isShutdown.get()) {
            newLeases.clear();
            System.out.println("#Pending tasks: " + pendingTasksMap.size());
            TaskRequest taskRequest;
            try {
                // With nothing pending, wait up to 5 seconds for work; otherwise only check briefly for new
                // tasks so that the tasks already pending are retried promptly.
                taskRequest = pendingTasksMap.isEmpty()
                        ? taskQueue.poll(5, TimeUnit.SECONDS)
                        : taskQueue.poll(1, TimeUnit.MILLISECONDS);
            } catch (InterruptedException ie) {
                System.err.println("Error polling task queue: " + ie.getMessage());
                // Treat interruption as a request to stop; keep the flag set for whoever owns the thread.
                Thread.currentThread().interrupt();
                return;
            }
            if (taskRequest != null) {
                // Register the polled task first, then everything else that is already queued, in queue order.
                pendingTasksMap.put(taskRequest.getId(), taskRequest);
                List<TaskRequest> newTaskRequests = new ArrayList<>();
                taskQueue.drainTo(newTaskRequests);
                for (TaskRequest request : newTaskRequests) {
                    pendingTasksMap.put(request.getId(), request);
                }
            }
            leasesQueue.drainTo(newLeases);
            SchedulingResult schedulingResult = scheduler.scheduleOnce(new ArrayList<>(pendingTasksMap.values()), newLeases);
            System.out.println("result=" + schedulingResult);
            Map<String, VMAssignmentResult> resultMap = schedulingResult.getResultMap();
            for (VMAssignmentResult result : resultMap.values()) {
                launchAssignedTasks(result);
            }
            // insert a short delay before scheduling any new tasks or tasks from before that haven't been launched yet.
            try {
                Thread.sleep(100);
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Launch, via the mesos driver, all tasks that Fenzo assigned to a single host, and move each of them from the
     * pending tasks map to the launched tasks map. Fenzo's task assigner is called for every task launched.
     *
     * @param result The assignment result for one host, as returned in the scheduling result map.
     */
    private void launchAssignedTasks(VMAssignmentResult result) {
        List<VirtualMachineLease> leasesUsed = result.getLeasesUsed();
        // The first lease supplies the hostname and slave ID used for every task in this result.
        VirtualMachineLease firstLease = leasesUsed.get(0);
        String hostname = firstLease.hostname();
        Protos.SlaveID slaveId = firstLease.getOffer().getSlaveId();
        List<Protos.TaskInfo> taskInfos = new ArrayList<>();
        StringBuilder message = new StringBuilder("Launching on VM " + hostname + " tasks ");
        for (TaskAssignmentResult assignment : result.getTasksAssigned()) {
            String taskId = assignment.getTaskId();
            message.append(taskId).append(", ");
            taskInfos.add(getTaskInfo(slaveId, taskId, taskCmdGetter.call(taskId)));
            // remove the task from pending tasks map and put into launched tasks map
            // (in real world, transition the task state)
            pendingTasksMap.remove(taskId);
            launchedTasks.put(taskId, hostname);
            scheduler.getTaskAssigner().call(assignment.getRequest(), hostname);
        }
        List<Protos.OfferID> offerIDs = new ArrayList<>();
        for (VirtualMachineLease lease : leasesUsed) {
            offerIDs.add(lease.getOffer().getId());
        }
        System.out.println(message.toString());
        mesosSchedulerDriver.launchTasks(offerIDs, taskInfos);
    }

    /**
     * Build the mesos task info for a task that runs the given command on the given slave.
     *
     * NOTE(review): the resources are fixed at 1 "cpus" and 128 "mem" for every task, independent of what the
     * task's request asked Fenzo for; confirm this matches the task requests submitted to this framework.
     *
     * @param slaveID The slave on which the task is to be launched.
     * @param taskId The task ID, also used to form the task name.
     * @param cmd The command line to execute for the task.
     * @return The task info to pass to the mesos driver for launching.
     */
    static Protos.TaskInfo getTaskInfo(Protos.SlaveID slaveID, final String taskId, String cmd) {
        Protos.TaskID pTaskId = Protos.TaskID.newBuilder()
                .setValue(taskId).build();
        return Protos.TaskInfo.newBuilder()
                .setName("task " + pTaskId.getValue())
                .setTaskId(pTaskId)
                .setSlaveId(slaveID)
                .addResources(Protos.Resource.newBuilder()
                        .setName("cpus")
                        .setType(Protos.Value.Type.SCALAR)
                        .setScalar(Protos.Value.Scalar.newBuilder().setValue(1)))
                .addResources(Protos.Resource.newBuilder()
                        .setName("mem")
                        .setType(Protos.Value.Type.SCALAR)
                        .setScalar(Protos.Value.Scalar.newBuilder().setValue(128)))
                .setCommand(Protos.CommandInfo.newBuilder().setValue(cmd).build())
                .build();
    }

}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy