org.cloudsimplus.faultinjection.HostFaultInjection Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of cloudsim-plus Show documentation
CloudSim Plus: A modern, highly extensible and easier-to-use Java 8 Framework for Modeling and Simulation of Cloud Computing Infrastructures and Services
There is a newer version: 8.0.0
Show newest version
/*
 * CloudSim Plus: A modern, highly-extensible and easier-to-use Framework for
 * Modeling and Simulation of Cloud Computing Infrastructures and Services.
 * http://cloudsimplus.org
 *
 *     Copyright (C) 2015-2021 Universidade da Beira Interior (UBI, Portugal) and
 *     the Instituto Federal de Educação Ciência e Tecnologia do Tocantins (IFTO, Brazil).
 *
 *     This file is part of CloudSim Plus.
 *
 *     CloudSim Plus is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     CloudSim Plus is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with CloudSim Plus. If not, see <http://www.gnu.org/licenses/>.
 */
package org.cloudsimplus.faultinjection;

import org.cloudbus.cloudsim.brokers.DatacenterBroker;
import org.cloudbus.cloudsim.cloudlets.Cloudlet;
import org.cloudbus.cloudsim.core.AbstractMachine;
import org.cloudbus.cloudsim.core.CloudSimEntity;
import org.cloudbus.cloudsim.core.CloudSimTag;
import org.cloudbus.cloudsim.core.Simulation;
import org.cloudbus.cloudsim.core.events.SimEvent;
import org.cloudbus.cloudsim.datacenters.Datacenter;
import org.cloudbus.cloudsim.distributions.ContinuousDistribution;
import org.cloudbus.cloudsim.distributions.PoissonDistr;
import org.cloudbus.cloudsim.distributions.StatisticalDistribution;
import org.cloudbus.cloudsim.distributions.UniformDistr;
import org.cloudbus.cloudsim.hosts.Host;
import org.cloudbus.cloudsim.hosts.HostSimple;
import org.cloudbus.cloudsim.resources.Pe;
import org.cloudbus.cloudsim.util.TimeUtil;
import org.cloudbus.cloudsim.vms.Vm;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Predicate;

import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;
import static org.cloudbus.cloudsim.core.CloudSimTag.HOST_FAILURE;

/**
 * Generates random failures for the {@link Pe}'s of {@link Host}s
 * inside a given {@link Datacenter}.
 * A Fault Injection object
 * usually has to be created after the VMs are created,
 * to make it easier to define a function to be used
 * to clone failed VMs.
 *
 * 
 * The events happen in the following order:
 * 

 *  a time to inject a Host failure is generated using a given Random Number Generator;
 *  a Host is randomly selected to fail at that time using an internal
 *  Uniform Random Number Generator with the same seed of the given generator;
 *  the number of Host PEs to fail is randomly generated using the internal generator;
 *  failed physical PEs are removed from affected VMs; VMs with no remaining
 *  PEs are destroyed and clones of them are submitted to the {@link DatacenterBroker}
 *  of the failed VMs;
 *  another failure is scheduled for a future time using the given generator;
 *  the process repeats until the end of the simulation.
 * 
 * 
 *
 * 
 * When Host's PEs fail, if there are more available PEs
 * than the required by its running VMs, no VM will be affected.
 * 
 *
 * 
 * Considering that X is the number of failed PEs and it is
 * lower than the total available PEs.
 * In this case, the X PEs will be removed cyclically, 1 by 1,
 * from running VMs.
 * This way, some VMs may continue running
 * with less PEs than they requested initially.
 * On the other hand, if after the failure the number of Host working PEs
 * is lower than the required to run all VMs, some VMs will be
 * destroyed.
 * 
 *
 * 
 * If all PEs are removed from a VM, it is automatically destroyed
 * and a snapshot (clone) from it is taken and submitted
 * to the broker, so that the clone can start executing
 * into another host. In this case, all the cloudlets
 * which were running inside the VM yet, will be
 * cloned to and restart executing from the beginning.
 * 
 *
 * 
 * If a cloudlet running inside a VM which was affected by a PE failure
 * requires Y PEs but the VMs doesn't have such PEs anymore,
 * the Cloudlet will continue executing, but it will spend
 * more time to finish.
 * For instance, if a Cloudlet requires 2 PEs but after the failure
 * the VM was left with just 1 PE, the Cloudlet will spend the double
 * of the time to finish.
 * 
 *
 * 
 * NOTES:
 * 

 *     
 *      Host PEs failures may happen after all its VMs have finished executing.
 *      This way, the presented simulation results may show that the
 *      number of PEs into a Host is lower than the required by its VMs.
 *      In this case, the VMs shown in the results finished executing before
 *      some failures have happened. Analysing the logs is easy to
 *      confirm that.
 *      
 *      Failure inter-arrivals are defined in hours, since a second is too
 *      small a time unit to define such a value. Furthermore, it doesn't make sense to define
 *      the number of failures per second. This way, the generator of failure arrival times
 *      given to the constructor considers the time in hours, although the simulation
 *      time unit is seconds. Since Cloudlets commonly take just some seconds to finish,
 *      mainly in simulation examples, failures may happen just after the cloudlets
 *      have finished. This way, one usually should make sure that the Cloudlets' lengths
 *      are large enough to allow failures to happen before they end.
 *      
 * 
 * 
 *
 * For more details, check
 * Raysa Oliveira's Master Thesis (only in Portuguese).
 *
 * @author raysaoliveira
 * @since CloudSim Plus 1.2.0
 * @see SAP Blog: Availability vs Reliability
 *
 * TODO The class has multiple responsibilities.
 *      The fault injection mechanism must be separated from
 *      the fault recovery. The cloner methods are fault recovery.
 */
public class HostFaultInjection extends CloudSimEntity {
    /**
     * Maximum number of seconds for a VM to recovery from a failure,
     * which is randomly selected based on this value.
     * The recovery time is the delay that will be set
     * to start a clone from a failed VM.
     */
    private static final int MAX_VM_RECOVERY_TIME_SECS = 450;

    private static final Logger LOGGER = LoggerFactory.getLogger(HostFaultInjection.class.getSimpleName());

    /**
     * @see #getLastFailedHost()
     */
    private Host lastFailedHost;

    /**
     * Number of PEs failed into the {@link #lastFailedHost}.
     */
    private int lastFailedPesNumber;

    /**
     * @see #getDatacenter()
     */
    private Datacenter datacenter;

    /**
     * A Pseudo Random Number Generator used to select a Host
     * and the number of PEs to set as fail.
     */
    private final ContinuousDistribution random;

    /**
     * A map that stores {@link VmCloner} objects to be used to clone
     * the VMs belonging to a broker.
     *
     * @see #addVmCloner(DatacenterBroker, VmCloner)
     */
    private final Map vmClonerMap;

    /**
     * A Pseudo Random Number Generator which generates the times (in hours)
     * that Hosts failures will occur.
     */
    private final StatisticalDistribution faultArrivalHoursGenerator;

    /**
     * The number of host failures that have happened in the simulation.
     */
    private int hostFaultsNumber;

    /**
     * A map to store the time (in seconds) VM failures took to be recovered,
     * which is when a clone from the last failed VM for a given broker is created.
     * Since a broker just creates a VM clone when all its VMs have failed,
     * only at that time the failure is in fact recovered.
     *
     * It means the time period failure of all VMs persisted
     * before a clone was created.
     */
    private final Map vmRecoveryTimeSecsMap;

    /**
     * A map to store the times (in seconds) for each Host failure.
     */
    private final Map> hostFaultsTimeSecsMap;

    /**
     * A map to store the number of faults that affected all VMs from each broker.
     */
    private final Map vmFaultsByBroker;

    /** @see #getMaxTimeToFailInHours() */
    private double maxTimeToFailInHours;

    /**
     * Creates a fault injection mechanism for the Hosts of a given {@link Datacenter}.
     * The Hosts failures are randomly injected according to a {@link UniformDistr}
     * pseudo random number generator, which indicates the mean of failures to be
     * generated per hour, (which is also called event rate or rate parameter).
     *
     * @param datacenter the Datacenter to which failures will be randomly injected for its Hosts
     * @see #HostFaultInjection(Datacenter, StatisticalDistribution)
     */
    public HostFaultInjection(final Datacenter datacenter) {
        // Delegates to the main constructor, using a default-constructed UniformDistr
        // as the generator of failure arrival times.
        this(datacenter, new UniformDistr());
    }

    /**
     * Creates a fault injection mechanism for the Hosts of a given {@link Datacenter}.
     * The Hosts failures are randomly injected according to the given
     * pseudo random number generator, that indicates the mean of failures to be generated
     * per minute, (which is also called event rate or rate parameter).
     *  @param datacenter the Datacenter to which failures will be randomly injected for its Hosts
     *
     * @param faultArrivalHoursGenerator a Pseudo Random Number Generator which generates the
     * times Hosts failures will occur (in hours).
     * The values returned by the generator will be considered to be hours.
     * Frequently it is used a
     * {@link PoissonDistr} to generate failure arrivals, but any {@link ContinuousDistribution}
     */
    public HostFaultInjection(final Datacenter datacenter, final StatisticalDistribution faultArrivalHoursGenerator) {
        super(datacenter.getSimulation());
        this.setDatacenter(datacenter);
        this.lastFailedHost = Host.NULL;
        this.faultArrivalHoursGenerator = faultArrivalHoursGenerator;
        this.random = new UniformDistr(faultArrivalHoursGenerator.getSeed()+1);
        this.vmRecoveryTimeSecsMap = new HashMap<>();
        this.hostFaultsTimeSecsMap = new HashMap<>();
        this.vmFaultsByBroker = new HashMap<>();
        this.vmClonerMap = new HashMap<>();
        this.maxTimeToFailInHours = Double.MAX_VALUE;
    }

    @Override
    protected void startInternal() {
        // Starting the entity only schedules the first fault injection attempt.
        scheduleFaultInjection();
    }

    /**
     * Schedules a message to be processed internally
     * to try injecting a Host PEs failure.
     */
    private void scheduleFaultInjection() {
        final Simulation sim = getSimulation();
        // Matches any future event that is not a Host failure event.
        final Predicate<SimEvent> otherEventsPredicate = evt -> evt.getTag() != HOST_FAILURE;

        /*
        Schedules another failure while the maximum time to fail was not reached
        or while there are other (non-failure) events to be processed.
        Otherwise, the simulation has finished and no more failures should be scheduled.

        The 2nd condition may be a complex operation that must be called only when necessary
        in the short-circuit below.
        */
        if (sim.clock() < getMaxTimeToFailInSecs() || sim.isThereAnyFutureEvt(otherEventsPredicate)) {
            schedule(this, getTimeDelayForNextFault(), HOST_FAILURE);
        }
    }

    /**
     * Gets the time delay in seconds, from the current simulation time,
     * that the next failure will be injected.
     * Since the values returned by the {@link #faultArrivalHoursGenerator}
     * are considered to be in hours, such values are converted to seconds.
     *
     * @return the next failure injection delay in seconds
     */
    private double getTimeDelayForNextFault() {
        // The generator's sample is taken as hours and converted to seconds (3600 secs per hour).
        final double hoursUntilNextFault = faultArrivalHoursGenerator.sample();
        return hoursUntilNextFault * 3600;
    }

    @Override
    public void processEvent(final SimEvent evt) {
        // Only Host failure events are handled; any other event is ignored.
        if (evt.getTag() != HOST_FAILURE) {
            return;
        }

        generateHostFaultAndScheduleNext();
    }

    /**
     * Generates a fault for all PEs of a Host.
     * @param host the Host to generate the fault to.
     */
    public void generateHostFault(final Host host){
        // Requesting the failure of every currently working PE.
        final int allWorkingPes = host.getWorkingPesNumber();
        generateHostFault(host, allWorkingPes);
    }

    /**
     * Generates a fault for a given number of random PEs of a Host.
     * @param host the Host to generate the fault to.
     * @param pesFailures number of PEs that must fail
     */
    public void generateHostFault(final Host host, final int pesFailures){
        if(host == Host.NULL){
            return;
        }

        this.lastFailedHost = host;
        hostFaultsNumber++;
        registerHostFaultTime();

        // Working PEs are counted before and after the failure is generated.
        final long workingPesBefore = lastFailedHost.getWorkingPesNumber();
        this.lastFailedPesNumber = generateHostPesFaults(pesFailures);
        final long workingPesAfter = lastFailedHost.getWorkingPesNumber();
        final long pesRequiredByVms = getWorkingVmsPesCount();

        final String vmsPesMsg;
        if (lastFailedHost.getVmList().isEmpty()) {
            vmsPesMsg = "";
        } else {
            vmsPesMsg = " | VMs required PEs: " + pesRequiredByVms;
        }

        // When no PE is left, the failure is reported by setAllVmsToFailed() instead.
        if(workingPesAfter > 0) {
            LOGGER.error(
                "{}: {}: Generated {} PEs failures from {} previously working PEs for {} at minute {}.{}" +
                "\t  Current Working PEs: {} | Number of VMs: {}{}",
                getSimulation().clockStr(), getClass().getSimpleName(), lastFailedPesNumber,
                workingPesBefore, lastFailedHost, getSimulation().clock() / 60, System.lineSeparator(),
                workingPesAfter, lastFailedHost.getVmList().size(), vmsPesMsg);
        }

        if (workingPesAfter == 0) {
            setAllVmsToFailed();
            return;
        }

        if (workingPesAfter >= pesRequiredByVms) {
            logNoVmFault();
            return;
        }

        deallocateFailedHostPesFromVms();
    }

    /**
     * Generates a failure for a specific number of PEs from a
     * randomly selected Host and schedules the next time to try generating a fault.
     */
    private void generateHostFaultAndScheduleNext() {
        try {
            final Host targetHost = getRandomHost();
            final int pesToFail = randomFailedPesNumber(targetHost);
            generateHostFault(targetHost, pesToFail);
        } finally {
            // The next injection attempt is scheduled even if generating the fault throws.
            scheduleFaultInjection();
        }
    }

    /**
     * Register the time for a Host failure.
     */
    private void registerHostFaultTime() {
        // Creates the list of fault times for the Host on its first failure.
        final List<Double> faultTimes =
            hostFaultsTimeSecsMap.computeIfAbsent(lastFailedHost, host -> new ArrayList<>());
        faultTimes.add(getSimulation().clock());
    }

    /**
     * Randomly gets a Host that will have some PEs set to failed.
     *
     * @return the randomly selected Host; or {@link Host#NULL} if the Datacenter
     * doesn't have Hosts or the selected one doesn't have more PEs.
     */
    private Host getRandomHost() {
        final int hostsNumber = datacenter.getHostList().size();
        if (hostsNumber == 0) {
            return Host.NULL;
        }

        // Scales the random sample by the number of Hosts to get a Host index.
        final int hostIndex = (int) (random.sample() * hostsNumber);
        return datacenter.getHost(hostIndex);
    }

    /**
     * Sets all VMs inside the {@link #getLastFailedHost() last failed Host} to
     * failed, when all Host PEs have failed.
     */
    private void setAllVmsToFailed() {
        final int vmsNumber = lastFailedHost.getVmList().size();

        // Describes the effect of the failure according to the existence of VMs into the Host.
        final String effect;
        if (vmsNumber > 0) {
            effect = String.format("affecting all its %d VMs", vmsNumber);
        } else {
            effect = "but there was no running VM";
        }

        LOGGER.error(
                "{}: All the {} PEs of {} failed, {}.",
                getSimulation().clockStr(), lastFailedHost.getNumberOfPes(), lastFailedHost, effect);
        setVmListToFailed(lastFailedHost.getVmList());
    }

    /**
     * Shows that the failure of Host PEs hasn't affected any VM, because there
     * is more working PEs than required by all VMs.
     */
    private void logNoVmFault() {
        final int vmsNumber = lastFailedHost.getVmList().size();
        if(vmsNumber == 0){
            LOGGER.info("\tThere aren't VMs running on the failed Host.");
            return;
        }

        // PEs required by the VMs still working into the failed Host.
        final int pesRequiredByVms = (int) getWorkingVmsPesCount();
        LOGGER.info(
                "\tNumber of failed PEs is less than PEs required by all its {} VMs, thus it doesn't affect any VM.{}" +
                "Total PEs: {} | Total Failed PEs: {} | Working PEs: {} | Current PEs required by VMs: {}.",
                vmsNumber, System.lineSeparator(),
                lastFailedHost.getNumberOfPes(), lastFailedHost.getFailedPesNumber(),
                lastFailedHost.getWorkingPesNumber(), pesRequiredByVms);
    }

    /**
     * De-allocates the physical PEs failed for the
     * {@link #getLastFailedHost() last failed Host} from affected VMs.
     */
    private void deallocateFailedHostPesFromVms() {
        LOGGER.error("\t{} PEs just failed. There is a total of {} working PEs.",
            lastFailedPesNumber,
                lastFailedHost.getWorkingPesNumber());
        cyclicallyRemoveFailedHostPesFromVms();

        // VMs left with no PE after the removal above are set to failed.
        final List<Vm> vmsWithoutPes =
            lastFailedHost.getVmList()
                .stream()
                .filter(vm -> vm.getNumberOfPes() == 0)
                .collect(toList());
        setVmListToFailed(vmsWithoutPes);
    }

    /**
     * Removes one physical failed PE from one affected VM at a time.
     * Affected VMs are dealt as a circular list, visiting
     * one VM at a time to remove 1 PE from it,
     * until all the failed PEs are removed.
     *
     */
    private void cyclicallyRemoveFailedHostPesFromVms() {
        int failedPesToRemoveFromVms = failedPesToRemoveFromVms();
        List<Vm> vmsWithPes = getVmsWithPEsFromFailedHost();
        final int affectedVms = Math.min(vmsWithPes.size(), failedPesToRemoveFromVms);

        LOGGER.warn("\t{} VMs affected from a total of {}. {} PEs are going to be removed from them.",
                affectedVms, lastFailedHost.getVmList().size(), failedPesToRemoveFromVms);
        int idx = 0;
        while (!vmsWithPes.isEmpty() && failedPesToRemoveFromVms > 0) {
            failedPesToRemoveFromVms--;
            // The list may have shrunk since the last iteration, so the index wraps around its current size.
            idx = idx % vmsWithPes.size();
            final Vm vm = vmsWithPes.get(idx);
            lastFailedHost.getVmScheduler().deallocatePesFromVm(vm, 1);
            vm.getCloudletScheduler().deallocatePesFromVm(1);
            //remove 1 failed PE from the VM
            vm.getProcessor().deallocateAndRemoveResource(1);

            LOGGER.warn(
                    "\tRemoving 1 PE from VM {} due to Host PE failure. New VM PEs Number: {}",
                    vm.getId(), vm.getNumberOfPes());
            idx++;
            // Re-reads the list so that VMs which have just lost their last PE are no longer visited.
            vmsWithPes = getVmsWithPEsFromFailedHost();
        }
    }

    /**
     * Gets the number of failed PEs to remove from VMs.
     * @return
     */
    private int failedPesToRemoveFromVms() {
        // PEs required by working VMs that the Host working PEs can no longer provide.
        final int pesRequiredByVms = (int) getWorkingVmsPesCount();
        return pesRequiredByVms - lastFailedHost.getWorkingPesNumber();
    }

    /**
     * Gets a List of VMs that have any PE from the {@link #lastFailedHost}.
     * @return
     */
    private List<Vm> getVmsWithPEsFromFailedHost() {
        // Only VMs which still have at least one PE are returned.
        return lastFailedHost
                .getVmList()
                .stream()
                .filter(vm -> vm.getNumberOfPes() > 0)
                .collect(toList());
    }

    /**
     * Sets all VMs from a given list as failed, due to Host PEs failures.
     */
    private void setVmListToFailed(final List<Vm> vms) {
        // The last failed VM of each broker is collected before the VMs are set to failed.
        final Map<DatacenterBroker, Vm> lastVmFailedByBroker = getLastFailedVmByBroker(vms);

        vms.forEach(this::setVmToFailed);
        // For each broker, a clone of its last failed VM is created if no VM of that broker is working.
        lastVmFailedByBroker.forEach(this::createVmCloneIfAllVmsDestroyed);
    }

    /**
     * Groups the given VMs by broker, keeping for each broker only the VM with the highest ID.
     *
     * @param vmsWithoutPes the VMs to group by broker
     * @return a map from each broker to its VM with the highest ID in the given list
     */
    private Map<DatacenterBroker, Vm> getLastFailedVmByBroker(final List<Vm> vmsWithoutPes) {
        final Comparator<Vm> vmComparator = Comparator.comparingLong(Vm::getId);
        return vmsWithoutPes
                    .stream()
                    .collect(
                        // When a broker has more than one VM in the list, the one with the highest ID wins.
                        toMap(Vm::getBroker, Function.identity(), BinaryOperator.maxBy(vmComparator))
                    );
    }

    /**
     * Creates a VM for the last failed VM if all VMs belonging to the broker have failed
     * and the maximum number of clones to create was not reached.
     *
     * 
     * If all VMs have failed and a {@link VmCloner} is not set or the max number of
     * clones already was created, from the time of the failure
     * until the end of the simulation, this interval the customer
     * service is completely unavailable.
     *
     * Since the map below stores recovery times and not unavailability times,
     * it's being store the failure time as a negative value.
     * This way, when computing the availability for the customer,
     * these negative values are changed to: lastSimulationTime - |negativeRecoveryTime|.
     * Using this logic, is like the VM was recovered only in the end of the simulation.
     * It in fact is not recovered, but this logic has to be applied to
     * allow computing the availability.
     *
     * 
     * @param broker
     * @param lastVmFailedFromBroker
     */
    private void createVmCloneIfAllVmsDestroyed(final DatacenterBroker broker, final Vm lastVmFailedFromBroker) {
        if(isSomeVmWorking(broker)){
            return;
        }

        if(!isVmClonerSet(broker) || getVmCloner(broker).isMaxClonesNumberReached()) {
            vmRecoveryTimeSecsMap.put(lastVmFailedFromBroker, -getSimulation().clock());
        }

        if(!isVmClonerSet(broker)) {
            LOGGER.warn("\tA Vm Cloner was not set for {}. So that VM failure will not be recovered.", broker);
            return;
        }

        final VmCloner cloner = getVmCloner(broker);
        if(cloner.isMaxClonesNumberReached()){
            LOGGER.warn("\tThe maximum allowed number of {} VMs to create has been reached.", cloner.getMaxClonesNumber());
            return;
        }

        registerFaultOfAllVms(broker);
        final double recoveryTimeSecs = getRandomRecoveryTimeForVmInSecs();
        LOGGER.info("\tTime to recovery from fault by cloning the failed VM: {} minutes", recoveryTimeSecs/60.0);

        final Map.Entry> entry = cloner.clone(lastVmFailedFromBroker);

        final Vm clonedVm = entry.getKey();
        final List clonedCloudlets = entry.getValue();
        clonedVm.setSubmissionDelay(recoveryTimeSecs);
        clonedVm.addOnHostAllocationListener(evt -> vmRecoveryTimeSecsMap.put(evt.getVm(), recoveryTimeSecs));
        broker.submitVm(clonedVm);
        broker.submitCloudletList(clonedCloudlets, recoveryTimeSecs);
    }

    /**
     * Sets a VM inside the {@link #getLastFailedHost() last failed Host} to
     * failed and use the VM and Cloudlets cloner functions to create a clone of
     * the VMs with all its Cloudlets, to simulate the initialization of a new
     * VM instance from a snapshot of the failed VM.
     *
     * @param vm VM to set to failed
     */
    private void setVmToFailed(final Vm vm) {
        if (Host.NULL.equals(lastFailedHost)) {
            return;
        }

        vm.setFailed(true);
        final DatacenterBroker broker = vm.getBroker();

        // The message is logged only when a cloner is set and the broker still has a working VM.
        final boolean cloneNotNeededYet = isVmClonerSet(broker) && isSomeVmWorking(broker);
        if(cloneNotNeededYet){
            LOGGER.info(
                "\t{} destroyed but not cloned, since there are {} VMs for the {} yet",
                vm, getRunningVmsNumber(broker), broker);
        }

        /*
         The broker is the entity expected to request VM creation and destruction.
         Therefore, it is set as the sender of the VM destroy request.
         */
        getSimulation().sendNow(broker, datacenter, CloudSimTag.VM_DESTROY, vm);
    }

    /**
     * Register 1 more fault happened which caused all VMs from a given broker
     * to fault.
     *
     * @param broker the broker to increase the number of faults
     */
    private void registerFaultOfAllVms(final DatacenterBroker broker) {
        // Starts the counter at 1 for a new broker or adds 1 to the existing counter.
        vmFaultsByBroker.merge(broker, 1, (currentFaults, increment) -> currentFaults + increment);
    }

    /**
     * Gets the {@link VmCloner} object to clone a {@link Vm}.
     *
     * @param broker the broker the VM belongs to
     * @return the {@link VmCloner} object or {@link VmCloner#NULL} if no cloner was set
     */
    private VmCloner getVmCloner(final DatacenterBroker broker) {
        // Falls back to the NULL cloner when no entry exists for the broker.
        return vmClonerMap.containsKey(broker) ? vmClonerMap.get(broker) : VmCloner.NULL;
    }

    private boolean isSomeVmWorking(final DatacenterBroker broker) {
        // Looks for at least one working VM in the broker's list of VMs in execution.
        return broker.getVmExecList()
                     .stream()
                     .filter(Vm::isWorking)
                     .findAny()
                     .isPresent();
    }

    private long getRunningVmsNumber(final DatacenterBroker broker) {
        // Counts the working VMs in the broker's list of VMs in execution.
        long workingVms = 0;
        for (final Vm vm : broker.getVmExecList()) {
            if (vm.isWorking()) {
                workingVms++;
            }
        }

        return workingVms;
    }

    /**
     * Checks if a {@link VmCloner} is set to a given broker.
     * @param broker broker to check if it has a {@link VmCloner}.
     * @return true if the broker has a {@link VmCloner}, false otherwise
     */
    private boolean isVmClonerSet(final DatacenterBroker broker) {
        // A cloner is considered set when the lookup doesn't fall back to the NULL cloner.
        final VmCloner cloner = getVmCloner(broker);
        return cloner != VmCloner.NULL;
    }

    /**
     * Gets the Datacenter's availability as a percentage value between 0 and 1,
     * based on VMs' downtime (the times VMs took to be repaired).
     * @return
     */
    public double availability() {
        // A null broker is passed to the overloaded method.
        final DatacenterBroker noSpecificBroker = null;
        return availability(noSpecificBroker);
    }

    /**
     * Gets the availability for a given broker as a percentage value between 0 and 1,
     * based on VMs' downtime (the times VMs took to be repaired).
     *
     * @param broker the broker to get the availability of its VMs
     * @return
     */
    public double availability(final DatacenterBroker broker) {
        final double mtbf = meanTimeBetweenVmFaultsInMinutes(broker);

        // A zero MTBF means no failure, which is 100% availability.
        if(mtbf != 0) {
            final double mttr = meanTimeToRepairVmFaultsInMinutes(broker);
            return mtbf / (mtbf + mttr);
        }

        return 1;
    }

    /**
     * Gets the total number of faults happened for existing hosts.
     * This isn't the total number of failed hosts because one
     * host may fail multiple times.
     * @return the number of Host faults generated so far
     */
    public int getHostFaultsNumber() {
        return hostFaultsNumber;
    }

    /**
     * Gets the total number of faults which affected all VMs from any broker.
     * It is the sum of the faults registered for every broker,
     * not the number of brokers that had faults.
     *
     * @return the sum of the number of faults registered for all brokers
     * @see #getTotalFaultsNumber(DatacenterBroker)
     */
    public long getTotalFaultsNumber() {
        // Each map value is the faults counter of one broker, so the values must be summed.
        return vmFaultsByBroker.values().stream().mapToLong(Integer::longValue).sum();
    }

    /**
     * Gets the total number of Host faults which affected all VMs from a given broker
     * or VMs from all existing brokers.
     *
     * @param broker the broker to get the number of Host faults affecting its VMs,
     *               or null to get the sum of faults for all existing brokers
     * @return the number of faults registered for the given broker (zero if there is none),
     *         or the sum for all brokers if a null broker is given
     * @see #getTotalFaultsNumber()
     */
    public long getTotalFaultsNumber(final DatacenterBroker broker) {
        /* The no-args availability, MTBF and MTTR methods pass a null broker to mean "all brokers".
         * Therefore, null must not be rejected here.*/
        if (broker == null) {
            return vmFaultsByBroker.values().stream().mapToLong(Integer::longValue).sum();
        }

        return vmFaultsByBroker.getOrDefault(broker, 0);
    }

    /**
     * Gets the total time (in minutes) the failed VMs took to recover from failure,
     * considering the VMs of a given broker or, if the broker is null, all VMs.
     * See the method {@link #createVmCloneIfAllVmsDestroyed(DatacenterBroker, Vm)}
     * to understand the logic of the values in the recovery times map.
     *
     * @param broker the broker to sum the recovery times of its VMs or null to consider all VMs
     * @return the sum of the recovery times, in minutes
     */
    private double totalVmsRecoveryTimeInMinutes(final DatacenterBroker broker) {
        final double totalRecoverySecs = vmRecoveryTimeSecsMap
            .entrySet()
            .stream()
            // A null broker selects the entries of all VMs.
            .filter(entry -> broker == null || broker.equals(entry.getKey().getBroker()))
            .mapToDouble(Map.Entry::getValue)
            // A negative value is a negated failure time: the recovery time is counted up to the current clock.
            .map(secs -> secs >= 0 ? secs : getSimulation().clock() - Math.abs(secs))
            .sum();

        return TimeUtil.secondsToMinutes(totalRecoverySecs);
    }

    /**
     * Computes the current Mean Time Between host Failures (MTBF) in minutes.
     * Since Hosts don't actually recover from failures,
     * there aren't recovery time to make easier the computation
     * of MTBF for Host as it is directly computed for VMs.
     *
     * @return the current mean time (in minutes) between Host failures (MTBF)
     * or zero if no failures have happened yet
     * @see #meanTimeBetweenVmFaultsInMinutes()
     */
    public double meanTimeBetweenHostFaultsInMinutes() {
        // All fault times from all Hosts, in ascending order.
        final double[] sortedFaultTimes = hostFaultsTimeSecsMap
            .values()
            .stream()
            .flatMap(Collection::stream)
            .mapToDouble(Double::doubleValue)
            .sorted()
            .toArray();

        final int faults = sortedFaultTimes.length;
        if(faults == 0){
            return 0;
        }

        // Sums the intervals between consecutive failure times (t2 - t1).
        double intervalsSum = 0;
        for(int i = 1; i < faults; i++) {
            intervalsSum += sortedFaultTimes[i] - sortedFaultTimes[i - 1];
        }

        // The sum is divided by the number of faults, not by the number of intervals.
        final double meanSecs = intervalsSum/faults;
        // The cast drops the fractional part, so the result is in whole minutes.
        return (long)(meanSecs/60.0);
    }

    /**
     * Computes the current Mean Time Between host Failures (MTBF) in minutes,
     * which affected VMs from any broker for the entire Datacenter.
     * It uses a straightforward way to compute the MTBF.
     * Since it's stored the VM recovery times, it's possible
     * to use such values to make easier the MTBF computation,
     * different from the Hosts MTBF.
     *
     * @return the current Mean Time Between host Failures (MTBF) in minutes
     * or zero if no VM was destroyed due to Host failure
     * @see #meanTimeBetweenHostFaultsInMinutes()
     */
    public double meanTimeBetweenVmFaultsInMinutes() {
        // A null broker is passed to the overloaded method.
        final DatacenterBroker noSpecificBroker = null;
        return meanTimeBetweenVmFaultsInMinutes(noSpecificBroker);
    }

    /**
     * Computes the current Mean Time Between host Failures (MTBF) in minutes,
     * which affected VMs from a given broker.
     * It uses a straightforward way to compute the MTBF.
     * Since it's stored the VM recovery times, it's possible
     * to use such values to make easier the MTBF computation,
     * different from the Hosts MTBF.
     *
     * @param broker the broker to get the MTBF for
     * @return the current mean time (in minutes) between Host failures (MTBF)
     * or zero if no VM was destroyed due to Host failure
     * @see #meanTimeBetweenHostFaultsInMinutes()
     */
    public double meanTimeBetweenVmFaultsInMinutes(final DatacenterBroker broker) {
        // Without registered faults there is no MTBF to compute.
        if(getTotalFaultsNumber(broker) == 0){
            return 0;
        }

        // The value subtracted from the current clock is the mean time to repair, not the total recovery time.
        final double mttrMinutes = meanTimeToRepairVmFaultsInMinutes(broker);
        return getSimulation().clockInMinutes() - mttrMinutes;
    }

    /**
     * Computes the current Mean Time To Repair failures of VMs in minutes (MTTR)
     * in the Datacenter, for all existing brokers.
     *
     * @return the MTTR (in minutes) or zero if no VM was destroyed due to Host failure
     */
    public double meanTimeToRepairVmFaultsInMinutes() {
        // A null broker requests the MTTR of all VMs for all existing brokers.
        final DatacenterBroker allBrokers = null;
        return meanTimeToRepairVmFaultsInMinutes(allBrokers);
    }

    /**
     * Computes the current Mean Time To Repair Failures of VMs in minutes (MTTR)
     * belonging to given broker.
     * If a null broker is given, computes the MTTR of all VMs for all existing brokers.
     *
     * @param broker the broker to get the MTTR for or null if the MTTR is to be computed for all brokers
     * @return the current MTTR (in minutes) or zero if no VM was destroyed due to Host failure
     */
    public double meanTimeToRepairVmFaultsInMinutes(final DatacenterBroker broker) {
        final long faults = getTotalFaultsNumber(broker);

        // The zero check avoids a division by zero when there are no faults.
        return faults == 0 ? 0 : totalVmsRecoveryTimeInMinutes(broker) / faults;
    }

    /**
     * Sets as failed up to a given number of working PEs from the
     * {@link #getLastFailedHost() last failed Host}.
     * If the Host has fewer working PEs than requested, only the
     * available working PEs are set as failed.
     *
     * @param pesFailures the requested number of PEs to set as failed
     * @return the number of PEs actually set as failed for the Host,
     * which is lower than the requested number when the Host
     * has fewer working PEs than that
     */
    private int generateHostPesFaults(final int pesFailures) {
        // limit() truncates the stream, so the list may hold fewer PEs than requested.
        final var peList = lastFailedHost.getWorkingPeList()
            .stream()
            .limit(pesFailures)
            .collect(toList());

        ((HostSimple)lastFailedHost).setPeStatus(peList, Pe.Status.FAILED);

        // Report how many PEs were really failed, instead of echoing the requested number.
        return peList.size();
    }

    /**
     * Gets the total number of PEs from all working VMs
     * placed into the {@link #getLastFailedHost() last failed Host}.
     *
     * @return the sum of the number of PEs of every working VM in that Host
     */
    private long getWorkingVmsPesCount() {
        long workingVmsPes = 0;
        for (final Vm vm : lastFailedHost.getVmList()) {
            // Only VMs that are still working contribute to the sum.
            if (vm.isWorking()) {
                workingVmsPes += vm.getNumberOfPes();
            }
        }

        return workingVmsPes;
    }

    /**
     * Randomly generates a number of PEs which will fail for a given Host.
     * The minimum number of PEs to fail is 1.
     *
     * @param host the Host to generate a number of PEs to fail
     * @return the generated number of failed PEs: a random sample multiplied by the
     * number of working PEs of the Host, truncated to an integer and incremented by 1
     * (NOTE(review): this stays within [1 and Number of working PEs]
     * only if the generator samples values lower than 1 -- confirm)
     */
    private int randomFailedPesNumber(final Host host) {
        /* Scales the random sample by the number of working PEs.
         Truncating such a value and adding 1 ensures the result is at least 1
         for non-negative samples. */
        final double scaledSample = random.sample() * host.getWorkingPesNumber();
        return (int) scaledSample + 1;
    }

    /**
     * Gets the datacenter in which failures will be injected.
     *
     * @return the datacenter set for this fault injection object
     */
    public Datacenter getDatacenter() {
        return this.datacenter;
    }

    /**
     * Sets the datacenter in which failures will be injected.
     *
     * @param datacenter the datacenter to set, which must not be null
     */
    protected final void setDatacenter(final Datacenter datacenter) {
        // Rejects a null datacenter before changing the attribute.
        requireNonNull(datacenter);
        this.datacenter = datacenter;
    }

    /**
     * Adds a {@link VmCloner} that creates a clone for the last failed {@link Vm}
     * belonging to a given broker, when all VMs of that broker have failed.
     *
     * <p>This is optional. If a {@link VmCloner} is not set,
     * VMs will not be recovered from failures.</p>
     *
     * @param broker the broker to associate the {@link VmCloner} to, which must not be null
     * @param cloner the {@link VmCloner} to set, which must not be null
     */
    public void addVmCloner(final DatacenterBroker broker, final VmCloner cloner) {
        // Both arguments are validated (broker first) before the map is touched.
        requireNonNull(broker);
        requireNonNull(cloner);
        vmClonerMap.put(broker, cloner);
    }

    /**
     * Gets the last Host for which a failure was injected.
     *
     * @return the last failed Host or {@link Host#NULL} if no Host has failed yet
     */
    public Host getLastFailedHost() {
        return this.lastFailedHost;
    }

    /**
     * Gets a pseudo random number used to give a
     * recovery time (in seconds) for each VM that was failed.
     *
     * @return a random sample multiplied by {@code MAX_VM_RECOVERY_TIME_SECS}, plus 1
     */
    public double getRandomRecoveryTimeForVmInSecs() {
        final double scaledSample = random.sample() * MAX_VM_RECOVERY_TIME_SECS;
        return scaledSample + 1;
    }

    /**
     * Gets the maximum time to generate a failure (in hours).
     * After that time, no failure will be generated.
     *
     * @return the maximum time to generate a failure (in hours)
     * @see #getMaxTimeToFailInSecs()
     */
    public double getMaxTimeToFailInHours() {
        return this.maxTimeToFailInHours;
    }

    /**
     * Gets the maximum time to generate a failure (in seconds),
     * converted from the value configured in hours.
     * After that time, no failure will be generated.
     *
     * @return the maximum time to generate a failure (in seconds)
     * @see #getMaxTimeToFailInHours()
     */
    private double getMaxTimeToFailInSecs() {
        final int secondsPerHour = 3600;
        return maxTimeToFailInHours * secondsPerHour;
    }

    /**
     * Sets the maximum time to generate a failure (in hours).
     * After that time, no failure will be generated.
     * The given value is stored as is, without any validation.
     *
     * @param maxTimeToFailInHours the maximum time to set (in hours)
     */
    public void setMaxTimeToFailInHours(final double maxTimeToFailInHours) {
        // The parameter shadows the attribute, so "this" is required here.
        this.maxTimeToFailInHours = maxTimeToFailInHours;
    }
}