org.cloudsimplus.faultinjection.HostFaultInjection Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of cloudsim-plus Show documentation
CloudSim Plus: A modern, highly extensible and easier-to-use Java 8 Framework for Modeling and Simulation of Cloud Computing Infrastructures and Services
There is a newer version: 8.0.0
Show newest version
/*
 * CloudSim Plus: A modern, highly-extensible and easier-to-use Framework for
 * Modeling and Simulation of Cloud Computing Infrastructures and Services.
 * http://cloudsimplus.org
 *
 *     Copyright (C) 2015-2018 Universidade da Beira Interior (UBI, Portugal) and
 *     the Instituto Federal de Educação Ciência e Tecnologia do Tocantins (IFTO, Brazil).
 *
 *     This file is part of CloudSim Plus.
 *
 *     CloudSim Plus is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     CloudSim Plus is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with CloudSim Plus. If not, see <http://www.gnu.org/licenses/>.
 */
package org.cloudsimplus.faultinjection;

import org.cloudbus.cloudsim.brokers.DatacenterBroker;
import org.cloudbus.cloudsim.cloudlets.Cloudlet;
import org.cloudbus.cloudsim.core.CloudSimEntity;
import org.cloudbus.cloudsim.core.CloudSimTags;
import org.cloudbus.cloudsim.core.Machine;
import org.cloudbus.cloudsim.core.events.SimEvent;
import org.cloudbus.cloudsim.datacenters.Datacenter;
import org.cloudbus.cloudsim.distributions.ContinuousDistribution;
import org.cloudbus.cloudsim.distributions.PoissonDistr;
import org.cloudbus.cloudsim.distributions.UniformDistr;
import org.cloudbus.cloudsim.hosts.Host;
import org.cloudbus.cloudsim.resources.Pe;
import org.cloudbus.cloudsim.vms.Vm;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.stream.Stream;

import static java.util.function.BinaryOperator.maxBy;
import static java.util.function.Function.identity;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;

/**
 * Generates random failures for the {@link Pe}'s of {@link Host}s
 * inside a given {@link Datacenter}.
 * A Fault Injection object
 * usually has to be created after the VMs are created,
 * to make it easier to define a function to be used
 * to clone failed VMs.
 *
 * The events happen in the following order:
 * 
 *  a time to inject a Host failure is generated using a given Random Number Generator;
 *  a Host is randomly selected to fail at that time using an internal Uniform Random Number Generator with the same seed of the given generator;
 *  the number of Host PEs to fail is randomly generated using the internal generator;
 *  failed physical PEs are removed from affected VMs; VMs with no remaining PEs are destroyed and clones of them are submitted to the {@link DatacenterBroker} of the failed VMs;
 *  another failure is scheduled for a future time using the given generator;
 *  the process repeats until the end of the simulation.
 * 
 *
 * 
 * When Host's PEs fail, if there are more available PEs
 * than the required by its running VMs, no VM will be affected.
 * 
 *
 * 
 * Considering that X is the number of failed PEs and it is
 * lower than the total available PEs.
 * In this case, the X PEs will be removed cyclically, 1 by 1,
 * from running VMs.
 * This way, some VMs may continue running
 * with less PEs than they requested initially.
 * On the other hand, if after the failure the number of Host working PEs
 * is lower than the required to run all VMs, some VMs will be
 * destroyed.
 * 
 *
 * 
 * If all PEs are removed from a VM, it is automatically destroyed
 * and a snapshot (clone) from it is taken and submitted
 * to the broker, so that the clone can start executing
 * into another host. In this case, all the cloudlets
 * which were running inside the VM yet, will be
 * cloned to and restart executing from the beginning.
 * 
 *
 * 
 * If a cloudlet running inside a VM which was affected by a PE failure
 * requires Y PEs but the VMs doesn't have such PEs anymore,
 * the Cloudlet will continue executing, but it will spend
 * more time to finish.
 * For instance, if a Cloudlet requires 2 PEs but after the failure
 * the VM was left with just 1 PE, the Cloudlet will spend the double
 * of the time to finish.
 * 
 *
 * 
 * NOTES:
 * 

 *     
 *      Host PEs failures may happen after all its VMs have finished executing.
 *      This way, the presented simulation results may show that the
 *      number of PEs into a Host is lower than the required by its VMs.
 *      In this case, the VMs shown in the results finished executing before
 *      some failures have happened. Analysing the logs is easy to
 *      confirm that.
 *      
 *      Failure inter-arrival times are defined in hours, since a second is too
 *      small a time unit to define such a value. Furthermore, it doesn't make sense to define
 *      the number of failures per second. This way, the generator of failure arrival times
 *      given to the constructor considers the time in hours, even though the simulation
 *      time unit is seconds. Since Cloudlets commonly take just some seconds to finish,
 *      mainly in simulation examples, failures may happen only after the Cloudlets
 *      have finished. This way, one usually should make sure that Cloudlets' lengths
 *      are large enough to allow failures to happen before they end.
 *      
 * 
 * 
 *
 * For more details, check
 * Raysa Oliveira's Master Thesis (only in Portuguese).
 *
 * @author raysaoliveira
 * @since CloudSim Plus 1.2.0
 * @see SAP Blog: Availability vs Reliability
 *
 * @todo The class has multiple responsibilities.
 * The fault injection mechanism must be separated from
 * the fault recovery. The cloner methods are fault recovery.
 */
public class HostFaultInjection extends CloudSimEntity {
    private static final Logger logger = LoggerFactory.getLogger(HostFaultInjection.class.getSimpleName());

    /**
     * Maximum number of seconds for a VM to recovery from a failure,
     * which is randomly selected based on this value.
     * The recovery time is the delay that will be set
     * to start a clone from a failed VM.
     */
    private static final int MAX_VM_RECOVERY_TIME_SECS = 450;

    /**
     * @see #getLastFailedHost()
     */
    private Host lastFailedHost;

    /**
     * Number of PEs failed into the {@link #lastFailedHost}.
     */
    private int lastNumberOfFailedPes;

    /**
     * @see #getDatacenter()
     */
    private Datacenter datacenter;

    /**
     * A Pseudo Random Number Generator used to select a Host
     * and the number of PEs to set as fail.
     */
    private final ContinuousDistribution random;

    /**
     * A map that stores {@link VmCloner} objects to be used to clone
     * the VMs belonging to a broker.
     *
     * @see #addVmCloner(DatacenterBroker, VmCloner)
     */
    private final Map vmClonerMap;

    /**
     * A Pseudo Random Number Generator which generates the times (in hours)
     * that Hosts failures will occur.
     */
    private final ContinuousDistribution faultArrivalTimesGeneratorInHours;

    /**
     * The attribute counts how many host failures the simulation had
     */
    private int numberOfHostFaults;

    /**
     * A map to store the time (in seconds) VM failures took to be recovered,
     * which is when a clone from the last failed VM for a given broker is created.
     * Since a broker just creates a VM clone when all its VMs have failed,
     * only at that time the failure is in fact recovered.
     *
     * It means the time period failure of all VMs persisted
     * before a clone was created.
     */
    private final Map vmRecoveryTimeSecsMap;

    /**
     * A map to store the times (in seconds) for each Host failure.
     */
    private final Map> hostFaultsTimeSecsMap;

    /**
     * A map to store the number of failures that affected all VMs from each broker.
     */
    private final Map faultsOfAllVmsByBroker;


    private double maxTimeToGenerateFailureInHours;

    /**
     * Creates a fault injection mechanism for the Hosts of a given {@link Datacenter}.
     * Failure arrival times are drawn from the given generator and
     * interpreted as hours.
     *
     * @param datacenter the Datacenter to which failures will be randomly injected for its Hosts
     *
     * @param faultArrivalTimesGeneratorInHours a Pseudo Random Number Generator which generates the
     * times (in hours) Hosts failures will occur.
     * The values returned by the generator will be considered to be hours.
     * Frequently a {@link PoissonDistr} is used to generate failure arrivals,
     * but any {@link ContinuousDistribution} can be used.
     */
    public HostFaultInjection(final Datacenter datacenter, final ContinuousDistribution faultArrivalTimesGeneratorInHours) {
        super(datacenter.getSimulation());

        // Registries and statistics start empty.
        this.vmClonerMap = new HashMap<>();
        this.faultsOfAllVmsByBroker = new HashMap<>();
        this.hostFaultsTimeSecsMap = new HashMap<>();
        this.vmRecoveryTimeSecsMap = new HashMap<>();

        this.faultArrivalTimesGeneratorInHours = faultArrivalTimesGeneratorInHours;
        // The internal generator is seeded with the given generator's seed plus one.
        this.random = new UniformDistr(faultArrivalTimesGeneratorInHours.getSeed()+1);

        this.lastFailedHost = Host.NULL;
        this.maxTimeToGenerateFailureInHours = Double.MAX_VALUE;
        this.setDatacenter(datacenter);
    }

    /**
     * Schedules a Host fault injection as the entity starts.
     */
    @Override
    protected void startEntity() {
        this.scheduleFaultInjection();
    }

    /**
     * Schedules a message to be processed internally
     * to inject a Host PEs failure.
     * A new failure is scheduled if there are future events other than
     * Host failures, or if the current simulation time is lower than the
     * maximum time to generate failures.
     *
     * NOTE(review): with the default maximum time ({@link Double#MAX_VALUE} hours)
     * the limit converted to seconds overflows to infinity, so the time condition
     * always holds; confirm that is intended.
     */
    private void scheduleFaultInjection() {
        final long pendingNonFailureEvents =
            getSimulation().getNumberOfFutureEvents(evt -> evt.getTag() != CloudSimTags.HOST_FAILURE);

        final boolean keepInjecting =
            pendingNonFailureEvents > 0 || getSimulation().clock() < getMaxTimeToGenerateFailureInSeconds();
        if (!keepInjecting) {
            return;
        }

        schedule(this, getTimeDelayForNextFault(), CloudSimTags.HOST_FAILURE);
    }

    /**
     * Gets the time delay in seconds, from the current simulation time,
     * that the next failure will be injected.
     * The value sampled from the {@link #faultArrivalTimesGeneratorInHours}
     * is taken as hours and converted to seconds.
     *
     * @return the next failure injection delay in seconds
     */
    private double getTimeDelayForNextFault() {
        final double delayInHours = faultArrivalTimesGeneratorInHours.sample();
        return delayInHours * 3600;
    }

    /**
     * Processes an incoming event, generating a Host fault when the event
     * is tagged as {@link CloudSimTags#HOST_FAILURE}.
     * Events with any other tag are ignored.
     *
     * @param ev the event to process
     */
    @Override
    public void processEvent(final SimEvent ev) {
        if (ev.getTag() == CloudSimTags.HOST_FAILURE) {
            generateHostFault();
        }
    }

    /**
     * Generates a failure for a specific number of PEs from a
     * randomly selected Host.
     * Nothing is failed if no Host could be selected or the selected one has no VMs.
     * Regardless of the outcome, the next fault injection is scheduled
     * before the method returns.
     */
    private void generateHostFault() {
        try {
            final Host selectedHost = getRandomHost();
            final boolean nothingToFail = Host.NULL == selectedHost || selectedHost.getVmList().isEmpty();
            if (nothingToFail) {
                return;
            }

            this.lastFailedHost = selectedHost;
            if (Host.NULL.equals(lastFailedHost)) {
                return;
            }

            numberOfHostFaults++;
            registerHostFaultTime();

            // Working PEs are counted before and after the failures are applied.
            final long workingPesBefore = lastFailedHost.getNumberOfWorkingPes();
            this.lastNumberOfFailedPes = generateHostPesFaults();
            final long workingPesAfter = lastFailedHost.getNumberOfWorkingPes();
            final long pesRequiredByVms = getPesSumOfWorkingVms();

            final String requiredPesMsg;
            if (lastFailedHost.getVmList().isEmpty()) {
                requiredPesMsg = "";
            } else {
                requiredPesMsg = " | VMs required PEs: " + pesRequiredByVms;
            }

            logger.error(
                    "{}: {}: Generated {} PEs failures from {} previously working PEs for {} at minute {}.{}" +
                    "Current Working PEs: {} | Number of VMs: {}{}",
                    getSimulation().clock(), getClass().getSimpleName(), lastNumberOfFailedPes,
                    workingPesBefore, lastFailedHost, getSimulation().clock() / 60, System.lineSeparator(),
                    workingPesAfter, lastFailedHost.getVmList().size(), requiredPesMsg);

            if (workingPesAfter == 0) {
                // No PE left: every VM in the Host fails.
                setAllVmsToFailed();
            } else if (workingPesAfter < pesRequiredByVms) {
                // Not enough PEs for all VMs: remove PEs from VMs.
                deallocateFailedHostPesFromVms();
            } else {
                logNoVmFault();
            }
        } finally {
            //schedules the next failure injection
            scheduleFaultInjection();
        }
    }

    /**
     * Registers the current simulation time as a failure time
     * for the {@link #lastFailedHost}.
     */
    private void registerHostFaultTime() {
        final double faultTime = getSimulation().clock();
        hostFaultsTimeSecsMap
            .computeIfAbsent(lastFailedHost, host -> new ArrayList<>())
            .add(faultTime);
    }

    /**
     * Randomly gets a Host that will have some PEs set to failed.
     * The Host index is the sampled random value multiplied by the
     * number of Hosts, truncated to an integer.
     *
     * @return the randomly selected Host; or {@link Host#NULL} if the Datacenter
     * doesn't have Hosts
     */
    private Host getRandomHost() {
        final int hostCount = datacenter.getHostList().size();
        if (hostCount == 0) {
            return Host.NULL;
        }

        final int index = (int) (random.sample() * hostCount);
        return datacenter.getHost(index);
    }

    /**
     * Sets all VMs inside the {@link #getLastFailedHost() last failed Host} to
     * failed. It is called when all Host PEs have failed.
     */
    private void setAllVmsToFailed() {
        final long totalPes = lastFailedHost.getNumberOfPes();
        final int totalVms = lastFailedHost.getVmList().size();
        logger.error("All the {} PEs failed, affecting all its {} VMs.", totalPes, totalVms);

        setVmListToFailed(lastFailedHost.getVmList());
    }

    /**
     * Logs that the failure of Host PEs hasn't affected any VM.
     * A shorter message is logged when the failed Host has no VMs at all.
     */
    private void logNoVmFault() {
        if (!lastFailedHost.getVmList().isEmpty()) {
            final int pesRequiredByVms = (int) getPesSumOfWorkingVms();
            logger.info(
                    "\tNumber of failed PEs is less than PEs required by all its {} VMs, thus it doesn't affect any VM.{}" +
                    "Total PEs: {} | Total Failed PEs: {} | Working PEs: {} | Current PEs required by VMs: {}.\n",
                    lastFailedHost.getVmList().size(), System.lineSeparator(),
                    lastFailedHost.getNumberOfPes(), lastFailedHost.getNumberOfFailedPes(),
                    lastFailedHost.getNumberOfWorkingPes(), pesRequiredByVms);
        } else {
            logger.info("\tThere aren't VMs running on the failed Host.");
        }
    }

    /**
     * Deallocates the physical PEs failed for the
     * {@link #getLastFailedHost() last failed Host} from affected VMs.
     * After the PEs are removed, every VM left with no PEs is set to failed.
     */
    private void deallocateFailedHostPesFromVms() {
        logger.error("\t{} PEs just failed. There is a total of {} working PEs.",
                lastNumberOfFailedPes,
                lastFailedHost.getNumberOfWorkingPes());
        cyclicallyRemoveFailedHostPesFromVms();

        // VMs that lost all their PEs can't run anymore.
        final List<Vm> vmsWithoutPes =
            lastFailedHost.getVmList()
                .stream()
                .filter(vm -> vm.getNumberOfPes() == 0)
                .collect(toList());
        setVmListToFailed(vmsWithoutPes);
    }

    /**
     * Removes one physical failed PE from one affected VM at a time.
     * Affected VMs are dealt as a circular list, visiting
     * one VM at a time to remove 1 PE from it,
     * until all the failed PEs are removed or no VM has PEs left.
     */
    private void cyclicallyRemoveFailedHostPesFromVms() {
        int failedPesToRemoveFromVms = numberOfFailedPesToRemoveFromVms();
        List<Vm> vmsWithPes = getVmsWithPEsFromFailedHost();
        final int affectedVms = Math.min(vmsWithPes.size(), failedPesToRemoveFromVms);

        logger.warn("\t{} VMs affected from a total of {}. {} PEs are going to be removed from them.",
                affectedVms, lastFailedHost.getVmList().size(), failedPesToRemoveFromVms);
        int i = 0;
        while (!vmsWithPes.isEmpty() && failedPesToRemoveFromVms-- > 0) {
            // Wraps the index, since the list may have shrunk after the last removal.
            i = i % vmsWithPes.size();
            final Vm vm = vmsWithPes.get(i);
            lastFailedHost.getVmScheduler().deallocatePesFromVm(vm, 1);
            vm.getCloudletScheduler().deallocatePesFromVm(1);
            //remove 1 failed PE from the VM
            vm.getProcessor().deallocateAndRemoveResource(1);

            logger.warn(
                    "\tRemoving 1 PE from VM {} due to Host PE failure. New VM PEs Number: {}\n",
                    vm.getId(), vm.getNumberOfPes());
            i++;
            // Re-reads the list so that VMs left without PEs are no longer visited.
            vmsWithPes = getVmsWithPEsFromFailedHost();
        }
    }

    /**
     * Gets the number of PEs that have to be removed from VMs of the
     * {@link #lastFailedHost}, which is the difference between the PEs
     * required by its working VMs and the Host working PEs.
     *
     * @return the number of PEs to remove from VMs
     */
    private int numberOfFailedPesToRemoveFromVms() {
        final int workingPes = (int)lastFailedHost.getNumberOfWorkingPes();
        final int requiredPes = (int)getPesSumOfWorkingVms();
        final int excess = requiredPes - workingPes;
        return excess;
    }

    /**
     * Gets a List of VMs from the {@link #lastFailedHost} that
     * still have at least one PE.
     *
     * @return a new List with the VMs having PEs
     */
    private List<Vm> getVmsWithPEsFromFailedHost() {
        return lastFailedHost
                .getVmList()
                .stream()
                .filter(vm -> vm.getNumberOfPes() > 0)
                .collect(toList());
    }

    /**
     * Sets to failed all VMs from a given list due to
     * Host PEs failures.
     * After that, for each broker owning some of those VMs,
     * a clone of its last failed VM may be created.
     *
     * @param vms the VMs to set to failed
     * @see #createVmCloneIfAllVmsDestroyed(DatacenterBroker, Vm)
     */
    private void setVmListToFailed(final List<Vm> vms) {
        // The map is built before failing the VMs.
        final Map<DatacenterBroker, Vm> lastVmFailedByBroker = getLastFailedVmByBroker(vms);

        vms.forEach(this::setVmToFailed);
        lastVmFailedByBroker.forEach(this::createVmCloneIfAllVmsDestroyed);
    }

    /**
     * Groups the given VMs by broker, keeping for each broker
     * only the VM with the highest id.
     *
     * @param vmsWithoutPes the list of VMs to group
     * @return a map from each broker to its VM with the highest id
     */
    private Map<DatacenterBroker, Vm> getLastFailedVmByBroker(final List<Vm> vmsWithoutPes) {
        final Comparator<Vm> comparator = Comparator.comparingInt(Vm::getId);
        return vmsWithoutPes
                    .stream()
                    .collect(toMap(Vm::getBroker, identity(), maxBy(comparator)));
    }

    /**
     * Creates a VM for the last failed VM if all VMs belonging to the broker have failed
     * and the maximum number of clones to create was not reached.
     *
     * 
     * If all VMs have failed and a {@link VmCloner} is not set or the max number of
     * clones already was created, from the time of the failure
     * until the end of the simulation, this interval the customer
     * service is completely unavailable.
     *
     * Since the map below stores recovery times and not unavailability times,
     * it's being store the failure time as a negative value.
     * This way, when computing the availability for the customer,
     * these negative values are changed to: lastSimulationTime - |negativeRecoveryTime|.
     * Using this logic, is like the VM was recovered only in the end of the simulation.
     * It in fact is not recovered, but this logic has to be applied to
     * allow computing the availability.
     *
     * 
     * @param broker
     * @param lastVmFailedFromBroker
     */
    private void createVmCloneIfAllVmsDestroyed(final DatacenterBroker broker, final Vm lastVmFailedFromBroker) {
        if(isSomeVmWorking(broker)){
            return;
        }

        if(!isVmClonerSet(broker) || getVmCloner(broker).isMaxClonesNumberReached()) {
            vmRecoveryTimeSecsMap.put(lastVmFailedFromBroker, -getSimulation().clock());
        }

        if(!isVmClonerSet(broker)) {
            logger.warn("\tA Vm Cloner was not set for {}. So that VM failure will not be recovered.", broker);
            return;
        }

        final VmCloner cloner = getVmCloner(broker);
        if(cloner.isMaxClonesNumberReached()){
            logger.warn("\tThe maximum allowed number of {} VMs to create has been reached.", cloner.getMaxClonesNumber());
            return;
        }

        registerFaultOfAllVms(broker);
        final double recoveryTimeSecs = getRandomRecoveryTimeForVmInSecs();
        logger.info("\tTime to recovery from fault by cloning the failed VM: {} minutes", recoveryTimeSecs/60.0);

        final Map.Entry> entry = cloner.clone(lastVmFailedFromBroker);

        final Vm clonedVm = entry.getKey();
        final List clonedCloudlets = entry.getValue();
        clonedVm.setSubmissionDelay(recoveryTimeSecs);
        clonedVm.addOnHostAllocationListener(evt -> vmRecoveryTimeSecsMap.put(evt.getVm(), recoveryTimeSecs));
        broker.submitVm(clonedVm);
        broker.submitCloudletList(clonedCloudlets, recoveryTimeSecs);
    }

    /**
     * Sets a VM inside the {@link #getLastFailedHost() last failed Host} to
     * failed and requests the Datacenter to destroy it.
     * Nothing is done if no Host has failed yet.
     *
     * @param vm VM to set to failed
     */
    private void setVmToFailed(final Vm vm) {
        if (Host.NULL.equals(lastFailedHost)) {
            return;
        }

        vm.setFailed(true);
        final DatacenterBroker owner = vm.getBroker();
        final boolean cloneNotRequiredYet = isVmClonerSet(owner) && isSomeVmWorking(owner);
        if (cloneNotRequiredYet) {
            logger.info(
                "\t{} destroyed but not cloned, since there are {} VMs for the {} yet",
                vm, getRunningVmsNumber(owner), owner);
        }

        /*
         The broker is the one expected to request VM creation and destruction.
         Therefore, it is set as the sender of the VM destroy request.
         */
        getSimulation().sendNow(owner, datacenter, CloudSimTags.VM_DESTROY, vm);
    }

    /**
     * Registers one more fault that caused all VMs from a given broker
     * to fail.
     *
     * @param broker the broker to increase the number of faults
     */
    private void registerFaultOfAllVms(final DatacenterBroker broker) {
        faultsOfAllVmsByBroker.merge(broker, 1, Integer::sum);
    }

    /**
     * Gets the {@link VmCloner} registered to clone VMs of a given broker.
     *
     * @param broker the broker the VM belongs to
     * @return the {@link VmCloner} object or {@link VmCloner#NULL} if no cloner was set
     */
    private VmCloner getVmCloner(final DatacenterBroker broker) {
        final VmCloner fallback = VmCloner.NULL;
        return vmClonerMap.getOrDefault(broker, fallback);
    }

    /**
     * Checks if at least one VM in the broker's VM exec list is working.
     *
     * @param broker the broker to check
     * @return true if some VM is working, false otherwise
     */
    private boolean isSomeVmWorking(final DatacenterBroker broker) {
        return broker.getVmExecList()
                     .stream()
                     .anyMatch(vm -> vm.isWorking());
    }

    /**
     * Gets the number of working VMs in the broker's VM exec list.
     *
     * @param broker the broker to count the working VMs
     * @return the number of working VMs
     */
    private long getRunningVmsNumber(final DatacenterBroker broker) {
        return broker.getVmExecList()
                     .stream()
                     .filter(vm -> vm.isWorking())
                     .count();
    }

    /**
     * Checks if a {@link VmCloner} is set to a given broker.
     *
     * @param broker broker to check if it has a {@link VmCloner}.
     * @return true if the broker has a {@link VmCloner} different from
     * {@link VmCloner#NULL}, false otherwise
     */
    private boolean isVmClonerSet(final DatacenterBroker broker) {
        final VmCloner registered = vmClonerMap.getOrDefault(broker, VmCloner.NULL);
        return registered != VmCloner.NULL;
    }

    /**
     * Gets the total number of faults that happened for existing hosts.
     * This isn't the total number of failed hosts because one
     * host may fail multiple times.
     *
     * @return the number of Host faults injected so far
     */
    public int getNumberOfHostFaults() {
        return this.numberOfHostFaults;
    }

    /**
     * Gets the Datacenter's availability as a value between 0 and 1,
     * based on VMs' downtime (the times VMs took to be repaired),
     * considering the VMs from all brokers.
     *
     * @return the availability for all brokers
     * @see #availability(DatacenterBroker)
     */
    public double availability() {
        final DatacenterBroker allBrokers = null;
        return availability(allBrokers);
    }

    /**
     * Gets the availability for a given broker as a value between 0 and 1,
     * based on VMs' downtime (the times VMs took to be repaired).
     * It is computed as MTBF / (MTBF + MTTR).
     *
     * @param broker the broker to get the availability of its VMs
     * @return the availability, which is 1 when the MTBF is zero (no failures)
     */
    public double availability(final DatacenterBroker broker) {
        final double mtbf = meanTimeBetweenVmFaultsInMinutes(broker);

        //no failures means 100% availability
        return mtbf == 0 ? 1 : mtbf / (mtbf + meanTimeToRepairVmFaultsInMinutes(broker));
    }

    /**
     * Gets the total number of faults which affected all VMs from any broker.
     * It is the sum of the fault counts registered for every broker,
     * not the number of brokers which had faults.
     *
     * @return the total number of faults for all brokers
     */
    public long getNumberOfFaults() {
        // Each map value is a per-broker fault counter, so the values are summed up.
        return faultsOfAllVmsByBroker
                .values()
                .stream()
                .mapToLong(Integer::longValue)
                .sum();
    }

    /**
     * Gets the total number of Host faults which affected all VMs from a given broker
     * or VMs from all existing brokers.
     *
     * @param broker the broker to get the number of Host faults affecting its VMs or null
     *               if Host faults affecting VMs from any broker are to be counted
     * @return the number of faults; zero if no fault was registered for the given broker
     */
    public long getNumberOfFaults(final DatacenterBroker broker) {
        if(broker != null){
            return faultsOfAllVmsByBroker.getOrDefault(broker, 0);
        }

        return getNumberOfFaults();
    }

    /**
     * Gets the total time (in minutes) all failed VMs belonging to a broker took to recover
     * from failure.
     * See the method {@link #createVmCloneIfAllVmsDestroyed(DatacenterBroker, Vm)}
     * to understand the logic of the values in the recovery times map:
     * a negative value is a failure time of a VM never recovered,
     * which is converted to the period from that time until the current simulation time.
     *
     * @param broker the broker to get the recovery time for or null to consider all brokers
     * @return the total recovery time, truncated to whole minutes
     */
    private double totalVmsRecoveryTimeInMinutes(final DatacenterBroker broker) {
        final Stream<Double> stream = broker == null ?
                vmRecoveryTimeSecsMap.values().stream() :
                vmRecoveryTimeSecsMap.entrySet().stream()
                    .filter(entry -> broker.equals(entry.getKey().getBroker()))
                    .map(Map.Entry::getValue);

        final double seconds = stream
                .map(rt -> rt >= 0 ? rt : getSimulation().clock() - Math.abs(rt))
                .reduce(0.0, Double::sum);

        // The cast discards the fraction of a minute.
        return (long)(seconds/60.0);
    }

    /**
     * Computes the current Mean Time Between host Failures (MTBF) in minutes.
     * The registered failure times of all Hosts are sorted, the differences
     * between consecutive times are summed up and the sum is divided
     * by the number of failure times.
     * The result is truncated to whole minutes.
     *
     * NOTE(review): N failure times define N-1 intervals, but the sum is
     * divided by N; confirm this is the intended MTBF definition.
     *
     * @return the current mean time (in minutes) between Host failures (MTBF)
     * or zero if no failures have happened yet
     * @see #meanTimeBetweenVmFaultsInMinutes()
     */
    public double meanTimeBetweenHostFaultsInMinutes() {
        final double[] sortedTimes = hostFaultsTimeSecsMap
            .values()
            .stream()
            .flatMap(Collection::stream)
            .mapToDouble(time -> time)
            .sorted()
            .toArray();

        if(sortedTimes.length == 0){
            return 0;
        }

        //accumulates the differences between consecutive failure times
        double intervalsSum = 0;
        double last = sortedTimes[0];
        for (int i = 0; i < sortedTimes.length; i++) {
            intervalsSum += (sortedTimes[i] - last);
            last = sortedTimes[i];
        }

        final double meanSeconds = intervalsSum/sortedTimes.length;
        return (long)(meanSeconds/60.0);
    }

    /**
     * Computes the current Mean Time Between host Failures (MTBF) in minutes,
     * which affected VMs from any broker for the entire Datacenter.
     *
     * @return the current Mean Time Between host Failures (MTBF) in minutes
     * or zero if no fault affecting all VMs of a broker was registered
     * @see #meanTimeBetweenVmFaultsInMinutes(DatacenterBroker)
     * @see #meanTimeBetweenHostFaultsInMinutes()
     */
    public double meanTimeBetweenVmFaultsInMinutes() {
        final DatacenterBroker allBrokers = null;
        return meanTimeBetweenVmFaultsInMinutes(allBrokers);
    }

    /**
     * Computes the current Mean Time Between host Failures (MTBF) in minutes,
     * which affected VMs from a given broker.
     * The value is the current simulation time (in minutes) minus the
     * Mean Time To Repair (MTTR) for the broker.
     *
     * @param broker the broker to get the MTBF for
     * @return the current mean time (in minutes) between Host failures (MTBF)
     * or zero if no fault was registered for the broker
     * @see #meanTimeBetweenHostFaultsInMinutes()
     * @see #meanTimeToRepairVmFaultsInMinutes(DatacenterBroker)
     */
    public double meanTimeBetweenVmFaultsInMinutes(final DatacenterBroker broker) {
        final double brokerFaults = getNumberOfFaults(broker);
        if(brokerFaults == 0){
            return 0;
        }

        final double mttrInMinutes = meanTimeToRepairVmFaultsInMinutes(broker);
        final double nowInMinutes = getSimulation().clockInMinutes();
        return nowInMinutes - mttrInMinutes;
    }

    /**
     * Computes the current Mean Time To Repair failures of VMs in minutes (MTTR)
     * in the Datacenter, for all existing brokers.
     *
     * @return the MTTR (in minutes) or zero if no fault was registered
     * @see #meanTimeToRepairVmFaultsInMinutes(DatacenterBroker)
     */
    public double meanTimeToRepairVmFaultsInMinutes() {
        final DatacenterBroker allBrokers = null;
        return meanTimeToRepairVmFaultsInMinutes(allBrokers);
    }

    /**
     * Computes the current Mean Time To Repair Failures of VMs in minutes (MTTR)
     * belonging to given broker, which is the total VMs recovery time
     * divided by the number of faults.
     * If a null broker is given, computes the MTTR of all VMs for all existing brokers.
     *
     * @param broker the broker to get the MTTR for or null if the MTTR is to be computed for all brokers
     * @return the current MTTR (in minutes) or zero if no fault was registered
     */
    public double meanTimeToRepairVmFaultsInMinutes(final DatacenterBroker broker) {
        final double brokerFaults = getNumberOfFaults(broker);
        return brokerFaults == 0 ? 0 : totalVmsRecoveryTimeInMinutes(broker) / brokerFaults;
    }

    /**
     * Generates random failures for the PEs from the
     * {@link #getLastFailedHost() last failed Host}.
     * A randomly generated number of PEs, taken from the beginning of the
     * list of working PEs, has its status set to failed.
     *
     * <p>The PEs to fail are collected first and then changed in a separate step.
     * Changing them inside {@code peek()} before a {@code count()} is unsafe,
     * because the stream implementation is allowed to skip the pipeline
     * (and therefore the status change) when it can compute the count directly.</p>
     *
     * @return the number of PEs set to failed for the Host
     */
    private int generateHostPesFaults() {
        final List<Pe> pesToFail =
            lastFailedHost.getWorkingPeList()
                .stream()
                .limit(randomNumberOfFailedPes())
                .collect(toList());

        pesToFail.forEach(pe -> pe.setStatus(Pe.Status.FAILED));
        return pesToFail.size();
    }

    /**
     * Gets the total number of PEs from all working VMs
     * inside the {@link #lastFailedHost}.
     *
     * @return the sum of PEs of the working VMs
     */
    private long getPesSumOfWorkingVms() {
        long pesSum = 0;
        for (final Vm vm : lastFailedHost.getVmList()) {
            if (vm.isWorking()) {
                pesSum += vm.getNumberOfPes();
            }
        }

        return pesSum;
    }

    /**
     * Randomly generates a number of PEs which will fail for the
     * {@link #lastFailedHost}.
     * The sampled random value is multiplied by the number of working PEs,
     * truncated to an integer and increased by 1.
     *
     * @return the generated number of PEs to fail
     */
    private int randomNumberOfFailedPes() {
        final double fraction = random.sample();
        final int workingPes = lastFailedHost.getWorkingPeList().size();

        // The truncated product is increased by 1.
        return (int) (fraction * workingPes) + 1;
    }

    /**
     * Gets the datacenter in which failures will be injected.
     *
     * @return the target datacenter
     */
    public Datacenter getDatacenter() {
        return this.datacenter;
    }

    /**
     * Sets the datacenter in which failures will be injected.
     *
     * @param datacenter the datacenter to set
     * @throws NullPointerException if the given datacenter is null
     */
    protected final void setDatacenter(final Datacenter datacenter) {
        this.datacenter = Objects.requireNonNull(datacenter);
    }

    /**
     * Adds a {@link VmCloner} that creates a clone for the last failed {@link Vm} belonging to a given broker,
     * when all VMs of that broker have failed.
     * A cloner previously added for the same broker is replaced.
     *
     * This is optional. If a {@link VmCloner} is not set,
     * VMs will not be recovered from failures.
     *
     * @param broker the broker to set the VM cloner to
     * @param cloner the {@link VmCloner} to set
     * @throws NullPointerException if the broker or the cloner is null
     */
    public void addVmCloner(final DatacenterBroker broker, final VmCloner cloner) {
        this.vmClonerMap.put(Objects.requireNonNull(broker), Objects.requireNonNull(cloner));
    }

    /**
     * Gets the last Host for which a failure was injected.
     *
     * @return the last failed Host or {@link Host#NULL} if no Host has failed
     * yet.
     */
    public Host getLastFailedHost() {
        return this.lastFailedHost;
    }

    /**
     * Gets a pseudo random recovery time (in seconds) for a failed VM.
     * The value is a random sample multiplied by
     * {@link #MAX_VM_RECOVERY_TIME_SECS} plus 1.
     *
     * @return the random recovery time in seconds
     */
    public double getRandomRecoveryTimeForVmInSecs() {
        final double scaledSample = random.sample()*MAX_VM_RECOVERY_TIME_SECS;
        return scaledSample + 1;
    }

    /**
     * Gets the max time to generate a failure (in hours).
     *
     * @return the max time in hours
     */
    public double getMaxTimeToGenerateFailureInHours() {
        return this.maxTimeToGenerateFailureInHours;
    }

    /**
     * Gets the max time to generate a failure (in seconds),
     * converted from the value in hours.
     *
     * @return the max time in seconds
     */
    private double getMaxTimeToGenerateFailureInSeconds() {
        final double hours = maxTimeToGenerateFailureInHours;
        return hours*3600;
    }

    /**
     * Sets the max time to generate a failure (in hours).
     * No validation is performed on the given value.
     *
     * @param maxTimeToGenerateFailureInHours the maximum time to set
     */
    public void setMaxTimeToGenerateFailureInHours(final double maxTimeToGenerateFailureInHours) {
        final double hours = maxTimeToGenerateFailureInHours;
        this.maxTimeToGenerateFailureInHours = hours;
    }
}