All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.cloudsimplus.traces.google.GoogleTaskUsageTraceReader Maven / Gradle / Ivy

Go to download

CloudSim Plus: A modern, highly extensible and easier-to-use Java 8+ Framework for Modeling and Simulation of Cloud Computing Infrastructures and Services

There is a newer version: 8.0.0
Show newest version
/*
 * CloudSim Plus: A modern, highly-extensible and easier-to-use Framework for
 * Modeling and Simulation of Cloud Computing Infrastructures and Services.
 * http://cloudsimplus.org
 *
 *     Copyright (C) 2015-2021 Universidade da Beira Interior (UBI, Portugal) and
 *     the Instituto Federal de Educação Ciência e Tecnologia do Tocantins (IFTO, Brazil).
 *
 *     This file is part of CloudSim Plus.
 *
 *     CloudSim Plus is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     CloudSim Plus is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with CloudSim Plus. If not, see <http://www.gnu.org/licenses/>.
 */
package org.cloudsimplus.traces.google;

import org.cloudbus.cloudsim.brokers.DatacenterBroker;
import org.cloudbus.cloudsim.cloudlets.Cloudlet;
import org.cloudbus.cloudsim.core.CloudSimTag;
import org.cloudbus.cloudsim.core.Simulation;
import org.cloudbus.cloudsim.core.events.CloudSimEvent;
import org.cloudbus.cloudsim.util.ResourceLoader;
import org.cloudbus.cloudsim.util.TimeUtil;
import org.cloudbus.cloudsim.util.TraceReaderAbstract;
import org.cloudbus.cloudsim.utilizationmodels.UtilizationModel;
import org.cloudbus.cloudsim.utilizationmodels.UtilizationModelDynamic;
import org.cloudbus.cloudsim.utilizationmodels.UtilizationModelFull;
import org.cloudsimplus.listeners.EventInfo;

import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import static java.util.Objects.requireNonNull;

/**
 * Process "task usage" trace files from
 * Google Cluster Data
 * to change the resource utilization of {@link Cloudlet}s.
 * The trace files are the ones inside the task_usage sub-directory of downloaded Google traces.
 * The instructions to download the traces are provided in the link above.
 *
 * 

A spreadsheet that makes it easier to understand the structure of trace files is provided * in docs/google-cluster-data-samples.xlsx

* *

The documentation for fields and values were obtained from the Google Cluster trace documentation in the link above. * It's strongly recommended to read such a documentation before trying to use this class.

* *

Check important details at {@link TraceReaderAbstract}.

* * @see #process() * * @author Manoel Campos da Silva Filho * @since CloudSim Plus 4.0.0 */ public final class GoogleTaskUsageTraceReader extends GoogleTraceReaderAbstract { private final List cloudletUsageChangeEvents; /** * A {@link GoogleTaskEventsTraceReader} used to create Cloudlets from a task events trace file. */ private final GoogleTaskEventsTraceReader taskEventsReader; /** * The index of each field in the trace file. */ public enum FieldIndex implements TraceField { /** * 0: The index of the field containing the start time​ of the measurement period (stored in microsecond * but converted to seconds when read from the file). */ START_TIME{ /** * Gets the start time converted to seconds. * @param reader the reader for the trace file * @return */ @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return TimeUtil.microToSeconds(reader.getFieldDoubleValue(this)); } }, /** * 1: The index of the field containing the end time​ of the measurement period (stored in microsecond * but converted to seconds when read from the file). */ END_TIME{ /** * Gets the end time converted to seconds. * @param reader the reader for the trace file * @return */ @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return TimeUtil.microToSeconds(reader.getFieldDoubleValue(this)); } }, /** * 2: The index of the field containing the id of the job this task belongs to. */ JOB_ID{ @Override public Long getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldLongValue(this); } }, /** * 3: The index of the field containing the task index within the job. */ TASK_INDEX{ @Override public Long getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldLongValue(this); } }, /** * 4: The index of the field containing the machineID. * If the field is present, indicates the machine onto which the task was scheduled, * otherwise, the reader will return -1 as default value. 
*/ MACHINE_ID{ @Override public Long getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldLongValue(this, -1); } }, /** * 5: The index of the field containing the mean CPU usage rate (in percentage from 0 to 1). */ MEAN_CPU_USAGE_RATE{ @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldDoubleValue(this, 0); } }, /** * 6: The index of the field containing the canonical memory usage, * i.e., the number of user accessible pages, * including page cache but excluding some pages marked as stale. */ CANONICAL_MEMORY_USAGE { @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldDoubleValue(this, 0); } }, /** * 7: The index of the field containing the assigned memory usage, * i.e., memory usage based on the memory actually assigned (but not necessarily used) * to the container where the task was running inside the * Google Cluster. */ ASSIGNED_MEMORY_USAGE { @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldDoubleValue(this, 0); } }, /** * 8: The index of the field containing the unmapped page cache memory usage, * i.e., Linux page cache (file-backed memory) not mapped into any userspace process. */ UNMAPPED_PAGE_CACHE_MEMORY_USAGE { @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldDoubleValue(this, 0); } }, /** * 9: The index of the field containing the total page cache memory usage, * i.e., the total Linux page cache (file-backed memory). */ TOTAL_PAGE_CACHE_MEMORY_USAGE { @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldDoubleValue(this, 0); } }, /** * 10: The index of the field containing the maximum memory usage, * i.e., the maximum value of the canonical memory usage * measurement observed over the measurement interval. * This value is not available for some tasks. 
*/ MAXIMUM_MEMORY_USAGE { @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldDoubleValue(this, -1); } }, /** * 11: The index of the field containing the mean disk I/O time. */ MEAN_DISK_IO_TIME { @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldDoubleValue(this, 0); } }, /** * 12: The index of the field containing the mean local disk space used. * Represents runtime local disk capacity usage. * Disk usage required for binaries and other read-only, pre-staged runtime files is ​not​included. * Additionally, most disk space used by distributed, persistent storage (e.g. GFS, Colossus) * is not accounted for in this trace. */ MEAN_LOCAL_DISK_SPACE_USED { @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldDoubleValue(this, 0); } }, /** * 13: The index of the field containing the maximum CPU usage * observed over the measurement interval. */ MAXIMUM_CPU_USAGE { @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldDoubleValue(this, -1); } }, /** * 14: The index of the field containing the maximum disk IO time * observed over the measurement interval. */ MAXIMUM_DISK_IO_TIME { @Override public Double getValue(final GoogleTaskUsageTraceReader reader) { return reader.getFieldDoubleValue(this, -1); } } } private final Simulation simulation; /** * Gets a {@link GoogleTaskUsageTraceReader} instance to read a "task usage" trace file * inside the application's resource directory. * * @param taskEventsReader a {@link GoogleTaskEventsTraceReader} used to create Cloudlets from a task events trace file * @param filePath the workload trace relative file name in one of the following formats: ASCII text, zip, gz. 
* @throws IllegalArgumentException when the trace file name is null or empty * @throws UncheckedIOException when the file cannot be accessed (such as when it doesn't exist) * @see #process() */ public static GoogleTaskUsageTraceReader getInstance( final GoogleTaskEventsTraceReader taskEventsReader, final String filePath) { final InputStream reader = ResourceLoader.newInputStream(filePath, GoogleTaskUsageTraceReader.class); return new GoogleTaskUsageTraceReader(taskEventsReader, filePath, reader); } /** * Instantiates a {@link GoogleTaskUsageTraceReader} to read a "task usage" from a given InputStream. * * @param taskEventsReader a {@link GoogleTaskEventsTraceReader} used to create Cloudlets from a task events trace file * @param filePath the workload trace relative file name in one of the following formats: ASCII text, zip, gz. * @param reader a {@link InputStream} object to read the file * @throws IllegalArgumentException when the trace file name is null or empty * @throws UncheckedIOException when the file cannot be accessed (such as when it doesn't exist) * @see #process() */ private GoogleTaskUsageTraceReader( final GoogleTaskEventsTraceReader taskEventsReader, final String filePath, final InputStream reader) { super(filePath, reader); this.taskEventsReader = requireNonNull(taskEventsReader); final var brokerList = getBrokers(); if(brokerList.isEmpty()){ throw new IllegalArgumentException("The broker list in your GoogleTaskEventsTraceReader is empty"); } this.simulation = brokerList.get(0).getSimulation(); cloudletUsageChangeEvents = new ArrayList<>(); } private List getBrokers() { return taskEventsReader.getBrokerManager().getBrokers(); } /** * Process {@link #getFilePath() trace file} requests to change resource usage of {@link Cloudlet}s * as described in the file. It returns the List of all processed {@link Cloudlet}s. * *

When using a {@link GoogleTaskEventsTraceReader} * and you create Cloudlets with an {@link UtilizationModelFull} to define that required CPUs * will be used in 100% of their capacity, * if the "task usage" file is read, a different CPU usage can be set. * In regular simulations, if this value is smaller, * a Cloudlet will spend more time to finish. * However, since the "task events" file defines the exact time to finish * each Cloudlet, using less than 100% of the CPU capacity won't make the Cloudlet to finish * later (as in simulations not using the Google Cluster Data). * Each Cloudlet will just have a smaller length at the end of the simulation.

* *

These trace files don't define the length of the Cloudlet (task). * This way, Cloudlets are created with an indefinite length * (see {@link Cloudlet#setLength(long)}) and the length is increased * as the Cloudlet is executed. Therefore, if the Cloudlet is using * a higher percentage of the CPU capacity, it will execute * more instructions in a given time interval.

* *

In conclusion, the exec and finish time of Cloudlets created * from Google Cluster trace files won't change according * to the percentage of CPU capacity the Cloudlets are using.

* * @return the Set of all {@link Cloudlet}s processed according to a line in the trace file */ @Override public Collection process() { return super.process(); } /** There is not pre-process for this implementation. */ @Override protected void preProcess(){/**/} @Override protected void postProcess(){ simulation.addOnSimulationStartListener(this::onSimulationStart); } /** * Adds an event listener that is notified when the simulation starts, * so that the messages to change Cloudlet resource usage are sent. * * @param info the simulation start event information */ private void onSimulationStart(final EventInfo info) { cloudletUsageChangeEvents.forEach(evt -> evt.getSource().schedule(evt)); } @Override protected boolean processParsedLineInternal() { final TaskUsage taskUsage = new TaskUsage(this); return taskEventsReader .findObject(taskUsage.getUniqueTaskId()) .map(cloudlet -> requestCloudletUsageChange(cloudlet, taskUsage)) .isPresent(); } /** * Send a message to the broker to request change in a Cloudlet resource usage. 
* @return true if the request was created, false otherwise */ private boolean requestCloudletUsageChange(final Cloudlet cloudlet, final TaskUsage taskUsage) { final Runnable resourceUsageUpdateRunnable = () -> { final StringBuilder builder = new StringBuilder(); if (cloudlet.getUtilizationOfCpu() != taskUsage.getMeanCpuUsageRate()) { builder.append("CPU Utilization: ") .append(formatPercentValue(cloudlet.getUtilizationOfCpu())).append(VAL_SEPARATOR) .append(formatPercentValue(taskUsage.getMeanCpuUsageRate())).append('%').append(COL_SEPARATOR); cloudlet.setUtilizationModelCpu(createUtilizationModel(cloudlet.getUtilizationModelCpu(), taskUsage.getMeanCpuUsageRate())); } if (cloudlet.getUtilizationOfRam() != taskUsage.getCanonicalMemoryUsage()) { builder.append("RAM Utilization: ") .append(formatPercentValue(cloudlet.getUtilizationOfRam())).append(VAL_SEPARATOR) .append(formatPercentValue(taskUsage.getCanonicalMemoryUsage())) .append('%') .append(COL_SEPARATOR); cloudlet.setUtilizationModelRam(createUtilizationModel(cloudlet.getUtilizationModelRam(), taskUsage.getCanonicalMemoryUsage())); } /* We don't need to check if some resource was changed because * if this Runnable is executed is because something was. * An event to execute such Runnable is just sent in such a condition.*/ final DatacenterBroker broker = cloudlet.getBroker(); broker.LOGGER.trace("{}: {}: {} resource usage changed: {}", simulation.clockStr(), broker.getName(), cloudlet, builder); cloudlet.getVm().getHost().updateProcessing(simulation.clock()); }; if(hasCloudletResourceUsageChanged(cloudlet, taskUsage)){ addAvailableObject(cloudlet); final CloudSimEvent evt = new CloudSimEvent( taskUsage.getStartTime(), cloudlet.getBroker(), CloudSimTag.CLOUDLET_UPDATE_ATTRIBUTES, resourceUsageUpdateRunnable); return cloudletUsageChangeEvents.add(evt); } return false; } /** * Creates a {@link UtilizationModel} based on another one. 
* If the given instance is a {@link UtilizationModelDynamic}, * otherwise a UtilizationModelDynamic is created without cloning * another instance (that means it won't have the configurations * defined by another model). * Then, the initial utilization of the created {@link UtilizationModelDynamic} * is set as the given parameter. * * @param source the utilization model that will be used as based to * @param initialUtilization a percentage value (in scale from 0 to 1) * to define the current utilization for the created {@link UtilizationModelDynamic} * @return an {@link UtilizationModelDynamic} instance with the current utilization equals * to the given parameter */ private UtilizationModel createUtilizationModel(final UtilizationModel source, final double initialUtilization){ if(source instanceof UtilizationModelDynamic umDynamic){ return new UtilizationModelDynamic(umDynamic, initialUtilization); } return new UtilizationModelDynamic(initialUtilization); } private boolean hasCloudletResourceUsageChanged(final Cloudlet cloudlet, final TaskUsage taskUsage){ return cloudlet.getUtilizationOfCpu() != taskUsage.getMeanCpuUsageRate() || cloudlet.getUtilizationOfRam() != taskUsage.getCanonicalMemoryUsage(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy