org.cloudsimplus.traces.google.GoogleTaskUsageTraceReader Maven / Gradle / Ivy
Show all versions of cloudsim-plus Show documentation
/*
* CloudSim Plus: A modern, highly-extensible and easier-to-use Framework for
* Modeling and Simulation of Cloud Computing Infrastructures and Services.
* http://cloudsimplus.org
*
* Copyright (C) 2015-2021 Universidade da Beira Interior (UBI, Portugal) and
* the Instituto Federal de Educação Ciência e Tecnologia do Tocantins (IFTO, Brazil).
*
* This file is part of CloudSim Plus.
*
* CloudSim Plus is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CloudSim Plus is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CloudSim Plus. If not, see .
*/
package org.cloudsimplus.traces.google;
import org.cloudbus.cloudsim.brokers.DatacenterBroker;
import org.cloudbus.cloudsim.cloudlets.Cloudlet;
import org.cloudbus.cloudsim.core.CloudSimTag;
import org.cloudbus.cloudsim.core.Simulation;
import org.cloudbus.cloudsim.core.events.CloudSimEvent;
import org.cloudbus.cloudsim.util.ResourceLoader;
import org.cloudbus.cloudsim.util.TimeUtil;
import org.cloudbus.cloudsim.util.TraceReaderAbstract;
import org.cloudbus.cloudsim.utilizationmodels.UtilizationModel;
import org.cloudbus.cloudsim.utilizationmodels.UtilizationModelDynamic;
import org.cloudbus.cloudsim.utilizationmodels.UtilizationModelFull;
import org.cloudsimplus.listeners.EventInfo;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import static java.util.Objects.requireNonNull;
/**
* Process "task usage" trace files from
* Google Cluster Data
* to change the resource utilization of {@link Cloudlet}s.
* The trace files are the ones inside the task_usage sub-directory of downloaded Google traces.
* The instructions to download the traces are provided in the link above.
*
* A spreadsheet that makes it easier to understand the structure of trace files is provided
* in docs/google-cluster-data-samples.xlsx
*
* The documentation for fields and values were obtained from the Google Cluster trace documentation in the link above.
* It's strongly recommended to read such a documentation before trying to use this class.
*
* Check important details at {@link TraceReaderAbstract}.
*
* @see #process()
*
* @author Manoel Campos da Silva Filho
* @since CloudSim Plus 4.0.0
*/
public final class GoogleTaskUsageTraceReader extends GoogleTraceReaderAbstract {
private final List cloudletUsageChangeEvents;
/**
* A {@link GoogleTaskEventsTraceReader} used to create Cloudlets from a task events trace file.
*/
private final GoogleTaskEventsTraceReader taskEventsReader;
/**
* The index of each field in the trace file.
*/
public enum FieldIndex implements TraceField {
/**
* 0: The index of the field containing the start time of the measurement period (stored in microsecond
* but converted to seconds when read from the file).
*/
START_TIME{
/**
* Gets the start time converted to seconds.
* @param reader the reader for the trace file
* @return
*/
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return TimeUtil.microToSeconds(reader.getFieldDoubleValue(this));
}
},
/**
* 1: The index of the field containing the end time of the measurement period (stored in microsecond
* but converted to seconds when read from the file).
*/
END_TIME{
/**
* Gets the end time converted to seconds.
* @param reader the reader for the trace file
* @return
*/
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return TimeUtil.microToSeconds(reader.getFieldDoubleValue(this));
}
},
/**
* 2: The index of the field containing the id of the job this task belongs to.
*/
JOB_ID{
@Override
public Long getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldLongValue(this);
}
},
/**
* 3: The index of the field containing the task index within the job.
*/
TASK_INDEX{
@Override
public Long getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldLongValue(this);
}
},
/**
* 4: The index of the field containing the machineID.
* If the field is present, indicates the machine onto which the task was scheduled,
* otherwise, the reader will return -1 as default value.
*/
MACHINE_ID{
@Override
public Long getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldLongValue(this, -1);
}
},
/**
* 5: The index of the field containing the mean CPU usage rate (in percentage from 0 to 1).
*/
MEAN_CPU_USAGE_RATE{
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldDoubleValue(this, 0);
}
},
/**
* 6: The index of the field containing the canonical memory usage,
* i.e., the number of user accessible pages,
* including page cache but excluding some pages marked as stale.
*/
CANONICAL_MEMORY_USAGE {
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldDoubleValue(this, 0);
}
},
/**
* 7: The index of the field containing the assigned memory usage,
* i.e., memory usage based on the memory actually assigned (but not necessarily used)
* to the container where the task was running inside the
* Google Cluster.
*/
ASSIGNED_MEMORY_USAGE {
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldDoubleValue(this, 0);
}
},
/**
* 8: The index of the field containing the unmapped page cache memory usage,
* i.e., Linux page cache (file-backed memory) not mapped into any userspace process.
*/
UNMAPPED_PAGE_CACHE_MEMORY_USAGE {
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldDoubleValue(this, 0);
}
},
/**
* 9: The index of the field containing the total page cache memory usage,
* i.e., the total Linux page cache (file-backed memory).
*/
TOTAL_PAGE_CACHE_MEMORY_USAGE {
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldDoubleValue(this, 0);
}
},
/**
* 10: The index of the field containing the maximum memory usage,
* i.e., the maximum value of the canonical memory usage
* measurement observed over the measurement interval.
* This value is not available for some tasks.
*/
MAXIMUM_MEMORY_USAGE {
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldDoubleValue(this, -1);
}
},
/**
* 11: The index of the field containing the mean disk I/O time.
*/
MEAN_DISK_IO_TIME {
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldDoubleValue(this, 0);
}
},
/**
* 12: The index of the field containing the mean local disk space used.
* Represents runtime local disk capacity usage.
* Disk usage required for binaries and other read-only, pre-staged runtime files is notincluded.
* Additionally, most disk space used by distributed, persistent storage (e.g. GFS, Colossus)
* is not accounted for in this trace.
*/
MEAN_LOCAL_DISK_SPACE_USED {
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldDoubleValue(this, 0);
}
},
/**
* 13: The index of the field containing the maximum CPU usage
* observed over the measurement interval.
*/
MAXIMUM_CPU_USAGE {
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldDoubleValue(this, -1);
}
},
/**
* 14: The index of the field containing the maximum disk IO time
* observed over the measurement interval.
*/
MAXIMUM_DISK_IO_TIME {
@Override
public Double getValue(final GoogleTaskUsageTraceReader reader) {
return reader.getFieldDoubleValue(this, -1);
}
}
}
private final Simulation simulation;
/**
* Gets a {@link GoogleTaskUsageTraceReader} instance to read a "task usage" trace file
* inside the application's resource directory.
*
* @param taskEventsReader a {@link GoogleTaskEventsTraceReader} used to create Cloudlets from a task events trace file
* @param filePath the workload trace relative file name in one of the following formats: ASCII text, zip, gz.
* @throws IllegalArgumentException when the trace file name is null or empty
* @throws UncheckedIOException when the file cannot be accessed (such as when it doesn't exist)
* @see #process()
*/
public static GoogleTaskUsageTraceReader getInstance(
final GoogleTaskEventsTraceReader taskEventsReader,
final String filePath)
{
final InputStream reader = ResourceLoader.newInputStream(filePath, GoogleTaskUsageTraceReader.class);
return new GoogleTaskUsageTraceReader(taskEventsReader, filePath, reader);
}
/**
* Instantiates a {@link GoogleTaskUsageTraceReader} to read a "task usage" from a given InputStream.
*
* @param taskEventsReader a {@link GoogleTaskEventsTraceReader} used to create Cloudlets from a task events trace file
* @param filePath the workload trace relative file name in one of the following formats: ASCII text, zip, gz.
* @param reader a {@link InputStream} object to read the file
* @throws IllegalArgumentException when the trace file name is null or empty
* @throws UncheckedIOException when the file cannot be accessed (such as when it doesn't exist)
* @see #process()
*/
private GoogleTaskUsageTraceReader(
final GoogleTaskEventsTraceReader taskEventsReader,
final String filePath,
final InputStream reader)
{
super(filePath, reader);
this.taskEventsReader = requireNonNull(taskEventsReader);
final var brokerList = getBrokers();
if(brokerList.isEmpty()){
throw new IllegalArgumentException("The broker list in your GoogleTaskEventsTraceReader is empty");
}
this.simulation = brokerList.get(0).getSimulation();
cloudletUsageChangeEvents = new ArrayList<>();
}
private List getBrokers() {
return taskEventsReader.getBrokerManager().getBrokers();
}
/**
* Process {@link #getFilePath() trace file} requests to change resource usage of {@link Cloudlet}s
* as described in the file. It returns the List of all processed {@link Cloudlet}s.
*
* When using a {@link GoogleTaskEventsTraceReader}
* and you create Cloudlets with an {@link UtilizationModelFull} to define that required CPUs
* will be used in 100% of their capacity,
* if the "task usage" file is read, a different CPU usage can be set.
* In regular simulations, if this value is smaller,
* a Cloudlet will spend more time to finish.
* However, since the "task events" file defines the exact time to finish
* each Cloudlet, using less than 100% of the CPU capacity won't make the Cloudlet to finish
* later (as in simulations not using the Google Cluster Data).
* Each Cloudlet will just have a smaller length at the end of the simulation.
*
* These trace files don't define the length of the Cloudlet (task).
* This way, Cloudlets are created with an indefinite length
* (see {@link Cloudlet#setLength(long)}) and the length is increased
* as the Cloudlet is executed. Therefore, if the Cloudlet is using
* a higher percentage of the CPU capacity, it will execute
* more instructions in a given time interval.
*
* In conclusion, the exec and finish time of Cloudlets created
* from Google Cluster trace files won't change according
* to the percentage of CPU capacity the Cloudlets are using.
*
* @return the Set of all {@link Cloudlet}s processed according to a line in the trace file
*/
@Override
public Collection process() {
return super.process();
}
/** There is not pre-process for this implementation. */
@Override
protected void preProcess(){/**/}
@Override
protected void postProcess(){
simulation.addOnSimulationStartListener(this::onSimulationStart);
}
/**
* Adds an event listener that is notified when the simulation starts,
* so that the messages to change Cloudlet resource usage are sent.
*
* @param info the simulation start event information
*/
private void onSimulationStart(final EventInfo info) {
cloudletUsageChangeEvents.forEach(evt -> evt.getSource().schedule(evt));
}
@Override
protected boolean processParsedLineInternal() {
final TaskUsage taskUsage = new TaskUsage(this);
return taskEventsReader
.findObject(taskUsage.getUniqueTaskId())
.map(cloudlet -> requestCloudletUsageChange(cloudlet, taskUsage))
.isPresent();
}
/**
* Send a message to the broker to request change in a Cloudlet resource usage.
* @return true if the request was created, false otherwise
*/
private boolean requestCloudletUsageChange(final Cloudlet cloudlet, final TaskUsage taskUsage)
{
final Runnable resourceUsageUpdateRunnable = () -> {
final StringBuilder builder = new StringBuilder();
if (cloudlet.getUtilizationOfCpu() != taskUsage.getMeanCpuUsageRate()) {
builder.append("CPU Utilization: ")
.append(formatPercentValue(cloudlet.getUtilizationOfCpu())).append(VAL_SEPARATOR)
.append(formatPercentValue(taskUsage.getMeanCpuUsageRate())).append('%').append(COL_SEPARATOR);
cloudlet.setUtilizationModelCpu(createUtilizationModel(cloudlet.getUtilizationModelCpu(), taskUsage.getMeanCpuUsageRate()));
}
if (cloudlet.getUtilizationOfRam() != taskUsage.getCanonicalMemoryUsage()) {
builder.append("RAM Utilization: ")
.append(formatPercentValue(cloudlet.getUtilizationOfRam())).append(VAL_SEPARATOR)
.append(formatPercentValue(taskUsage.getCanonicalMemoryUsage()))
.append('%')
.append(COL_SEPARATOR);
cloudlet.setUtilizationModelRam(createUtilizationModel(cloudlet.getUtilizationModelRam(), taskUsage.getCanonicalMemoryUsage()));
}
/* We don't need to check if some resource was changed because
* if this Runnable is executed is because something was.
* An event to execute such Runnable is just sent in such a condition.*/
final DatacenterBroker broker = cloudlet.getBroker();
broker.LOGGER.trace("{}: {}: {} resource usage changed: {}", simulation.clockStr(), broker.getName(), cloudlet, builder);
cloudlet.getVm().getHost().updateProcessing(simulation.clock());
};
if(hasCloudletResourceUsageChanged(cloudlet, taskUsage)){
addAvailableObject(cloudlet);
final CloudSimEvent evt =
new CloudSimEvent(
taskUsage.getStartTime(), cloudlet.getBroker(),
CloudSimTag.CLOUDLET_UPDATE_ATTRIBUTES, resourceUsageUpdateRunnable);
return cloudletUsageChangeEvents.add(evt);
}
return false;
}
/**
* Creates a {@link UtilizationModel} based on another one.
* If the given instance is a {@link UtilizationModelDynamic},
* otherwise a UtilizationModelDynamic is created without cloning
* another instance (that means it won't have the configurations
* defined by another model).
* Then, the initial utilization of the created {@link UtilizationModelDynamic}
* is set as the given parameter.
*
* @param source the utilization model that will be used as based to
* @param initialUtilization a percentage value (in scale from 0 to 1)
* to define the current utilization for the created {@link UtilizationModelDynamic}
* @return an {@link UtilizationModelDynamic} instance with the current utilization equals
* to the given parameter
*/
private UtilizationModel createUtilizationModel(final UtilizationModel source, final double initialUtilization){
if(source instanceof UtilizationModelDynamic umDynamic){
return new UtilizationModelDynamic(umDynamic, initialUtilization);
}
return new UtilizationModelDynamic(initialUtilization);
}
private boolean hasCloudletResourceUsageChanged(final Cloudlet cloudlet, final TaskUsage taskUsage){
return cloudlet.getUtilizationOfCpu() != taskUsage.getMeanCpuUsageRate() ||
cloudlet.getUtilizationOfRam() != taskUsage.getCanonicalMemoryUsage();
}
}