All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.trino.memory.MemoryManagerConfig Maven / Gradle / Ivy

There is a newer version: 465
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.memory;

import io.airlift.configuration.Config;
import io.airlift.configuration.ConfigDescription;
import io.airlift.configuration.DefunctConfig;
import io.airlift.units.DataSize;
import io.airlift.units.Duration;
import jakarta.validation.constraints.NotNull;

import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.units.DataSize.Unit.GIGABYTE;
import static io.airlift.units.DataSize.succinctBytes;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;
import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.concurrent.TimeUnit.SECONDS;

@DefunctConfig({
        "experimental.cluster-memory-manager-enabled",
        "query.low-memory-killer.enabled",
        "resources.reserved-system-memory"})
public class MemoryManagerConfig
{
    // enforced against user memory allocations
    private DataSize maxQueryMemory = DataSize.of(20, GIGABYTE);
    // enforced against user + system memory allocations (default is maxQueryMemory * 2)
    private DataSize maxQueryTotalMemory;
    private DataSize faultTolerantExecutionCoordinatorTaskMemory = DataSize.of(2, GIGABYTE);
    private DataSize faultTolerantExecutionTaskMemory = DataSize.of(5, GIGABYTE);
    private double faultTolerantExecutionTaskMemoryGrowthFactor = 3.0;
    private double faultTolerantExecutionTaskMemoryEstimationQuantile = 0.9;
    private DataSize faultTolerantExecutionTaskRuntimeMemoryEstimationOverhead = DataSize.of(1, GIGABYTE);
    private boolean faultTolerantExecutionMemoryRequirementIncreaseOnWorkerCrashEnabled = true;
    private DataSize faultTolerantExecutionEagerSpeculativeTasksNodeMemoryOvercommit = DataSize.of(20, GIGABYTE);
    private LowMemoryQueryKillerPolicy lowMemoryQueryKillerPolicy = LowMemoryQueryKillerPolicy.TOTAL_RESERVATION_ON_BLOCKED_NODES;
    private LowMemoryTaskKillerPolicy lowMemoryTaskKillerPolicy = LowMemoryTaskKillerPolicy.TOTAL_RESERVATION_ON_BLOCKED_NODES;
    // default value is overwritten for fault tolerant execution in {@link #applyFaultTolerantExecutionDefaults()}}
    private Duration killOnOutOfMemoryDelay = new Duration(30, SECONDS);

    @NotNull
    public DataSize getMaxQueryMemory()
    {
        return maxQueryMemory;
    }

    @Config("query.max-memory")
    public MemoryManagerConfig setMaxQueryMemory(DataSize maxQueryMemory)
    {
        this.maxQueryMemory = maxQueryMemory;
        return this;
    }

    @NotNull
    public DataSize getMaxQueryTotalMemory()
    {
        if (maxQueryTotalMemory == null) {
            return succinctBytes(maxQueryMemory.toBytes() * 2);
        }
        return maxQueryTotalMemory;
    }

    @Config("query.max-total-memory")
    public MemoryManagerConfig setMaxQueryTotalMemory(DataSize maxQueryTotalMemory)
    {
        this.maxQueryTotalMemory = maxQueryTotalMemory;
        return this;
    }

    @NotNull
    public DataSize getFaultTolerantExecutionCoordinatorTaskMemory()
    {
        return faultTolerantExecutionCoordinatorTaskMemory;
    }

    @Config("fault-tolerant-execution-coordinator-task-memory")
    @ConfigDescription("Estimated amount of memory a single coordinator task will use when task level retries are used; value is used when allocating nodes for tasks execution")
    public MemoryManagerConfig setFaultTolerantExecutionCoordinatorTaskMemory(DataSize faultTolerantExecutionCoordinatorTaskMemory)
    {
        this.faultTolerantExecutionCoordinatorTaskMemory = faultTolerantExecutionCoordinatorTaskMemory;
        return this;
    }

    @NotNull
    public DataSize getFaultTolerantExecutionTaskMemory()
    {
        return faultTolerantExecutionTaskMemory;
    }

    @Config("fault-tolerant-execution-task-memory")
    @ConfigDescription("Estimated amount of memory a single task will use when task level retries are used; value is used when allocating nodes for tasks execution")
    public MemoryManagerConfig setFaultTolerantExecutionTaskMemory(DataSize faultTolerantExecutionTaskMemory)
    {
        this.faultTolerantExecutionTaskMemory = faultTolerantExecutionTaskMemory;
        return this;
    }

    public double getFaultTolerantExecutionTaskMemoryGrowthFactor()
    {
        return faultTolerantExecutionTaskMemoryGrowthFactor;
    }

    @Config("fault-tolerant-execution-task-memory-growth-factor")
    @ConfigDescription("Factor by which estimated task memory is increased if task execution runs out of memory; value is used allocating nodes for tasks execution")
    public MemoryManagerConfig setFaultTolerantExecutionTaskMemoryGrowthFactor(double faultTolerantExecutionTaskMemoryGrowthFactor)
    {
        checkArgument(faultTolerantExecutionTaskMemoryGrowthFactor >= 1.0, "faultTolerantExecutionTaskMemoryGrowthFactor must not be less than 1.0");
        this.faultTolerantExecutionTaskMemoryGrowthFactor = faultTolerantExecutionTaskMemoryGrowthFactor;
        return this;
    }

    public double getFaultTolerantExecutionTaskMemoryEstimationQuantile()
    {
        return faultTolerantExecutionTaskMemoryEstimationQuantile;
    }

    @Config("fault-tolerant-execution-task-memory-estimation-quantile")
    @ConfigDescription("What quantile of memory usage of completed tasks to look at when estimating memory usage for upcoming tasks")
    public MemoryManagerConfig setFaultTolerantExecutionTaskMemoryEstimationQuantile(double faultTolerantExecutionTaskMemoryEstimationQuantile)
    {
        checkArgument(faultTolerantExecutionTaskMemoryEstimationQuantile >= 0.0 && faultTolerantExecutionTaskMemoryEstimationQuantile <= 1.0,
                "fault-tolerant-execution-task-memory-estimation-quantile must not be in [0.0, 1.0] range");
        this.faultTolerantExecutionTaskMemoryEstimationQuantile = faultTolerantExecutionTaskMemoryEstimationQuantile;
        return this;
    }

    @NotNull
    public DataSize getFaultTolerantExecutionTaskRuntimeMemoryEstimationOverhead()
    {
        return faultTolerantExecutionTaskRuntimeMemoryEstimationOverhead;
    }

    @Config("fault-tolerant-execution-task-runtime-memory-estimation-overhead")
    @ConfigDescription("Extra memory to account for when estimating actual task runtime memory consumption")
    public MemoryManagerConfig setFaultTolerantExecutionTaskRuntimeMemoryEstimationOverhead(DataSize faultTolerantExecutionTaskRuntimeMemoryEstimationOverhead)
    {
        this.faultTolerantExecutionTaskRuntimeMemoryEstimationOverhead = faultTolerantExecutionTaskRuntimeMemoryEstimationOverhead;
        return this;
    }

    public boolean isFaultTolerantExecutionMemoryRequirementIncreaseOnWorkerCrashEnabled()
    {
        return faultTolerantExecutionMemoryRequirementIncreaseOnWorkerCrashEnabled;
    }

    @Config("fault-tolerant-execution.memory-requirement-increase-on-worker-crash-enabled")
    @ConfigDescription("Increase memory requirement for tasks failed due to a suspected worker crash")
    public MemoryManagerConfig setFaultTolerantExecutionMemoryRequirementIncreaseOnWorkerCrashEnabled(boolean faultTolerantExecutionMemoryRequirementIncreaseOnWorkerCrashEnabled)
    {
        this.faultTolerantExecutionMemoryRequirementIncreaseOnWorkerCrashEnabled = faultTolerantExecutionMemoryRequirementIncreaseOnWorkerCrashEnabled;
        return this;
    }

    public DataSize getFaultTolerantExecutionEagerSpeculativeTasksNodeMemoryOvercommit()
    {
        return faultTolerantExecutionEagerSpeculativeTasksNodeMemoryOvercommit;
    }

    @Config("fault-tolerant-execution-eager-speculative-tasks-node_memory-overcommit")
    public MemoryManagerConfig setFaultTolerantExecutionEagerSpeculativeTasksNodeMemoryOvercommit(DataSize faultTolerantExecutionEagerSpeculativeTasksNodeMemoryOvercommit)
    {
        this.faultTolerantExecutionEagerSpeculativeTasksNodeMemoryOvercommit = faultTolerantExecutionEagerSpeculativeTasksNodeMemoryOvercommit;
        return this;
    }

    public LowMemoryQueryKillerPolicy getLowMemoryQueryKillerPolicy()
    {
        return lowMemoryQueryKillerPolicy;
    }

    @Config("query.low-memory-killer.policy")
    public MemoryManagerConfig setLowMemoryQueryKillerPolicy(LowMemoryQueryKillerPolicy lowMemoryQueryKillerPolicy)
    {
        this.lowMemoryQueryKillerPolicy = lowMemoryQueryKillerPolicy;
        return this;
    }

    public LowMemoryTaskKillerPolicy getLowMemoryTaskKillerPolicy()
    {
        return lowMemoryTaskKillerPolicy;
    }

    @Config("task.low-memory-killer.policy")
    public MemoryManagerConfig setLowMemoryTaskKillerPolicy(LowMemoryTaskKillerPolicy lowMemoryTaskKillerPolicy)
    {
        this.lowMemoryTaskKillerPolicy = lowMemoryTaskKillerPolicy;
        return this;
    }

    @NotNull
    public Duration getKillOnOutOfMemoryDelay()
    {
        return killOnOutOfMemoryDelay;
    }

    @Config("query.low-memory-killer.delay")
    @ConfigDescription("Delay between cluster running low on memory and invoking killer")
    public MemoryManagerConfig setKillOnOutOfMemoryDelay(Duration killOnOutOfMemoryDelay)
    {
        this.killOnOutOfMemoryDelay = killOnOutOfMemoryDelay;
        return this;
    }

    public void applyFaultTolerantExecutionDefaults()
    {
        killOnOutOfMemoryDelay = new Duration(0, MINUTES);
    }

    public enum LowMemoryQueryKillerPolicy
    {
        NONE,
        TOTAL_RESERVATION,
        TOTAL_RESERVATION_ON_BLOCKED_NODES,
        /**/;

        public static LowMemoryQueryKillerPolicy fromString(String value)
        {
            return switch (value.toLowerCase(ENGLISH)) {
                case "none" -> NONE;
                case "total-reservation" -> TOTAL_RESERVATION;
                case "total-reservation-on-blocked-nodes" -> TOTAL_RESERVATION_ON_BLOCKED_NODES;
                default -> throw new IllegalArgumentException(format("Unrecognized value: '%s'", value));
            };
        }
    }

    public enum LowMemoryTaskKillerPolicy
    {
        NONE,
        TOTAL_RESERVATION_ON_BLOCKED_NODES,
        LEAST_WASTE,
        /**/;

        public static LowMemoryTaskKillerPolicy fromString(String value)
        {
            return switch (value.toLowerCase(ENGLISH)) {
                case "none" -> NONE;
                case "total-reservation-on-blocked-nodes" -> TOTAL_RESERVATION_ON_BLOCKED_NODES;
                case "least-waste" -> LEAST_WASTE;
                default -> throw new IllegalArgumentException(format("Unrecognized value: '%s'", value));
            };
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy