All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.vespa.model.application.validation.JvmHeapSizeValidator Maven / Gradle / Ivy

// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.application.validation;

import com.yahoo.text.Text;
import com.yahoo.vespa.model.application.validation.Validation.Context;

import java.util.logging.Level;

/**
 * Validates that the container node flavour has enough resources to run configured ONNX models.
 *
 * @author bjorncs
 */
public class JvmHeapSizeValidator implements Validator {

    public static final int percentLimit = 15;
    public static final double gbLimit = 0.6;

    @Override
    public void validate(Context context) {
        if (!context.deployState().isHostedTenantApplication(context.model().getAdmin().getApplicationType())) return;

        context.model().getContainerClusters().forEach((clusterId, appCluster) -> {
            var mp = appCluster.getMemoryPercentage().orElse(null);
            if (mp == null) return;
            if (mp.asAbsoluteGb().isEmpty()) {
                context.deployState().getDeployLogger().log(Level.FINE, "Host resources unknown or percentage overridden with 'allocated-memory'");
                return;
            }
            long jvmModelCost = appCluster.onnxModelCostCalculator().aggregatedModelCostInBytes();
            if (jvmModelCost > 0) {
                double availableMemoryGb = mp.asAbsoluteGb().getAsDouble();
                int percentageOfTotal = mp.ofContainerTotal().getAsInt();
                double modelCostGb = jvmModelCost / (1024D * 1024 * 1024);
                context.deployState().getDeployLogger().log(Level.FINE, () -> Text.format("JVM: %d%% (limit: %d%%), %.2fGB (limit: %.2fGB), ONNX: %.2fGB",
                        percentageOfTotal, percentLimit, availableMemoryGb, gbLimit, modelCostGb));
                if (percentageOfTotal < percentLimit) {
                    context.illegal(Text.format("Allocated percentage of memory of JVM in cluster '%s' is too low (%d%% < %d%%). " +
                                    "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " +
                                    "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).",
                            clusterId, percentageOfTotal, percentLimit, modelCostGb));
                }
                if (availableMemoryGb < gbLimit) {
                    context.illegal(
                            Text.format("Allocated memory to JVM in cluster '%s' is too low (%.2fGB < %.2fGB). " +
                                    "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " +
                                    "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).",
                                            clusterId, availableMemoryGb, gbLimit, modelCostGb));
                }
            }
        });
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy