All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.vespa.hosted.controller.ApplicationController Maven / Gradle / Ivy

// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller;

import com.yahoo.component.Version;
import com.yahoo.component.VersionCompatibility;
import com.yahoo.config.application.api.DeploymentInstanceSpec;
import com.yahoo.config.application.api.DeploymentSpec;
import com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy;
import com.yahoo.config.application.api.ValidationId;
import com.yahoo.config.application.api.ValidationOverrides;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.CloudAccount;
import com.yahoo.config.provision.CloudName;
import com.yahoo.config.provision.DockerImage;
import com.yahoo.config.provision.InstanceName;
import com.yahoo.config.provision.Tags;
import com.yahoo.config.provision.TenantName;
import com.yahoo.config.provision.zone.ZoneId;
import com.yahoo.text.Text;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.athenz.api.AthenzDomain;
import com.yahoo.vespa.athenz.api.AthenzIdentity;
import com.yahoo.vespa.athenz.api.AthenzPrincipal;
import com.yahoo.vespa.athenz.api.AthenzService;
import com.yahoo.vespa.athenz.api.AthenzUser;
import com.yahoo.vespa.flags.FetchVector;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.flags.ListFlag;
import com.yahoo.vespa.flags.PermanentFlags;
import com.yahoo.vespa.flags.StringFlag;
import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeploymentData;
import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeploymentEndpoints;
import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.api.identifiers.InstanceId;
import com.yahoo.vespa.hosted.controller.api.integration.billing.BillingController;
import com.yahoo.vespa.hosted.controller.api.integration.billing.Plan;
import com.yahoo.vespa.hosted.controller.api.integration.billing.Quota;
import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificate;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.ApplicationReindexing;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServer;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.DeploymentResult;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.DeploymentResult.LogEntry;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeFilter;
import com.yahoo.vespa.hosted.controller.api.integration.dataplanetoken.DataplaneTokenVersions;
import com.yahoo.vespa.hosted.controller.api.integration.dataplanetoken.TokenId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationStore;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.ArtifactRepository;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterId;
import com.yahoo.vespa.hosted.controller.api.integration.noderepository.RestartFilter;
import com.yahoo.vespa.hosted.controller.api.integration.secrets.TenantSecretStore;
import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.Deployment;
import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics;
import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics.Warning;
import com.yahoo.vespa.hosted.controller.application.DeploymentQuotaCalculator;
import com.yahoo.vespa.hosted.controller.application.EndpointList;
import com.yahoo.vespa.hosted.controller.application.QuotaUsage;
import com.yahoo.vespa.hosted.controller.application.SystemApplication;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;
import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackage;
import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackageStream;
import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackageValidator;
import com.yahoo.vespa.hosted.controller.application.pkg.BasicServicesXml;
import com.yahoo.vespa.hosted.controller.athenz.impl.AthenzFacade;
import com.yahoo.vespa.hosted.controller.certificate.EndpointCertificates;
import com.yahoo.vespa.hosted.controller.concurrent.Once;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger;
import com.yahoo.vespa.hosted.controller.deployment.JobStatus;
import com.yahoo.vespa.hosted.controller.deployment.Run;
import com.yahoo.vespa.hosted.controller.notification.Notification;
import com.yahoo.vespa.hosted.controller.notification.NotificationSource;
import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
import com.yahoo.vespa.hosted.controller.routing.PreparedEndpoints;
import com.yahoo.vespa.hosted.controller.security.AccessControl;
import com.yahoo.vespa.hosted.controller.security.Credentials;
import com.yahoo.vespa.hosted.controller.support.access.SupportAccessGrant;
import com.yahoo.vespa.hosted.controller.tenant.AthenzTenant;
import com.yahoo.vespa.hosted.controller.tenant.CloudTenant;
import com.yahoo.vespa.hosted.controller.tenant.Tenant;
import com.yahoo.vespa.hosted.controller.versions.VersionStatus;
import com.yahoo.vespa.hosted.controller.versions.VespaVersion;
import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence;
import com.yahoo.yolean.Exceptions;

import java.io.ByteArrayInputStream;
import java.security.Principal;
import java.security.cert.X509Certificate;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.function.UnaryOperator;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static com.yahoo.vespa.flags.FetchVector.Dimension.INSTANCE_ID;
import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Node.State.active;
import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Node.State.reserved;
import static com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence.broken;
import static com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence.high;
import static com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence.low;
import static com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence.normal;
import static java.util.Comparator.naturalOrder;
import static java.util.stream.Collectors.collectingAndThen;
import static java.util.stream.Collectors.counting;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toSet;

/**
 * A singleton owned by {@link Controller} which contains the methods and state for controlling applications.
 *
 * @author bratseth
 */
public class ApplicationController {

    private static final Logger log = Logger.getLogger(ApplicationController.class.getName());

    /** The controller owning this */
    private final Controller controller;

    /** For persistence */
    private final CuratorDb curator;

    // Collaborators obtained from the controller and its service registry, or created in the constructor.
    private final ArtifactRepository artifactRepository;
    private final ApplicationStore applicationStore;
    private final AccessControl accessControl;
    private final ConfigServer configServer;
    private final Clock clock;
    private final DeploymentTrigger deploymentTrigger;
    private final ApplicationPackageValidator applicationPackageValidator;
    private final EndpointCertificates endpointCertificates;

    // Permanent flags bound to the flag source given at construction.
    // NOTE(review): the type arguments below were lost in extraction and have been restored — confirm against PermanentFlags.
    private final StringFlag dockerImageRepoFlag;
    private final ListFlag<String> incompatibleVersions;
    private final BillingController billingController;
    private final ListFlag<String> cloudAccountsFlag;

    /**
     * In-memory deployment info, exposed through {@link #deploymentInfo()}; concurrent, since it is shared.
     * NOTE(review): key and value types were lost in extraction and have been restored — confirm the value type.
     */
    private final Map<DeploymentId, com.yahoo.vespa.hosted.controller.api.integration.configserver.Application> deploymentInfo = new ConcurrentHashMap<>();

    /**
     * Wires this up against the given controller: collaborators are taken from its service registry, the permanent
     * flags read by this are bound to the given flag source, and a task which re-stores every known application
     * is handed to {@link Once#after}.
     */
    ApplicationController(Controller controller, CuratorDb curator, AccessControl accessControl, Clock clock,
                          FlagSource flagSource, BillingController billingController) {
        this.controller = Objects.requireNonNull(controller);
        this.curator = Objects.requireNonNull(curator);
        this.accessControl = Objects.requireNonNull(accessControl);
        this.configServer = controller.serviceRegistry().configServer();
        this.clock = Objects.requireNonNull(clock);
        this.billingController = Objects.requireNonNull(billingController);

        this.artifactRepository = controller.serviceRegistry().artifactRepository();
        this.applicationStore = controller.serviceRegistry().applicationStore();
        this.dockerImageRepoFlag = PermanentFlags.DOCKER_IMAGE_REPO.bindTo(flagSource);
        this.incompatibleVersions = PermanentFlags.INCOMPATIBLE_VERSIONS.bindTo(flagSource);
        this.cloudAccountsFlag = PermanentFlags.CLOUD_ACCOUNTS.bindTo(flagSource);
        this.deploymentTrigger = new DeploymentTrigger(controller, clock);
        this.applicationPackageValidator = new ApplicationPackageValidator(controller);
        this.endpointCertificates = new EndpointCertificates(controller,
                                                             controller.serviceRegistry().endpointCertificateProvider(),
                                                             controller.serviceRegistry().endpointCertificateValidator());

        // Update serialization format of all applications
        Once.after(Duration.ofMinutes(1), () -> {
            Instant begin = clock.instant();
            int written = 0;
            for (TenantAndApplicationId id : curator.readApplicationIds()) {
                lockApplicationIfPresent(id, locked -> {
                    // Add any instance which is declared in the deployment spec, but not yet present, before storing.
                    for (var spec : locked.get().deploymentSpec().instances()) {
                        boolean missing = ! locked.get().instances().containsKey(spec.name());
                        if (missing) {
                            locked = withNewInstance(locked, id.instance(spec.name()));
                        }
                    }
                    store(locked);
                });
                written++;
            }
            log.log(Level.INFO, Text.format("Wrote %d applications in %s", written,
                                            Duration.between(begin, clock.instant())));
        });
    }

    /**
     * Validates the given application package against the given application,
     * using the current instant of this controller's clock.
     */
    public void validatePackage(ApplicationPackage applicationPackage, Application application) {
        Instant now = clock.instant();
        applicationPackageValidator.validate(application, applicationPackage, now);
    }

    /** Returns the cloud accounts listed for the given tenant by the cloud accounts flag. */
    public Set<CloudAccount> accountsOf(TenantName tenant) {
        return cloudAccountsFlag.with(FetchVector.Dimension.TENANT_ID, tenant.value())
                                .value().stream()
                                .map(CloudAccount::from)
                                .collect(Collectors.toSet());
    }

    /** Returns the application with the given id, or empty if it is not present */
    public Optional<Application> getApplication(TenantAndApplicationId id) {
        return curator.readApplication(id);
    }

    /** Returns the instance with the given id, or empty if the application or the instance is not present */
    public Optional<Instance> getInstance(ApplicationId id) {
        return getApplication(TenantAndApplicationId.from(id)).flatMap(application -> application.get(id.instance()));
    }

    /**
     * Returns in-memory info for the given deployment pulled from the node repo.
     * Info on any existing deployment can be missing if it has not yet been fetched since this instance was started.
     * This is kept up to date by DeploymentInfoMaintainer.
     * Accessing this is thread safe.
     * The returned map is the live, mutable map held by this, not a copy.
     */
    // TODO: Replace the wire level Application by a DeploymentInfo class in the model
    // NOTE(review): type arguments were lost in extraction and have been restored — confirm the value type.
    public Map<DeploymentId, com.yahoo.vespa.hosted.controller.api.integration.configserver.Application> deploymentInfo() { return deploymentInfo; }

    /**
     * Triggers reindexing for the given document types in the given clusters, for the given application.
     * 

* If no clusters are given, reindexing is triggered for the entire application; otherwise * if no documents types are given, reindexing is triggered for all given clusters; otherwise * reindexing is triggered for the cartesian product of the given clusters and document types. */ public void reindex(ApplicationId id, ZoneId zoneId, List clusterNames, List documentTypes, boolean indexedOnly, Double speed, String cause) { configServer.reindex(new DeploymentId(id, zoneId), clusterNames, documentTypes, indexedOnly, speed, cause); } /** Returns the reindexing status for the given application in the given zone. */ public ApplicationReindexing applicationReindexing(ApplicationId id, ZoneId zoneId) { return configServer.getReindexing(new DeploymentId(id, zoneId)); } /** Enables reindexing for the given application in the given zone. */ public void enableReindexing(ApplicationId id, ZoneId zoneId) { configServer.enableReindexing(new DeploymentId(id, zoneId)); } /** Disables reindexing for the given application in the given zone. */ public void disableReindexing(ApplicationId id, ZoneId zoneId) { configServer.disableReindexing(new DeploymentId(id, zoneId)); } /** * Returns the application with the given id * * @throws IllegalArgumentException if it does not exist */ public Application requireApplication(TenantAndApplicationId id) { return getApplication(id).orElseThrow(() -> new IllegalArgumentException(id + " not found")); } /** * Returns the instance with the given id * * @throws IllegalArgumentException if it does not exist */ // TODO jonvm: remove or inline public Instance requireInstance(ApplicationId id) { return getInstance(id).orElseThrow(() -> new IllegalArgumentException(id + " not found")); } /** Returns a snapshot of all applications */ public List asList() { return curator.readApplications(false); } /** * Returns a snapshot of all readable applications. Unlike {@link ApplicationController#asList()} this ignores * applications that cannot currently be read (e.g. 
due to serialization issues) and may return an incomplete * snapshot. * * This should only be used in cases where acting on a subset of applications is better than none. */ public List readable() { return curator.readApplications(true); } /** Returns the ID of all known applications. */ public List idList() { return curator.readApplicationIds(); } /** Returns a snapshot of all applications of a tenant */ public List asList(TenantName tenant) { return curator.readApplications(tenant); } public ArtifactRepository artifacts() { return artifactRepository; } public ApplicationStore applicationStore() { return applicationStore; } /** Returns all currently reachable content clusters among the given deployments. */ public Map> reachableContentClustersByZone(Collection ids) { Map> clusters = new TreeMap<>(Comparator.comparing(ZoneId::value)); for (DeploymentId id : ids) if (isHealthy(id)) clusters.put(id.zoneId(), List.copyOf(configServer.getContentClusters(id))); return Collections.unmodifiableMap(clusters); } /** Reads the oldest installed platform for the given application and zone from job history, or a node repo. */ private Optional oldestInstalledPlatform(JobStatus job) { Version oldest = null; for (Run run : job.runs().descendingMap().values()) { Version version = run.versions().targetPlatform(); if (oldest == null || version.isBefore(oldest)) oldest = version; if (run.hasSucceeded()) return Optional.of(oldest); } // If no successful run was found, ask the node repository in the relevant zone. return oldestInstalledPlatform(job.id()); } /** Reads the oldest installed platform for the given application and zone from the node repo of that zone. */ private Optional oldestInstalledPlatform(JobId job) { return configServer.nodeRepository().list(job.type().zone(), NodeFilter.all() .applications(job.application()) .states(active, reserved)) .stream() .map(Node::currentVersion) .filter(version -> ! 
version.isEmpty()) .min(naturalOrder()); } /** Returns the oldest Vespa version installed on any active or reserved production node for the given application. */ public Optional oldestInstalledPlatform(Application application) { return controller.jobController().deploymentStatus(application).jobs() .production() .not().test() .asList().stream() .map(this::oldestInstalledPlatform) .flatMap(Optional::stream) .min(naturalOrder()); } /** * Returns the preferred Vespa version to compile against, for *

* The returned version is not newer than the oldest deployed platform for the application, unless * the target major differs from the oldest deployed platform, in which case it is not newer than * the oldest available platform version on that major instead. *

* The returned version is compatible with a platform version available in the system. *

* A candidate is sought first among versions with non-broken confidence, then among those with forgotten confidence. *

* The returned version is the latest in the relevant candidate set. *

* If no such version exists, an {@link IllegalArgumentException} is thrown. */ public Version compileVersion(TenantAndApplicationId id, OptionalInt wantedMajor) { // Read version status, and pick out target platforms we could run the compiled package on. Optional application = getApplication(id); Optional oldestInstalledPlatform = application.flatMap(this::oldestInstalledPlatform); VersionStatus versionStatus = controller.readVersionStatus(); UpgradePolicy policy = application.flatMap(app -> app.deploymentSpec().instances().stream() .map(DeploymentInstanceSpec::upgradePolicy) .max(naturalOrder())) .orElse(UpgradePolicy.defaultPolicy); Confidence targetConfidence = switch (policy) { case canary -> broken; case defaultPolicy -> normal; case conservative -> high; }; // Target platforms are all versions not older than the oldest installed platform, unless forcing a major version change. // Only platforms not older than the system version, and with appropriate confidence, are considered targets. Predicate isTargetPlatform = wantedMajor.isEmpty() && oldestInstalledPlatform.isEmpty() ? __ -> true // No preferences for version: any platform version is ok. : wantedMajor.isEmpty() || (oldestInstalledPlatform.isPresent() && wantedMajor.getAsInt() == oldestInstalledPlatform.get().getMajor()) ? version -> ! version.isBefore(oldestInstalledPlatform.get()) // Major empty, or on same as oldest: ensure not a platform downgrade. : version -> wantedMajor.getAsInt() == version.getMajor(); // Major specified, and not on same as oldest (possibly empty): any on that major. 
Set platformVersions = versionStatus.deployableVersions().stream() .filter(version -> version.confidence().equalOrHigherThan(targetConfidence)) .map(VespaVersion::versionNumber) .filter(isTargetPlatform) .collect(toSet()); oldestInstalledPlatform.ifPresent(fallback -> { if (wantedMajor.isEmpty() || wantedMajor.getAsInt() == fallback.getMajor()) platformVersions.add(fallback); }); if (platformVersions.isEmpty()) throw new IllegalArgumentException("this system has no available versions" + (wantedMajor.isPresent() ? " on specified major: " + wantedMajor.getAsInt() : "")); // The returned compile version must be compatible with at least one target platform. // If it is incompatible with any of the current platforms, the system will trigger a platform change. // The returned compile version should also be at least as old as both the oldest target platform version, // and the oldest current platform, unless the two are incompatible, in which case only the target matters, // or there are no installed platforms, in which case we prefer the newest target platform. VersionCompatibility compatibility = versionCompatibility(id.defaultInstance()); // Wrong id level >_< Version oldestTargetPlatform = platformVersions.stream().min(naturalOrder()).get(); Version newestVersion = oldestInstalledPlatform.isEmpty() ? platformVersions.stream().max(naturalOrder()).get() : compatibility.accept(oldestInstalledPlatform.get(), oldestTargetPlatform) && oldestInstalledPlatform.get().isBefore(oldestTargetPlatform) ? oldestInstalledPlatform.get() : oldestTargetPlatform; Predicate systemCompatible = version -> ! version.isAfter(newestVersion) && platformVersions.stream().anyMatch(platform -> compatibility.accept(platform, version)); // Find the newest, system-compatible version with non-broken confidence. 
Optional nonBroken = versionStatus.versions().stream() .filter(VespaVersion::isReleased) .filter(version -> version.confidence().equalOrHigherThan(low)) .map(VespaVersion::versionNumber) .filter(systemCompatible) .max(naturalOrder()); // Fall back to the newest, system-compatible version with unknown confidence. For public systems, this implies high confidence. Set knownVersions = versionStatus.versions().stream().map(VespaVersion::versionNumber).collect(toSet()); Optional unknown = controller.mavenRepository().metadata().versions(clock.instant()).stream() .filter(version -> ! knownVersions.contains(version)) .filter(systemCompatible) .max(naturalOrder()); if (nonBroken.isPresent()) { if (controller.system().isPublic() && unknown.isPresent() && unknown.get().isAfter(nonBroken.get())) return unknown.get(); return nonBroken.get(); } if (unknown.isPresent()) return unknown.get(); throw new IllegalArgumentException("no suitable, released compile version exists" + (wantedMajor.isPresent() ? " for specified major: " + wantedMajor.getAsInt() : "")); } /** * Creates a new application for an existing tenant. 
* * @throws IllegalArgumentException if the application already exists */ public Application createApplication(TenantAndApplicationId id, Credentials credentials) { try (Mutex lock = lock(id)) { if (getApplication(id).isPresent()) throw new IllegalArgumentException("Could not create '" + id + "': Application already exists"); if (getApplication(dashToUnderscore(id)).isPresent()) // VESPA-1945 throw new IllegalArgumentException("Could not create '" + id + "': Application " + dashToUnderscore(id) + " already exists"); com.yahoo.vespa.hosted.controller.api.identifiers.ApplicationId.validate(id.application().value()); if (controller.tenants().get(id.tenant()).isEmpty()) throw new IllegalArgumentException("Could not create '" + id + "': This tenant does not exist"); accessControl.createApplication(id, credentials); LockedApplication locked = new LockedApplication(new Application(id, clock.instant()), lock); store(locked); log.info("Created " + locked); return locked.get(); } } /** * Creates a new instance for an existing application. * * @throws IllegalArgumentException if the instance already exists, or has an invalid instance name. 
*/ public void createInstance(ApplicationId id) { lockApplicationOrThrow(TenantAndApplicationId.from(id), application -> { store(withNewInstance(application, id)); }); } /** Returns given application with a new instance */ public LockedApplication withNewInstance(LockedApplication application, ApplicationId instance) { if (instance.instance().isTester()) throw new IllegalArgumentException("'" + instance + "' is a tester application!"); InstanceId.validate(instance.instance().value()); if (getInstance(instance).isPresent()) throw new IllegalArgumentException("Could not create '" + instance + "': Instance already exists"); if (getInstance(dashToUnderscore(instance)).isPresent()) // VESPA-1945 throw new IllegalArgumentException("Could not create '" + instance + "': Instance " + dashToUnderscore(instance) + " already exists"); log.info("Created " + instance); return application.withNewInstance(instance.instance()); } /** Deploys an application package for an existing application instance. */ public DeploymentResult deploy(JobId job, boolean deploySourceVersions, Consumer deployLogger, UnaryOperator> cloudAccountOverride) { if (job.application().instance().isTester()) throw new IllegalArgumentException("'" + job.application() + "' is a tester application!"); TenantAndApplicationId applicationId = TenantAndApplicationId.from(job.application()); ZoneId zone = job.type().zone(); DeploymentId deployment = new DeploymentId(job.application(), zone); try (Mutex deploymentLock = lockForDeployment(job.application(), zone)) { Run run = controller.jobController().last(job) .orElseThrow(() -> new IllegalStateException("No known run of '" + job + "'")); if (run.hasEnded()) throw new IllegalStateException("No deployment expected for " + job + " now, as no job is running"); Version platform = run.versions().sourcePlatform().filter(__ -> deploySourceVersions).orElse(run.versions().targetPlatform()); RevisionId revision = run.versions().sourceRevision().filter(__ -> 
deploySourceVersions).orElse(run.versions().targetRevision()); ApplicationPackageStream applicationPackage = new ApplicationPackageStream(() -> applicationStore.stream(deployment, revision)); AtomicReference lastRevision = new AtomicReference<>(); // Prepare endpoints lazily Supplier preparedEndpoints = () -> { try (Mutex lock = lock(applicationId)) { LockedApplication application = new LockedApplication(requireApplication(applicationId), lock); application.get().revisions().last().map(ApplicationVersion::id).ifPresent(lastRevision::set); return prepareEndpoints(deployment, job, application, applicationPackage, deployLogger, lock); } }; // Carry out deployment without holding the application lock. DeploymentDataAndResult dataAndResult = deploy(job.application(), applicationPackage, zone, platform, preparedEndpoints, run.isDryRun(), run.testerCertificate(), cloudAccountOverride); // Record the quota usage for this application var quotaUsage = deploymentQuotaUsage(zone, job.application()); // For direct deployments use the full deployment ID, but otherwise use just the tenant and application as // the source since it's the same application, so it should have the same warnings. // These notifications are only updated when the last submitted revision is deployed here. NotificationSource source = zone.environment().isManuallyDeployed() ? NotificationSource.from(deployment) : revision.equals(lastRevision.get()) ? 
NotificationSource.from(applicationId) : null; if (source != null) { List warnings = Optional.ofNullable(dataAndResult.result().log()) .map(logs -> logs.stream() .filter(LogEntry::concernsPackage) .filter(log -> log.level().intValue() >= Level.WARNING.intValue()) .map(LogEntry::message) .sorted() .distinct() .toList()) .orElseGet(List::of); if (warnings.isEmpty()) controller.notificationsDb().removeNotification(source, Notification.Type.applicationPackage); else controller.notificationsDb().setApplicationPackageNotification(source, warnings); } lockApplicationOrThrow(applicationId, application -> store(application.with(job.application().instance(), i -> i.withNewDeployment(zone, revision, platform, clock.instant(), warningsFrom(dataAndResult.result().log()), quotaUsage, dataAndResult.data().cloudAccount().orElse(CloudAccount.empty), dataAndResult.data.dataPlaneTokens())))); return dataAndResult.result(); } } private PreparedEndpoints prepareEndpoints(DeploymentId deployment, JobId job, LockedApplication application, ApplicationPackageStream applicationPackage, Consumer deployLogger, Mutex applicationLock) { Instance instance = application.get().require(job.application().instance()); Tags tags = applicationPackage.truncatedPackage().deploymentSpec().instance(instance.name()) .map(DeploymentInstanceSpec::tags) .orElseGet(Tags::empty); EndpointCertificate certificate = endpointCertificates.get(deployment, applicationPackage.truncatedPackage().deploymentSpec(), applicationLock); deployLogger.accept("Using CA signed certificate version %s".formatted(certificate.version())); BasicServicesXml services = applicationPackage.truncatedPackage().services(deployment, tags); return controller.routing().of(deployment).prepare(services, certificate, application); } /** Stores the deployment spec and validation overrides from the application package, and runs cleanup. Returns new instances. 
*/ public List storeWithUpdatedConfig(LockedApplication application, ApplicationPackage applicationPackage) { validatePackage(applicationPackage, application.get()); application = application.with(applicationPackage.deploymentSpec()); application = application.with(applicationPackage.validationOverrides()); var existingInstances = application.get().instances(); var declaredInstances = applicationPackage.deploymentSpec().instances(); for (var declaredInstance : declaredInstances) { if ( ! existingInstances.containsKey(declaredInstance.name())) application = withNewInstance(application, application.get().id().instance(declaredInstance.name())); } // Delete zones not listed in DeploymentSpec, if allowed // We do this at deployment time for externally built applications, and at submission time // for internally built ones, to be able to return a validation failure message when necessary for (InstanceName name : existingInstances.keySet()) { application = withoutDeletedDeployments(application, name); } // Validate new deployment spec thoroughly before storing it. DeploymentStatus status = controller.jobController().deploymentStatus(application.get()); Change dummyChange = Change.of(RevisionId.forProduction(Long.MAX_VALUE)); // Should always run everywhere. for (var jobs : status.jobsToRun(applicationPackage.deploymentSpec().instanceNames().stream() .collect(toMap(name -> name, __ -> dummyChange))) .entrySet()) { for (var job : jobs.getValue()) { decideCloudAccountOf(new DeploymentId(jobs.getKey().application(), job.type().zone()), applicationPackage.deploymentSpec()); } } for (Notification notification : controller.notificationsDb().listNotifications(NotificationSource.from(application.get().id()), true)) { if ( notification.source().instance().isPresent() && ( ! declaredInstances.contains(notification.source().instance().get()) || ! 
notification.source().zoneId().map(application.get().require(notification.source().instance().get()).deployments()::containsKey).orElse(false))) controller.notificationsDb().removeNotifications(notification.source()); } store(application); return declaredInstances.stream() .map(DeploymentInstanceSpec::name) .filter(instance -> ! existingInstances.containsKey(instance)) .toList(); } /** Deploy a system application to given zone */ public void deploy(SystemApplication application, ZoneId zone, Version version, boolean allowDowngrade) { if (application.hasApplicationPackage()) { deploySystemApplicationPackage(application, zone, version); } else { // Deploy by calling node repository directly configServer.nodeRepository().upgrade(zone, application.nodeType(), version, allowDowngrade); } } /** Deploy a system application to given zone */ public DeploymentResult deploySystemApplicationPackage(SystemApplication application, ZoneId zone, Version version) { if (application.hasApplicationPackage()) { ApplicationPackageStream applicationPackage = new ApplicationPackageStream( () -> new ByteArrayInputStream(artifactRepository.getSystemApplicationPackage(application.id(), zone, version)) ); return deploy(application.id(), applicationPackage, zone, version, null, false, Optional.empty(), UnaryOperator.identity()).result(); } else { throw new RuntimeException("This system application does not have an application package: " + application.id().toShortString()); } } /** Deploys the given tester application to the given zone. 
*/ public DeploymentResult deployTester(TesterId tester, ApplicationPackageStream applicationPackage, ZoneId zone, Version platform, UnaryOperator> cloudAccountOverride) { return deploy(tester.id(), applicationPackage, zone, platform, null, false, Optional.empty(), cloudAccountOverride).result(); } private record DeploymentDataAndResult(DeploymentData data, DeploymentResult result) {} private DeploymentDataAndResult deploy(ApplicationId application, ApplicationPackageStream applicationPackage, ZoneId zone, Version platform, Supplier preparedEndpoints, boolean dryRun, Optional testerCertificate, UnaryOperator> cloudAccountOverride) { DeploymentId deployment = new DeploymentId(application, zone); // Routing and metadata may have changed, so we need to refresh state after deployment, even if deployment fails. interface CleanCloseable extends AutoCloseable { void close(); } AtomicReference generatedEndpoints = new AtomicReference<>(EndpointList.EMPTY); try (CleanCloseable postDeployment = () -> updateRoutingAndMeta(deployment, applicationPackage, generatedEndpoints)) { Optional dockerImageRepo = Optional.ofNullable( dockerImageRepoFlag .with(FetchVector.Dimension.ZONE_ID, zone.value()) .with(INSTANCE_ID, application.serializedForm()) .value()) .filter(s -> !s.isBlank()) .map(DockerImage::fromString); Optional domain = controller.tenants().get(application.tenant()) .filter(tenant-> tenant instanceof AthenzTenant) .map(tenant -> ((AthenzTenant)tenant).domain()); Supplier deploymentQuota = () -> DeploymentQuotaCalculator.calculate(billingController.getQuota(application.tenant()), asList(application.tenant()), application, zone, applicationPackage.truncatedPackage().deploymentSpec()); List tenantSecretStores = controller.tenants() .get(application.tenant()) .filter(tenant-> tenant instanceof CloudTenant) .map(tenant -> ((CloudTenant) tenant).tenantSecretStores()) .orElse(List.of()); List operatorCertificates = controller.supportAccess().activeGrantsFor(deployment).stream() 
.map(SupportAccessGrant::certificate) .toList(); if (testerCertificate.isPresent()) { operatorCertificates = Stream.concat(operatorCertificates.stream(), testerCertificate.stream()).toList(); } Supplier> cloudAccount = () -> cloudAccountOverride.apply(decideCloudAccountOf(deployment, applicationPackage.truncatedPackage().deploymentSpec())); Supplier endpoints = () -> { if (preparedEndpoints == null) return DeploymentEndpoints.none; PreparedEndpoints prepared = preparedEndpoints.get(); generatedEndpoints.set(prepared.endpoints().generated()); return new DeploymentEndpoints(prepared.containerEndpoints(), Optional.of(prepared.certificate())); }; Supplier> dataplaneTokenVersions = () -> { Tags tags = applicationPackage.truncatedPackage().deploymentSpec() .instance(application.instance()) .map(DeploymentInstanceSpec::tags) .orElse(Tags.empty()); BasicServicesXml services = applicationPackage.truncatedPackage().services(deployment, tags); Set referencedTokens = services.containers().stream() .flatMap(container -> container.dataPlaneTokens().stream()) .collect(toSet()); List currentTokens = controller.dataplaneTokenService().listTokens(application.tenant()).stream() .filter(token -> referencedTokens.contains(token.tokenId())) .toList(); return Stream.concat(currentTokens.stream(), referencedTokens.stream() .filter(token -> currentTokens.stream().noneMatch(t -> t.tokenId().equals(token))) .map(token -> new DataplaneTokenVersions(token, List.of(), Instant.EPOCH))) .toList(); }; DeploymentData deploymentData = new DeploymentData(application, zone, applicationPackage::zipStream, platform, endpoints, dockerImageRepo, domain, deploymentQuota, tenantSecretStores, operatorCertificates, cloudAccount, dataplaneTokenVersions, dryRun); ConfigServer.PreparedApplication preparedApplication = configServer.deploy(deploymentData); return new DeploymentDataAndResult(deploymentData, preparedApplication.deploymentResult()); } } private void updateRoutingAndMeta(DeploymentId id, 
ApplicationPackageStream data, AtomicReference generatedEndpoints) { if (id.applicationId().instance().isTester()) return; controller.routing().of(id).activate(data.truncatedPackage().deploymentSpec(), generatedEndpoints.get()); if ( ! id.zoneId().environment().isManuallyDeployed()) return; controller.applications().applicationStore().putMeta(id, clock.instant(), data.truncatedPackage().metaDataZip()); } public Optional decideCloudAccountOf(DeploymentId deployment, DeploymentSpec spec) { ZoneId zoneId = deployment.zoneId(); CloudName cloud = controller.zoneRegistry().get(zoneId).getCloudName(); CloudAccount requestedAccount = spec.cloudAccount(cloud, deployment.applicationId().instance(), deployment.zoneId()); if (requestedAccount.isUnspecified()) return Optional.empty(); TenantName tenant = deployment.applicationId().tenant(); Set tenantAccounts = accountsOf(tenant); if ( ! tenantAccounts.contains(requestedAccount)) { throw new IllegalArgumentException("Requested cloud account '" + requestedAccount.value() + "' is not valid for tenant '" + tenant + "'"); } if ( ! controller.zoneRegistry().hasZone(zoneId, requestedAccount)) { throw new IllegalArgumentException("Zone " + zoneId + " is not configured in requested cloud account '" + requestedAccount.value() + "'"); } return Optional.of(requestedAccount); } private LockedApplication withoutDeletedDeployments(LockedApplication application, InstanceName instance) { DeploymentSpec deploymentSpec = application.get().deploymentSpec(); List deploymentsToRemove = application.get().require(instance).productionDeployments().values().stream() .map(Deployment::zone) .filter(zone -> deploymentSpec.instance(instance).isEmpty() || ! deploymentSpec.requireInstance(instance).deploysTo(zone.environment(), zone.region())) .toList(); if (deploymentsToRemove.isEmpty()) return application; if ( ! 
application.get().validationOverrides().allows(ValidationId.deploymentRemoval, clock.instant())) throw new IllegalArgumentException(ValidationId.deploymentRemoval.value() + ": " + application.get().require(instance) + " is deployed in " + deploymentsToRemove.stream() .map(zone -> zone.region().value()) .collect(joining(", ")) + ", but " + (deploymentsToRemove.size() > 1 ? "these " : "this ") + "instance and region combination" + (deploymentsToRemove.size() > 1 ? "s are" : " is") + " removed from deployment.xml. " + ValidationOverrides.toAllowMessage(ValidationId.deploymentRemoval)); // Remove the instance as well, if it is no longer referenced, and contains only production deployments that are removed now. boolean removeInstance = ! deploymentSpec.instanceNames().contains(instance) && application.get().require(instance).deployments().size() == deploymentsToRemove.size(); for (ZoneId zone : deploymentsToRemove) { application = deactivate(application.get().id().instance(instance), zone, Optional.of(application)).get(); } if (removeInstance) { application = application.without(instance); } return application; } /** * Deletes the given application. All known instances of the applications will be deleted. * * @throws IllegalArgumentException if the application has deployments or the caller is not authorized */ public void deleteApplication(TenantAndApplicationId id, Credentials credentials) { deleteApplication(id, Optional.of(credentials)); } public void deleteApplication(TenantAndApplicationId id, Optional credentials) { lockApplicationOrThrow(id, application -> { var deployments = application.get().instances().values().stream() .filter(instance -> ! instance.deployments().isEmpty()) .collect(toMap(instance -> instance.name(), instance -> instance.deployments().keySet().stream() .map(ZoneId::toString) .collect(joining(", ")))); if ( ! 
deployments.isEmpty()) throw new IllegalArgumentException("Could not delete '" + application + "': It has active deployments: " + deployments); for (Instance instance : application.get().instances().values()) { controller.routing().removeRotationEndpointsFromDns(application.get(), instance.name()); application = application.without(instance.name()); } applicationStore.removeAll(id.tenant(), id.application()); applicationStore.putMetaTombstone(id.tenant(), id.application(), clock.instant()); credentials.ifPresent(creds -> accessControl.deleteApplication(id, creds)); curator.removeApplication(id); controller.jobController().collectGarbage(); controller.notificationsDb().removeNotifications(NotificationSource.from(id)); log.info("Deleted " + id); }); } /** * Deletes the the given application instance. * * @throws IllegalArgumentException if the application has deployments or the caller is not authorized * @throws NotExistsException if the instance does not exist */ public void deleteInstance(ApplicationId instanceId) { if (getInstance(instanceId).isEmpty()) throw new NotExistsException("Could not delete instance '" + instanceId + "': Instance not found"); lockApplicationOrThrow(TenantAndApplicationId.from(instanceId), application -> { if ( ! application.get().require(instanceId.instance()).deployments().isEmpty()) throw new IllegalArgumentException("Could not delete '" + application + "': It has active deployments in: " + application.get().require(instanceId.instance()).deployments().keySet().stream().map(ZoneId::toString) .sorted().collect(joining(", "))); if ( ! 
application.get().deploymentSpec().equals(DeploymentSpec.empty) && application.get().deploymentSpec().instanceNames().contains(instanceId.instance())) throw new IllegalArgumentException("Can not delete '" + instanceId + "', which is specified in 'deployment.xml'; remove it there first"); controller.routing().removeRotationEndpointsFromDns(application.get(), instanceId.instance()); curator.writeApplication(application.without(instanceId.instance()).get()); controller.jobController().collectGarbage(); controller.notificationsDb().removeNotifications(NotificationSource.from(instanceId)); log.info("Deleted " + instanceId); }); } /** * Replace any previous version of this application by this instance * * @param application a locked application to store */ public void store(LockedApplication application) { curator.writeApplication(application.get()); } /** * Acquire a locked application to modify and store, if there is an application with the given id. * * @param applicationId ID of the application to lock and get. * @param action Function which acts on the locked application. */ public void lockApplicationIfPresent(TenantAndApplicationId applicationId, Consumer action) { try (Mutex lock = lock(applicationId)) { getApplication(applicationId).map(application -> new LockedApplication(application, lock)).ifPresent(action); } } /** * Acquire a locked application to modify and store, or throw an exception if no application has the given id. * * @param applicationId ID of the application to lock and require. * @param action Function which acts on the locked application. * @throws IllegalArgumentException when application does not exist. 
*/ public void lockApplicationOrThrow(TenantAndApplicationId applicationId, Consumer action) { try (Mutex lock = lock(applicationId)) { action.accept(new LockedApplication(requireApplication(applicationId), lock)); } } /** * Tells config server to schedule a restart of all nodes in this deployment * * @param restartFilter Variables to filter which nodes to restart. */ public void restart(DeploymentId deploymentId, RestartFilter restartFilter) { configServer.restart(deploymentId, restartFilter); } /** * Asks the config server whether this deployment is currently healthy, i.e., serving traffic as usual. * If this cannot be ascertained, we must assume it is not. */ public boolean isHealthy(DeploymentId deploymentId) { try { return ! isSuspended(deploymentId); // consider adding checks again global routing status, etc.? } catch (RuntimeException e) { log.log(Level.WARNING, "Failed getting suspension status of " + deploymentId + ": " + Exceptions.toMessageString(e)); return false; } } /** * Asks the config server whether this deployment is currently suspended: * Not in a state where it should receive traffic. */ public boolean isSuspended(DeploymentId deploymentId) { return configServer.isSuspended(deploymentId); } /** Sets suspension status of the given deployment in its zone. */ public void setSuspension(DeploymentId deploymentId, boolean suspend) { configServer.setSuspension(deploymentId, suspend); } /** Deactivate application in the given zone. 
Even if the application itself does not exist, deactivation of the deployment will still be attempted */ public void deactivate(ApplicationId instanceId, ZoneId zone) { TenantAndApplicationId applicationId = TenantAndApplicationId.from(instanceId); try (Mutex deploymentLock = lockForDeployment(instanceId, zone)) { try (Mutex lock = lock(applicationId)) { Optional application = getApplication(applicationId).map(app -> new LockedApplication(app, lock)); deactivate(instanceId, zone, application).ifPresent(this::store); } } } /** * Deactivates a locked application without storing it * * @return the application with the deployment in the given zone removed */ private Optional deactivate(ApplicationId instanceId, ZoneId zone, Optional application) { DeploymentId id = new DeploymentId(instanceId, zone); interface CleanCloseable extends AutoCloseable { void close(); } try (CleanCloseable postDeactivation = () -> { application.ifPresent(app -> controller.routing().of(id).deactivate(app.get().deploymentSpec())); if (id.zoneId().environment().isManuallyDeployed()) applicationStore.putMetaTombstone(id, clock.instant()); if ( ! id.zoneId().environment().isTest()) controller.notificationsDb().removeNotifications(NotificationSource.from(id)); }) { configServer.deactivate(id); return application.map(app -> app.with(instanceId.instance(), instance -> instance.withoutDeploymentIn(id.zoneId()))); } } public DeploymentTrigger deploymentTrigger() { return deploymentTrigger; } /** * Returns a lock which provides exclusive rights to changing this application. * Any operation which stores an application need to first acquire this lock, then read, modify * and store the application, and finally release (close) the lock. */ Mutex lock(TenantAndApplicationId application) { return curator.lock(application); } /** * Returns a lock which provides exclusive rights to deploying this application to the given zone. 
*/ private Mutex lockForDeployment(ApplicationId application, ZoneId zone) { return curator.lockForDeployment(application, zone); } public VersionCompatibility versionCompatibility(ApplicationId id) { return VersionCompatibility.fromVersionList(incompatibleVersions.with(INSTANCE_ID, id.serializedForm()).value()); } /** * Verifies that the application can be deployed to the tenant, following these rules: * * 1. Verify that the Athenz service can be launched by the config server * 2. If the principal is given, verify that the principal is tenant admin or admin of the tenant domain * 3. If the principal is not given, verify that the Athenz domain of the tenant equals Athenz domain given in deployment.xml * * @param tenantName tenant where application should be deployed * @param applicationPackage application package * @param deployer principal initiating the deployment, possibly empty */ public void verifyApplicationIdentityConfiguration(TenantName tenantName, Optional deployment, ApplicationPackage applicationPackage, Optional deployer) { Optional identityDomain = applicationPackage.deploymentSpec().athenzDomain() .map(domain -> new AthenzDomain(domain.value())); if (identityDomain.isEmpty()) { // If there is no domain configured in deployment.xml there is nothing to do. return; } // Verify that the system supports launching services. // Consider adding a capability to the system. if ( ! (accessControl instanceof AthenzFacade)) { throw new IllegalArgumentException("Athenz domain and service specified in deployment.xml, but not supported by system."); } // Verify that the config server is allowed to launch the service specified verifyAllowedLaunchAthenzService(applicationPackage.deploymentSpec()); // If a user principal is initiating the request, verify that the user is allowed to launch the service. // Either the user is member of the domain admin role, or is given the "launch" privilege on the service. 
Optional athenzUser = getUser(deployer); if (athenzUser.isPresent()) { // This is a direct deployment, and we need only validate what the configserver will actually launch. DeploymentId id = deployment.orElseThrow(() -> new IllegalArgumentException("Unable to evaluate access, no zone provided in deployment")); var serviceToLaunch = applicationPackage.deploymentSpec().athenzService(id.applicationId().instance(), id.zoneId().environment(), id.zoneId().region()) .map(service -> new AthenzService(identityDomain.get(), service.value())); if (serviceToLaunch.isPresent()) { if ( ! ((AthenzFacade) accessControl).canLaunch(athenzUser.get(), serviceToLaunch.get()) && // launch privilege ! ((AthenzFacade) accessControl).hasTenantAdminAccess(athenzUser.get(), identityDomain.get()) // tenant admin ) { throw new IllegalArgumentException("User " + athenzUser.get().getFullName() + " is not allowed to launch " + "service " + serviceToLaunch.get().getFullName() + ". " + "Please reach out to the domain admin."); } } else { // This is a rare edge case where deployment.xml specifies athenz-service on each step, but not on the root. // It is undefined which service should be launched, so handle this as an error. throw new IllegalArgumentException("Athenz domain configured, but no service defined for deployment to " + id.zoneId().value()); } } else { // If this is a deployment pipeline, verify that the domain in deployment.xml is the same as the tenant domain. Access control is already validated before this step. Tenant tenant = controller.tenants().require(tenantName); AthenzDomain tenantDomain = ((AthenzTenant) tenant).domain(); if ( ! 
Objects.equals(tenantDomain, identityDomain.get())) throw new IllegalArgumentException("Athenz domain in deployment.xml: [" + identityDomain.get().getName() + "] " + "must match tenant domain: [" + tenantDomain.getName() + "]"); } } private TenantAndApplicationId dashToUnderscore(TenantAndApplicationId id) { return TenantAndApplicationId.from(id.tenant().value(), id.application().value().replaceAll("-", "_")); } private ApplicationId dashToUnderscore(ApplicationId id) { return dashToUnderscore(TenantAndApplicationId.from(id)).instance(id.instance()); } private QuotaUsage deploymentQuotaUsage(ZoneId zoneId, ApplicationId applicationId) { var application = configServer.nodeRepository().getApplication(zoneId, applicationId); return DeploymentQuotaCalculator.calculateQuotaUsage(application); } /* * Get the AthenzUser from this principal or Optional.empty if this does not represent a user. */ private Optional getUser(Optional deployer) { return deployer .filter(AthenzPrincipal.class::isInstance) .map(AthenzPrincipal.class::cast) .map(AthenzPrincipal::getIdentity) .filter(AthenzUser.class::isInstance) .map(AthenzUser.class::cast); } /* * Verifies that the configured athenz service (if any) can be launched. 
*/ private void verifyAllowedLaunchAthenzService(DeploymentSpec deploymentSpec) { deploymentSpec.athenzDomain().ifPresent(domain -> { controller.zoneRegistry().zones().reachable().ids().forEach(zone -> { AthenzIdentity configServerAthenzIdentity = controller.zoneRegistry().getConfigServerHttpsIdentity(zone); deploymentSpec.athenzService().ifPresent(service -> { verifyAthenzServiceCanBeLaunchedBy(configServerAthenzIdentity, new AthenzService(domain.value(), service.value())); }); deploymentSpec.instances().forEach(spec -> { spec.athenzService(zone.environment(), zone.region()).ifPresent(service -> { verifyAthenzServiceCanBeLaunchedBy(configServerAthenzIdentity, new AthenzService(domain.value(), service.value())); }); }); }); }); } private void verifyAthenzServiceCanBeLaunchedBy(AthenzIdentity configServerAthenzIdentity, AthenzService athenzService) { if ( ! ((AthenzFacade) accessControl).canLaunch(configServerAthenzIdentity, athenzService)) throw new IllegalArgumentException("Not allowed to launch Athenz service " + athenzService.getFullName()); } /** Extract deployment warnings metric from deployment result */ private static Map warningsFrom(List log) { return log.stream() .filter(entry -> entry.level().intValue() >= Level.WARNING.intValue()) // TODO: Categorize warnings. Response from config server should be updated to include the appropriate // category and typed log level .collect(groupingBy(__ -> Warning.all, collectingAndThen(counting(), Long::intValue))); } public void verifyPlan(TenantName tenantName) { var planId = controller.serviceRegistry().billingController().getPlan(tenantName); Optional plan = controller.serviceRegistry().planRegistry().plan(planId); if (plan.isEmpty()) throw new IllegalArgumentException("Tenant '" + tenantName.value() + "' has no plan, not allowed to deploy. 
See https://cloud.vespa.ai/support"); if (plan.get().quota().calculate().equals(Quota.zero())) throw new IllegalArgumentException("Tenant '" + tenantName.value() + "' has a plan '" + plan.get().displayName() + "' with zero quota, not allowed to deploy. See https://cloud.vespa.ai/support"); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy