All downloads are free. Search and download functionality uses the official Maven repository.

io.mantisrx.master.resourcecluster.ResourceClustersManagerActor Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2022 Netflix, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.mantisrx.master.resourcecluster;

import akka.actor.AbstractActor;
import akka.actor.ActorRef;
import akka.actor.Props;
import akka.actor.SupervisorStrategy;
import akka.japi.pf.ReceiveBuilder;
import io.mantisrx.master.akka.MantisActorSupervisorStrategy;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.AddNewJobArtifactsToCacheRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.GetActiveJobsRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.GetAssignedTaskExecutorRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.GetAvailableTaskExecutorsRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.GetBusyTaskExecutorsRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.GetDisabledTaskExecutorsRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.GetJobArtifactsToCacheRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.GetRegisteredTaskExecutorsRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.GetTaskExecutorStatusRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.GetUnregisteredTaskExecutorsRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.MarkExecutorTaskCancelledRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.RemoveJobArtifactsToCacheRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.ResourceOverviewRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.TaskExecutorBatchAssignmentRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.TaskExecutorGatewayRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterActor.TaskExecutorInfoRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterScalerActor.QueueClusterRuleRefreshRequest;
import io.mantisrx.master.resourcecluster.ResourceClusterScalerActor.TriggerClusterRuleRefreshRequest;
import io.mantisrx.master.resourcecluster.proto.SetResourceClusterScalerStatusRequest;
import io.mantisrx.server.master.config.MasterConfiguration;
import io.mantisrx.server.master.persistence.IMantisPersistenceProvider;
import io.mantisrx.server.master.persistence.MantisJobStore;
import io.mantisrx.server.master.resourcecluster.ClusterID;
import io.mantisrx.server.master.resourcecluster.TaskExecutorDisconnection;
import io.mantisrx.server.master.resourcecluster.TaskExecutorHeartbeat;
import io.mantisrx.server.master.resourcecluster.TaskExecutorRegistration;
import io.mantisrx.server.master.resourcecluster.TaskExecutorStatusChange;
import io.mantisrx.server.master.scheduler.JobMessageRouter;
import java.time.Clock;
import java.time.Duration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import lombok.Builder;
import lombok.Value;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.runtime.rpc.RpcService;

/**
 * Supervisor actor that routes resource-cluster messages to per-cluster child actors.
 *
 * <p>For every {@link ClusterID} seen in an incoming message, this actor lazily creates a
 * {@link ResourceClusterActor} and a {@link ResourceClusterScalerActor} as children, caches them,
 * and forwards the message (preserving the original sender) to the matching child. It also
 * answers {@link ListActiveClusters} with the set of cluster IDs for which children exist.
 *
 * <p>Nothing in this class removes a cluster once its actors have been created.
 */
@Slf4j
class ResourceClustersManagerActor extends AbstractActor {

    private final MasterConfiguration masterConfiguration;
    private final Clock clock;
    private final RpcService rpcService;
    private final MantisJobStore mantisJobStore;

    // Cluster ID -> holder of that cluster's (resource cluster actor, scaler actor) pair.
    // Only read and written from this actor's own message handlers.
    private final Map<ClusterID, ActorHolder> resourceClusterActorMap;

    private final ActorRef resourceClusterHostActor;
    private final IMantisPersistenceProvider mantisPersistenceProvider;
    private final JobMessageRouter jobMessageRouter;

    /**
     * Builds the {@link Props} used to create this actor.
     *
     * <p>The arguments are passed positionally to the public constructor, so their order here
     * must match the constructor's parameter order.
     *
     * @param masterConfiguration source of the intervals, thresholds and caching settings handed
     *                            to the child actors
     * @param clock clock handed to the child actors
     * @param rpcService RPC service handed to each resource cluster actor
     * @param mantisJobStore job store handed to each resource cluster actor
     * @param resourceClusterHostActorRef host actor handed to each scaler actor
     * @param mantisPersistenceProvider persistence provider handed to each scaler actor
     * @param jobMessageRouter router handed to each resource cluster actor
     * @return props describing a {@code ResourceClustersManagerActor}
     */
    public static Props props(
        MasterConfiguration masterConfiguration,
        Clock clock,
        RpcService rpcService,
        MantisJobStore mantisJobStore,
        ActorRef resourceClusterHostActorRef,
        IMantisPersistenceProvider mantisPersistenceProvider,
        JobMessageRouter jobMessageRouter) {
        return Props.create(
            ResourceClustersManagerActor.class,
            masterConfiguration,
            clock,
            rpcService,
            mantisJobStore,
            resourceClusterHostActorRef,
            mantisPersistenceProvider,
            jobMessageRouter);
    }

    /**
     * Stores the collaborators and starts with an empty cluster-to-actors map; child actors are
     * created later, on the first message that references a given cluster.
     */
    public ResourceClustersManagerActor(
        MasterConfiguration masterConfiguration, Clock clock,
        RpcService rpcService,
        MantisJobStore mantisJobStore,
        ActorRef resourceClusterHostActorRef,
        IMantisPersistenceProvider mantisPersistenceProvider,
        JobMessageRouter jobMessageRouter) {
        this.masterConfiguration = masterConfiguration;
        this.clock = clock;
        this.rpcService = rpcService;
        this.mantisJobStore = mantisJobStore;
        this.resourceClusterHostActor = resourceClusterHostActorRef;
        this.mantisPersistenceProvider = mantisPersistenceProvider;
        this.jobMessageRouter = jobMessageRouter;

        this.resourceClusterActorMap = new HashMap<>();
    }

    /**
     * Defines the routing table: {@link ListActiveClusters} is answered directly, scaler-related
     * requests are forwarded to the cluster's scaler actor, and every other listed message type
     * is forwarded to the cluster's resource cluster actor.
     *
     * <p>NOTE(review): children are watched when created, but no handler for Akka's
     * {@code Terminated} message is registered here. Confirm that leaving it unhandled is the
     * intended behavior when a child stops.
     */
    @Override
    public Receive createReceive() {
        return
            ReceiveBuilder
                .create()
                .match(ListActiveClusters.class, req -> sender().tell(getActiveClusters(), self()))

                .match(GetRegisteredTaskExecutorsRequest.class, req -> getRCActor(req.getClusterID()).forward(req, context()))
                .match(GetBusyTaskExecutorsRequest.class, req -> getRCActor(req.getClusterID()).forward(req, context()))
                .match(GetDisabledTaskExecutorsRequest.class, req -> getRCActor(req.getClusterID()).forward(req, context()))
                .match(GetAvailableTaskExecutorsRequest.class, req -> getRCActor(req.getClusterID()).forward(req, context()))
                .match(GetUnregisteredTaskExecutorsRequest.class, req -> getRCActor(req.getClusterID()).forward(req, context()))
                .match(GetTaskExecutorStatusRequest.class, req -> getRCActor(req.getClusterID()).forward(req, context()))
                .match(GetActiveJobsRequest.class, req -> getRCActor(req.getClusterID()).forward(req, context()))
                .match(GetAssignedTaskExecutorRequest.class, req -> getRCActor(req.getClusterID()).forward(req, context()))
                .match(MarkExecutorTaskCancelledRequest.class, req -> getRCActor(req.getClusterID()).forward(req, context()))

                .match(TaskExecutorRegistration.class, registration ->
                    getRCActor(registration.getClusterID()).forward(registration, context()))
                .match(TaskExecutorHeartbeat.class, heartbeat ->
                    getRCActor(heartbeat.getClusterID()).forward(heartbeat, context()))
                .match(TaskExecutorStatusChange.class, statusChange ->
                    getRCActor(statusChange.getClusterID()).forward(statusChange, context()))
                .match(TaskExecutorDisconnection.class, disconnection ->
                    getRCActor(disconnection.getClusterID()).forward(disconnection, context()))
                .match(TaskExecutorBatchAssignmentRequest.class, req ->
                    getRCActor(req.getClusterID()).forward(req, context()))
                .match(ResourceOverviewRequest.class, req ->
                    getRCActor(req.getClusterID()).forward(req, context()))
                .match(TaskExecutorInfoRequest.class, req ->
                    getRCActor(req.getClusterID()).forward(req, context()))
                .match(TaskExecutorGatewayRequest.class, req ->
                    getRCActor(req.getClusterID()).forward(req, context()))
                .match(DisableTaskExecutorsRequest.class, req ->
                    getRCActor(req.getClusterID()).forward(req, context()))
                .match(AddNewJobArtifactsToCacheRequest.class, req ->
                    getRCActor(req.getClusterID()).forward(req, context()))
                .match(RemoveJobArtifactsToCacheRequest.class, req ->
                    getRCActor(req.getClusterID()).forward(req, context()))
                .match(GetJobArtifactsToCacheRequest.class, req ->
                    getRCActor(req.getClusterID()).forward(req, context()))
                .match(TriggerClusterRuleRefreshRequest.class, req ->
                    getRCScalerActor(req.getClusterID()).forward(req, context()))
                .match(QueueClusterRuleRefreshRequest.class, req ->
                    getRCScalerActor(req.getClusterID()).forward(req, context()))
                .match(SetResourceClusterScalerStatusRequest.class, req ->
                    getRCScalerActor(req.getClusterID()).forward(req, context()))
                .build();
    }

    /**
     * Creates the {@link ResourceClusterActor} child for the given cluster, named
     * {@code "ResourceClusterActor-" + clusterID.getResourceID()}.
     *
     * @param clusterID cluster the new actor will serve
     * @return reference to the newly created child actor
     */
    private ActorRef createResourceClusterActorFor(ClusterID clusterID) {
        log.info("Creating resource cluster actor for {}", clusterID);
        ActorRef clusterActor =
            getContext().actorOf(
                ResourceClusterActor.props(
                    clusterID,
                    Duration.ofMillis(masterConfiguration.getHeartbeatIntervalInMs()),
                    // NOTE(review): the assignment interval is passed for two consecutive
                    // Duration arguments; confirm the second one is not meant to come from a
                    // different configuration setting.
                    Duration.ofMillis(masterConfiguration.getAssignmentIntervalInMs()),
                    Duration.ofMillis(masterConfiguration.getAssignmentIntervalInMs()),
                    clock,
                    rpcService,
                    mantisJobStore,
                    jobMessageRouter,
                    masterConfiguration.getMaxJobArtifactsToCache(),
                    masterConfiguration.getJobClustersWithArtifactCachingEnabled(),
                    masterConfiguration.isJobArtifactCachingEnabled(),
                    masterConfiguration.getSchedulingConstraints(),
                    masterConfiguration.getFitnessCalculator()),
                "ResourceClusterActor-" + clusterID.getResourceID());
        log.info("Created resource cluster actor for {}", clusterID);
        return clusterActor;
    }

    /**
     * Creates the {@link ResourceClusterScalerActor} child for the given cluster, named
     * {@code "ResourceClusterScalerActor-" + clusterID.getResourceID()}.
     *
     * @param clusterID cluster the new scaler will serve
     * @param rcActor the cluster's resource cluster actor, handed to the scaler
     * @return reference to the newly created child actor
     */
    private ActorRef createResourceClusterScalerActorFor(ClusterID clusterID, ActorRef rcActor) {
        log.info("Creating resource cluster scaler actor for {}", clusterID);
        ActorRef clusterScalerActor =
            getContext().actorOf(
                ResourceClusterScalerActor.props(
                    clusterID,
                    clock,
                    Duration.ofSeconds(masterConfiguration.getScalerTriggerThresholdInSecs()),
                    Duration.ofSeconds(masterConfiguration.getScalerRuleSetRefreshThresholdInSecs()),
                    this.mantisPersistenceProvider,
                    this.resourceClusterHostActor,
                    rcActor
                ),
                "ResourceClusterScalerActor-" + clusterID.getResourceID());
        log.info("Created resource cluster scaler actor for {}", clusterID);
        return clusterScalerActor;
    }

    /** Returns the resource cluster actor for the cluster, creating the actor pair if needed. */
    private ActorRef getRCActor(ClusterID clusterID) {
        return getOrCreateRCActors(clusterID).getResourceClusterActor();
    }

    /** Returns the scaler actor for the cluster, creating the actor pair if needed. */
    private ActorRef getRCScalerActor(ClusterID clusterID) {
        return getOrCreateRCActors(clusterID).getResourceClusterScalerActor();
    }

    /**
     * Looks up the actor pair for the cluster, creating and caching it on first use.
     *
     * @param clusterID cluster whose actors are requested
     * @return the cached or newly created holder for that cluster
     */
    private ActorHolder getOrCreateRCActors(ClusterID clusterID) {
        // A single computeIfAbsent covers both the hit and the miss case; the mapping function
        // only runs when the cluster has no entry yet.
        return resourceClusterActorMap.computeIfAbsent(clusterID, this::createAndWatchRCActors);
    }

    /**
     * Creates the resource cluster actor and its scaler for the cluster and registers a watch on
     * both. The resource cluster actor is created first because the scaler receives a reference
     * to it.
     */
    private ActorHolder createAndWatchRCActors(ClusterID clusterID) {
        ActorRef rcActorRef = createResourceClusterActorFor(clusterID);
        getContext().watch(rcActorRef);
        ActorRef scalerActorRef = createResourceClusterScalerActorFor(clusterID, rcActorRef);
        getContext().watch(scalerActorRef);

        return ActorHolder.builder()
            .resourceClusterActor(rcActorRef)
            .resourceClusterScalerActor(scalerActorRef)
            .build();
    }

    /**
     * Returns the IDs of all clusters that currently have child actors.
     *
     * <p>The result is sent to another actor as a message, so it carries a snapshot copy of the
     * keys rather than the live key-set view of this actor's mutable map.
     */
    private ClusterIdSet getActiveClusters() {
        return new ClusterIdSet(new HashSet<>(resourceClusterActorMap.keySet()));
    }

    /** Request message asking for the IDs of all clusters that currently have child actors. */
    @Value
    static class ListActiveClusters {
    }

    /** Reply to {@link ListActiveClusters}: the cluster IDs known to this manager. */
    @Value
    static class ClusterIdSet {
        Set<ClusterID> clusterIDS;
    }

    /** Supervises the child actors with the strategy built by {@link MantisActorSupervisorStrategy}. */
    @Override
    public SupervisorStrategy supervisorStrategy() {
        return MantisActorSupervisorStrategy.getInstance().create();
    }

    /** Pair of child actors that together serve one resource cluster. */
    @Value
    @Builder
    static class ActorHolder {
        ActorRef resourceClusterActor;
        ActorRef resourceClusterScalerActor;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy