All downloads are free. The search and download functionality uses the official Maven repository.

com.antgroup.geaflow.cluster.ray.clustermanager.RayClient Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2023 AntGroup CO., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

package com.antgroup.geaflow.cluster.ray.clustermanager;

import com.antgroup.geaflow.cluster.clustermanager.ClusterInfo;
import com.antgroup.geaflow.cluster.config.ClusterConfig;
import com.antgroup.geaflow.cluster.container.ContainerContext;
import com.antgroup.geaflow.cluster.driver.DriverContext;
import com.antgroup.geaflow.cluster.ray.entrypoint.RayContainerRunner;
import com.antgroup.geaflow.cluster.ray.entrypoint.RayDriverRunner;
import com.antgroup.geaflow.cluster.ray.entrypoint.RayMasterRunner;
import com.antgroup.geaflow.cluster.ray.entrypoint.RaySupervisorRunner;
import io.ray.api.ActorHandle;
import io.ray.api.ObjectRef;
import io.ray.api.Ray;
import io.ray.api.options.ActorLifetime;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Static helpers for creating the Ray actors used by the cluster (master, driver,
 * container and supervisor) and for triggering master initialization.
 *
 * <p>Every {@code create*} method builds the actor through {@link Ray#actor}, applies the
 * restart limit from {@link ClusterConfig#getMaxRestarts()}, sets the actor lifetime to
 * {@link ActorLifetime#DETACHED}, passes the role-specific JVM options and then submits the
 * creation with {@code remote()}.
 *
 * <p>All generic types are fully parameterized: with raw {@code ActorHandle}/{@code ObjectRef}
 * types the result of {@code ObjectRef.get()} is {@code Object} and cannot be returned as
 * {@link ClusterInfo} without a cast.
 */
public class RayClient implements Serializable {

    private static final Logger LOGGER = LoggerFactory.getLogger(RayClient.class);

    /**
     * Creates the master actor.
     *
     * <p>The configured master memory is only reported in the log line; this method does not
     * pass it to the actor creator.
     *
     * @param clusterConfig cluster configuration supplying the runner config, restart limit
     *                      and master JVM options
     * @return handle of the newly created master actor
     */
    public static ActorHandle<RayMasterRunner> createMaster(ClusterConfig clusterConfig) {
        int totalMemoryMb = clusterConfig.getMasterMemoryMB();
        List<String> jvmOptions = clusterConfig.getMasterJvmOptions().getJvmOptions();

        ActorHandle<RayMasterRunner> masterRayActor = Ray
            .actor(RayMasterRunner::new, clusterConfig.getConfig())
            .setMaxRestarts(clusterConfig.getMaxRestarts())
            .setLifetime(ActorLifetime.DETACHED)
            .setJvmOptions(jvmOptions).remote();
        LOGGER.info("master actor:{}, memoryMB:{}, jvmOptions:{}, foRestartTimes:{}",
            masterRayActor.getId().toString(), totalMemoryMb, jvmOptions,
            clusterConfig.getMaxRestarts());
        return masterRayActor;
    }

    /**
     * Invokes {@code RayMasterRunner::init} on the given master actor and returns its result.
     *
     * @param masterActor handle of the master actor to initialize
     * @return the cluster info produced by the remote {@code init} call, obtained through
     *         {@link ObjectRef#get()}
     */
    public static ClusterInfo initMaster(ActorHandle<RayMasterRunner> masterActor) {
        LOGGER.info("init master:{}", masterActor.getId().toString());
        ObjectRef<ClusterInfo> masterMetaRayObject = masterActor.task(RayMasterRunner::init)
            .remote();
        return masterMetaRayObject.get();
    }

    /**
     * Creates the driver actor.
     *
     * <p>The configured driver memory is only reported in the log line; this method does not
     * pass it to the actor creator.
     *
     * @param clusterConfig cluster configuration supplying the restart limit and driver JVM
     *                      options
     * @param context       driver context handed to the {@link RayDriverRunner} constructor
     * @return handle of the newly created driver actor
     */
    public static ActorHandle<RayDriverRunner> createDriver(ClusterConfig clusterConfig,
                                                            DriverContext context) {
        int totalMemoryMb = clusterConfig.getDriverMemoryMB();
        List<String> jvmOptions = clusterConfig.getDriverJvmOptions().getJvmOptions();

        ActorHandle<RayDriverRunner> driverRayActor = Ray
            .actor(RayDriverRunner::new, context)
            .setMaxRestarts(clusterConfig.getMaxRestarts())
            .setLifetime(ActorLifetime.DETACHED)
            .setJvmOptions(jvmOptions).remote();
        LOGGER.info("driver actor:{}, memoryMB:{}, jvmOptions:{}, foRestartTimes:{}",
            driverRayActor.getId().toString(), totalMemoryMb, jvmOptions,
            clusterConfig.getMaxRestarts());
        return driverRayActor;
    }

    /**
     * Creates a container (worker) actor.
     *
     * @param clusterConfig    cluster configuration supplying the restart limit and container
     *                         JVM options
     * @param containerContext context handed to the {@link RayContainerRunner} constructor
     * @return handle of the newly created container actor
     */
    public static ActorHandle<RayContainerRunner> createContainer(ClusterConfig clusterConfig,
                                                                  ContainerContext containerContext) {
        ActorHandle<RayContainerRunner> rayContainer = Ray
            .actor(RayContainerRunner::new, containerContext)
            .setMaxRestarts(clusterConfig.getMaxRestarts())
            .setLifetime(ActorLifetime.DETACHED)
            .setJvmOptions(clusterConfig.getContainerJvmOptions().getJvmOptions())
            .remote();
        LOGGER.info("worker actor {} maxRestarts {}", rayContainer.getId().toString(),
            clusterConfig.getMaxRestarts());
        return rayContainer;
    }

    /**
     * Creates a supervisor actor.
     *
     * @param clusterConfig cluster configuration supplying the runner config, restart limit
     *                      and supervisor JVM options
     * @param envs          map handed to the {@link RaySupervisorRunner} constructor;
     *                      NOTE(review): typed as String-to-String on the assumption that it
     *                      carries environment variables - confirm against the constructor
     * @return handle of the newly created supervisor actor
     */
    public static ActorHandle<RaySupervisorRunner> createSupervisor(ClusterConfig clusterConfig,
                                                                   Map<String, String> envs) {
        ActorHandle<RaySupervisorRunner> supervisorActor = Ray
            .actor(RaySupervisorRunner::new, clusterConfig.getConfig(), envs)
            .setMaxRestarts(clusterConfig.getMaxRestarts())
            .setLifetime(ActorLifetime.DETACHED)
            .setJvmOptions(clusterConfig.getSupervisorJvmOptions().getJvmOptions())
            .remote();
        LOGGER.info("supervisor actor {} maxRestarts {}", supervisorActor.getId().toString(),
            clusterConfig.getMaxRestarts());
        return supervisorActor;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy