// com.antgroup.geaflow.cluster.ray.clustermanager.RayClient Maven / Gradle / Ivy
// The newest version!
/*
* Copyright 2023 AntGroup CO., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package com.antgroup.geaflow.cluster.ray.clustermanager;
import com.antgroup.geaflow.cluster.clustermanager.ClusterInfo;
import com.antgroup.geaflow.cluster.config.ClusterConfig;
import com.antgroup.geaflow.cluster.container.ContainerContext;
import com.antgroup.geaflow.cluster.driver.DriverContext;
import com.antgroup.geaflow.cluster.ray.entrypoint.RayContainerRunner;
import com.antgroup.geaflow.cluster.ray.entrypoint.RayDriverRunner;
import com.antgroup.geaflow.cluster.ray.entrypoint.RayMasterRunner;
import com.antgroup.geaflow.cluster.ray.entrypoint.RaySupervisorRunner;
import io.ray.api.ActorHandle;
import io.ray.api.ObjectRef;
import io.ray.api.Ray;
import io.ray.api.options.ActorLifetime;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helpers that create the Ray actors used by a GeaFlow cluster (master, driver,
 * container, supervisor) and trigger master initialization.
 *
 * <p>Every actor is created with {@link ActorLifetime#DETACHED}, the restart limit taken from
 * {@code ClusterConfig#getMaxRestarts()}, and the JVM options configured for its role.
 */
public class RayClient implements Serializable {

    private static final Logger LOGGER = LoggerFactory.getLogger(RayClient.class);

    /**
     * Creates the master actor.
     *
     * <p>The configured master memory is only reported in the log line; this method does not
     * pass it to Ray as a resource request.
     *
     * @param clusterConfig cluster configuration supplying the master's constructor argument
     *                      ({@code getConfig()}), JVM options and restart limit
     * @return handle of the newly created {@link RayMasterRunner} actor
     */
    public static ActorHandle<RayMasterRunner> createMaster(ClusterConfig clusterConfig) {
        int totalMemoryMb = clusterConfig.getMasterMemoryMB();
        int maxRestarts = clusterConfig.getMaxRestarts();
        List<String> jvmOptions = clusterConfig.getMasterJvmOptions().getJvmOptions();

        ActorHandle<RayMasterRunner> masterRayActor = Ray
            .actor(RayMasterRunner::new, clusterConfig.getConfig())
            .setMaxRestarts(maxRestarts)
            .setLifetime(ActorLifetime.DETACHED)
            .setJvmOptions(jvmOptions)
            .remote();

        LOGGER.info("master actor:{}, memoryMB:{}, jvmOptions:{}, foRestartTimes:{}",
            masterRayActor.getId(), totalMemoryMb, jvmOptions, maxRestarts);
        return masterRayActor;
    }

    /**
     * Invokes {@code RayMasterRunner#init} on the given master actor and returns its result,
     * obtained through {@link ObjectRef#get()}.
     *
     * @param masterActor handle of the master actor, as returned by
     *                    {@link #createMaster(ClusterConfig)}
     * @return the {@link ClusterInfo} produced by the master's {@code init} call
     */
    public static ClusterInfo initMaster(ActorHandle<RayMasterRunner> masterActor) {
        LOGGER.info("init master:{}", masterActor.getId());
        ObjectRef<ClusterInfo> masterMetaRayObject = masterActor
            .task(RayMasterRunner::init)
            .remote();
        return masterMetaRayObject.get();
    }

    /**
     * Creates the driver actor.
     *
     * <p>The configured driver memory is only reported in the log line; this method does not
     * pass it to Ray as a resource request.
     *
     * @param clusterConfig cluster configuration supplying JVM options and restart limit
     * @param context       driver context handed to the {@link RayDriverRunner} constructor
     * @return handle of the newly created {@link RayDriverRunner} actor
     */
    public static ActorHandle<RayDriverRunner> createDriver(ClusterConfig clusterConfig,
                                                            DriverContext context) {
        int totalMemoryMb = clusterConfig.getDriverMemoryMB();
        int maxRestarts = clusterConfig.getMaxRestarts();
        List<String> jvmOptions = clusterConfig.getDriverJvmOptions().getJvmOptions();

        ActorHandle<RayDriverRunner> driverRayActor = Ray
            .actor(RayDriverRunner::new, context)
            .setMaxRestarts(maxRestarts)
            .setLifetime(ActorLifetime.DETACHED)
            .setJvmOptions(jvmOptions)
            .remote();

        LOGGER.info("driver actor:{}, memoryMB:{}, jvmOptions:{}, foRestartTimes:{}",
            driverRayActor.getId(), totalMemoryMb, jvmOptions, maxRestarts);
        return driverRayActor;
    }

    /**
     * Creates a container (worker) actor.
     *
     * @param clusterConfig    cluster configuration supplying JVM options and restart limit
     * @param containerContext context handed to the {@link RayContainerRunner} constructor
     * @return handle of the newly created {@link RayContainerRunner} actor
     */
    public static ActorHandle<RayContainerRunner> createContainer(ClusterConfig clusterConfig,
                                                                  ContainerContext containerContext) {
        int maxRestarts = clusterConfig.getMaxRestarts();

        ActorHandle<RayContainerRunner> rayContainer = Ray
            .actor(RayContainerRunner::new, containerContext)
            .setMaxRestarts(maxRestarts)
            .setLifetime(ActorLifetime.DETACHED)
            .setJvmOptions(clusterConfig.getContainerJvmOptions().getJvmOptions())
            .remote();

        LOGGER.info("worker actor {} maxRestarts {}", rayContainer.getId(), maxRestarts);
        return rayContainer;
    }

    /**
     * Creates a supervisor actor.
     *
     * @param clusterConfig cluster configuration supplying the supervisor's first constructor
     *                      argument ({@code getConfig()}), JVM options and restart limit
     * @param envs          second argument handed to the {@link RaySupervisorRunner} constructor
     * @return handle of the newly created {@link RaySupervisorRunner} actor
     */
    // NOTE(review): envs is left as a raw Map because the RaySupervisorRunner constructor
    // signature is not visible from this file; parameterize it once the key/value types
    // are confirmed.
    public static ActorHandle<RaySupervisorRunner> createSupervisor(ClusterConfig clusterConfig,
                                                                    Map envs) {
        int maxRestarts = clusterConfig.getMaxRestarts();

        ActorHandle<RaySupervisorRunner> supervisorActor = Ray
            .actor(RaySupervisorRunner::new, clusterConfig.getConfig(), envs)
            .setMaxRestarts(maxRestarts)
            .setLifetime(ActorLifetime.DETACHED)
            .setJvmOptions(clusterConfig.getSupervisorJvmOptions().getJvmOptions())
            .remote();

        LOGGER.info("supervisor actor {} maxRestarts {}", supervisorActor.getId(), maxRestarts);
        return supervisorActor;
    }
}