// io.radanalytics.operator.cluster.KubernetesSparkClusterDeployer Maven / Gradle / Ivy
package io.radanalytics.operator.cluster;
import io.fabric8.kubernetes.api.model.*;
import io.fabric8.kubernetes.client.KubernetesClient;
import io.radanalytics.operator.historyServer.HistoryServerHelper;
import io.radanalytics.operator.resource.LabelsHelper;
import io.radanalytics.types.*;
import java.util.*;
import static io.radanalytics.operator.Constants.*;
import static io.radanalytics.operator.resource.LabelsHelper.OPERATOR_KIND_LABEL;
public class KubernetesSparkClusterDeployer {
// Kubernetes API client: used for ConfigMap look-ups and as the monitor that serializes getResourceList().
// Declared final because it is locked on; a lock object must never be reassigned.
private final KubernetesClient client;
// Name of the entity kind handled by the operator: stored as the value of the kind label and
// appended to the prefix to form the key of the cluster-name label.
private final String entityName;
// Prefix prepended to every label key generated by this class.
private final String prefix;
// Namespace used for ConfigMap look-ups; the literal "*" means "search in any namespace".
private final String namespace;
/**
 * Creates a deployer that only stores its collaborators; no Kubernetes call is made here.
 *
 * @param client     Kubernetes client used later for ConfigMap look-ups and as the lock in getResourceList
 * @param entityName entity kind name, used as the kind label value and as part of the cluster-name label key
 * @param prefix     prefix prepended to every generated label key
 * @param namespace  namespace for ConfigMap look-ups; "*" means any namespace
 */
KubernetesSparkClusterDeployer(KubernetesClient client, String entityName, String prefix, String namespace) {
    this.namespace = namespace;
    this.prefix = prefix;
    this.entityName = entityName;
    this.client = client;
}
/**
 * Assembles every Kubernetes resource that makes up the given Spark cluster.
 *
 * The result always contains, in this order, the master replication controller, the worker
 * replication controller and the master service (port 7077). When the web UI is enabled the UI
 * service (port 8080) is appended, and when the history server needs a volume a persistent
 * volume claim is appended as well.
 *
 * @param cluster the Spark cluster description
 * @return a KubernetesList holding the resources described above
 */
public KubernetesResourceList getResourceList(SparkCluster cluster) {
    // The whole assembly runs while holding the monitor of the shared client.
    synchronized (this.client) {
        checkForInjectionVulnerabilities(cluster, namespace);
        final String name = cluster.getName();

        // Labels for the master-facing services: cluster-wide labels first, master-specific ones win on collision.
        final Map<String, String> allMasterLabels = new HashMap<>();
        if (cluster.getLabels() != null) {
            allMasterLabels.putAll(cluster.getLabels());
        }
        if (cluster.getMaster() != null && cluster.getMaster().getLabels() != null) {
            allMasterLabels.putAll(cluster.getMaster().getLabels());
        }

        // The master RC must be built before the worker RC: building the master may add
        // history-server entries to the cluster's Spark configuration.
        final List<HasMetadata> items = new ArrayList<>();
        items.add(getRCforMaster(cluster));
        items.add(getRCforWorker(cluster));
        items.add(getService(false, name, 7077, allMasterLabels));

        if (cluster.getSparkWebUI()) {
            items.add(getService(true, name, 8080, allMasterLabels));
        }

        // pvc for history server (in case of sharedVolume strategy)
        if (HistoryServerHelper.needsVolume(cluster)) {
            items.add(getPersistentVolumeClaim(cluster, getDefaultLabels(name)));
        }

        return new KubernetesListBuilder().withItems(items).build();
    }
}
/**
 * Builds the replication controller for the Spark master.
 *
 * @param cluster the Spark cluster description
 * @return the master replication controller
 */
private ReplicationController getRCforMaster(SparkCluster cluster) {
    final boolean master = true;
    return getRCforMasterOrWorker(master, cluster);
}
/**
 * Builds the replication controller for the Spark workers.
 *
 * @param cluster the Spark cluster description
 * @return the worker replication controller
 */
private ReplicationController getRCforWorker(SparkCluster cluster) {
    final boolean master = false;
    return getRCforMasterOrWorker(master, cluster);
}
/**
 * Builds a TCP service in front of the master pods.
 *
 * @param isUi            true for the web UI service (named {@code name + "-ui"}), false for the master service (named {@code name})
 * @param name            Spark cluster name
 * @param port            port exposed by the service; the same value is used as target port
 * @param allMasterLabels user supplied labels; applied last, so they override generated labels with the same key
 * @return the service; its selector always targets the master pods ({@code name + "-m"})
 */
private Service getService(boolean isUi, String name, int port, Map<String, String> allMasterLabels) {
    // Typed locals keep the fluent builder chain below type-safe.
    final Map<String, String> labels = getDefaultLabels(name);
    labels.put(prefix + LabelsHelper.OPERATOR_SEVICE_TYPE_LABEL, isUi ? OPERATOR_TYPE_UI_LABEL : OPERATOR_TYPE_MASTER_LABEL);
    labels.putAll(allMasterLabels);

    final Map<String, String> selector = getSelector(name, name + "-m");

    final ServicePort servicePort = new ServicePortBuilder()
            .withPort(port)
            .withNewTargetPort().withIntVal(port).endTargetPort()
            .withProtocol("TCP")
            .build();

    return new ServiceBuilder()
            .withNewMetadata()
                .withName(isUi ? name + "-ui" : name)
                .withLabels(labels)
            .endMetadata()
            .withNewSpec()
                .withSelector(selector)
                .withPorts(servicePort)
            .endSpec()
            .build();
}
/**
 * Creates an environment variable entry with a literal value.
 *
 * @param key   variable name
 * @param value variable value
 * @return the EnvVar carrying the given name and value
 */
public static EnvVar env(String key, String value) {
    final EnvVarBuilder variable = new EnvVarBuilder().withName(key).withValue(value);
    return variable.build();
}
/**
 * Builds the replication controller for either the Spark master or the Spark workers.
 *
 * The controller and its single container are named {@code name + "-m"} (master) or
 * {@code name + "-w"} (worker). The container gets the cluster's environment variables, the
 * ports matching its role, a role specific exec readiness probe and an HTTP liveness probe.
 * When requested by the cluster definition (or when the expected ConfigMap exists) init
 * containers are added to the finished controller.
 *
 * Side effect: for the master, when a history server is configured, the cluster's Spark
 * configuration may be extended (see augmentSparkConfWithHistoryServer).
 *
 * @param isMaster true to build the master controller, false for the workers
 * @param cluster  the Spark cluster description
 * @return the replication controller
 */
private ReplicationController getRCforMasterOrWorker(boolean isMaster, SparkCluster cluster) {
    final String name = cluster.getName();
    final String podName = name + (isMaster ? "-m" : "-w");
    final Map<String, String> selector = getSelector(name, podName);

    final List<ContainerPort> ports = new ArrayList<>(2);
    final List<EnvVar> envVars = new ArrayList<>();
    envVars.add(env("OSHINKO_SPARK_CLUSTER", name));
    cluster.getEnv().forEach(kv -> envVars.add(env(kv.getName(), kv.getValue())));

    if (isMaster) {
        ports.add(tcpContainerPort("spark-master", 7077));
        if (cluster.getSparkWebUI()) {
            ports.add(tcpContainerPort("spark-webui", 8080));
        }
    } else {
        envVars.add(env("SPARK_MASTER_ADDRESS", "spark://" + name + ":7077"));
        if (cluster.getSparkWebUI()) {
            ports.add(tcpContainerPort("spark-webui", 8081));
            envVars.add(env("SPARK_MASTER_UI_ADDRESS", "http://" + name + "-ui:8080"));
        }
    }
    if (cluster.getMetrics()) {
        envVars.add(env("SPARK_METRICS_ON", "prometheus"));
        ports.add(tcpContainerPort("metrics", 7777));
    }

    final String cmName = InitContainersHelper.getExpectedCMName(cluster);
    final boolean cmExists = cmExists(cmName);
    // Only the delay of the role being built is needed.
    // NOTE(review): assumes getExpectedDelay has no side effects - confirm.
    final int expectedDelay = InitContainersHelper.getExpectedDelay(cluster, cmExists, isMaster);

    String imageRef = getDefaultSparkImage(); // from Constants
    if (cluster.getCustomImage() != null) {
        imageRef = cluster.getCustomImage();
    }

    ContainerBuilder containerBuilder = new ContainerBuilder()
            .withEnv(envVars)
            .withImage(imageRef)
            .withImagePullPolicy("IfNotPresent")
            .withName(podName)
            .withTerminationMessagePath("/dev/termination-log")
            .withTerminationMessagePolicy("File")
            .withPorts(ports)
            .withLivenessProbe(livenessProbe(isMaster, expectedDelay))
            .withReadinessProbe(readinessProbe(isMaster, expectedDelay));

    // limits & cmd
    containerBuilder = augmentContainerBuilder(cluster, containerBuilder, isMaster);

    // labels of the replication controller itself
    final Map<String, String> labels = getDefaultLabels(name);
    labels.put(prefix + LabelsHelper.OPERATOR_RC_TYPE_LABEL, isMaster ? OPERATOR_TYPE_MASTER_LABEL : OPERATOR_TYPE_WORKER_LABEL);
    addLabels(labels, cluster, isMaster);

    // labels of the pods created from the template (selector labels plus the pod type)
    final Map<String, String> podLabels = getSelector(name, podName);
    podLabels.put(prefix + LabelsHelper.OPERATOR_POD_TYPE_LABEL, isMaster ? OPERATOR_TYPE_MASTER_LABEL : OPERATOR_TYPE_WORKER_LABEL);
    addLabels(podLabels, cluster, isMaster);

    // Built in one chain so that no deeply nested fluent type has to be spelled out.
    final ReplicationController rc = new ReplicationControllerBuilder()
            .withNewMetadata()
                .withName(podName)
                .withLabels(labels)
            .endMetadata()
            .withNewSpec()
                .withReplicas(
                        isMaster
                                ? Optional.ofNullable(cluster.getMaster()).orElse(new Master()).getInstances()
                                : Optional.ofNullable(cluster.getWorker()).orElse(new Worker()).getInstances())
                .withSelector(selector)
                .withNewTemplate()
                    .withNewMetadata().withLabels(podLabels).endMetadata()
                    .withNewSpec().withContainers(containerBuilder.build()).endSpec()
                .endTemplate()
            .endSpec()
            .build();

    // history server: must run before the init-container check below, because it can make
    // the Spark configuration non-empty
    if (isMaster && null != cluster.getHistoryServer()) {
        augmentSparkConfWithHistoryServer(cluster);
    }

    // add init containers that will prepare the data on the nodes or override the configuration
    if (!cluster.getDownloadData().isEmpty() || !cluster.getSparkConfiguration().isEmpty() || cmExists) {
        InitContainersHelper.addInitContainers(rc, cluster, cmExists, isMaster);
    }
    return rc;
}

/** Creates a named TCP container port. */
private static ContainerPort tcpContainerPort(String portName, int number) {
    return new ContainerPortBuilder().withName(portName).withContainerPort(number).withProtocol("TCP").build();
}

/**
 * Creates the exec readiness probe for the given role. It starts 4 seconds before the expected
 * start-up delay, clamped at 0 so that a short delay never yields a negative initial delay.
 */
private static Probe readinessProbe(boolean isMaster, int expectedDelay) {
    // NOTE(review): the worker script ends in "|| echo ...", so it exits 0 even when grep
    // finds nothing - confirm that an always-passing worker readiness check is intended.
    final String script = isMaster
            ? "curl -s localhost:8080 | grep -e Status.*ALIVE"
            : "curl -s localhost:8081 | grep -e 'Master URL:.*spark://'"
                    + " || echo Unable to connect to the Spark master at $SPARK_MASTER_ADDRESS";
    return new ProbeBuilder()
            .withNewExec().withCommand(Arrays.asList("/bin/bash", "-c", script)).endExec()
            .withFailureThreshold(3)
            .withInitialDelaySeconds(Math.max(0, expectedDelay - 4))
            .withPeriodSeconds(7)
            .withSuccessThreshold(1)
            .withTimeoutSeconds(1)
            .build();
}

/** Creates the HTTP liveness probe against the web UI port of the given role (8080 master, 8081 worker). */
private static Probe livenessProbe(boolean isMaster, int expectedDelay) {
    return new ProbeBuilder()
            .withNewHttpGet()
                .withPath("/")
                .withNewPort().withIntVal(isMaster ? 8080 : 8081).endPort()
                .withScheme("HTTP")
            .endHttpGet()
            .withPeriodSeconds(10)
            .withSuccessThreshold(1)
            .withFailureThreshold(6)
            .withInitialDelaySeconds(expectedDelay)
            .withTimeoutSeconds(1)
            .build();
}
/**
 * Builds the persistent volume claim used by the history server's shared volume.
 *
 * The claim is named {@code clusterName + "-claim"}, requests the shared volume's size with
 * access mode ReadWriteMany and selects volumes by the shared volume's match labels.
 *
 * @param cluster the Spark cluster description; its history server must not be null
 * @param labels  labels put on the claim's metadata
 * @return the persistent volume claim
 */
private PersistentVolumeClaim getPersistentVolumeClaim(SparkCluster cluster, Map<String, String> labels) {
    final SharedVolume sharedVolume = Optional.ofNullable(cluster.getHistoryServer().getSharedVolume()).orElse(new SharedVolume());

    final Map<String, Quantity> requests = new HashMap<>();
    requests.put("storage", new QuantityBuilder().withAmount(sharedVolume.getSize()).build());

    Map<String, String> matchLabels = sharedVolume.getMatchLabels();
    if (null == matchLabels || matchLabels.isEmpty()) {
        // if no match labels are specified, we assume the default one: radanalytics.io/SparkCluster: spark-cluster-name
        matchLabels = new HashMap<>(1);
        matchLabels.put(prefix + entityName, cluster.getName());
    }

    return new PersistentVolumeClaimBuilder()
            .withNewMetadata()
                .withName(cluster.getName() + "-claim")
                .withLabels(labels)
            .endMetadata()
            .withNewSpec()
                .withAccessModes("ReadWriteMany")
                .withNewSelector().withMatchLabels(matchLabels).endSelector()
                .withNewResources().withRequests(requests).endResources()
            .endSpec()
            .build();
}
/**
 * Turns on Spark event logging for the history server by adding two entries to the cluster's
 * Spark configuration: {@code spark.eventLog.enabled=true} followed by {@code spark.eventLog.dir}.
 *
 * The event log location is the shared volume's mount path when the history server needs a
 * volume, otherwise the history server's remote URI. Nothing is added when the cluster already
 * carries any Spark configuration.
 *
 * @param cluster the Spark cluster description; its history server must not be null
 */
private void augmentSparkConfWithHistoryServer(SparkCluster cluster) {
    final String eventLog = HistoryServerHelper.needsVolume(cluster)
            ? Optional.ofNullable(cluster.getHistoryServer().getSharedVolume()).orElse(new SharedVolume()).getMountPath()
            : cluster.getHistoryServer().getRemoteURI();

    if (!cluster.getSparkConfiguration().isEmpty()) {
        // NOTE(review): any pre-existing configuration suppresses the event log settings - confirm this is intended.
        return;
    }

    final SparkConfiguration eventLogDir = new SparkConfiguration();
    eventLogDir.setName("spark.eventLog.dir");
    eventLogDir.setValue(eventLog);

    final SparkConfiguration eventLogEnabled = new SparkConfiguration();
    eventLogEnabled.setName("spark.eventLog.enabled");
    eventLogEnabled.setValue("true");

    // spark.history.fs.logDirectory is deliberately not set here.
    // Each entry is inserted at the head, so "enabled" ends up before "dir".
    cluster.getSparkConfiguration().add(0, eventLogDir);
    cluster.getSparkConfiguration().add(0, eventLogEnabled);
}
/**
 * Applies the role specific resource settings and command overrides to a container builder.
 *
 * Memory and cpu, when present, are used both as limits and as requests. Command and command
 * arguments are only set when the cluster definition provides them.
 *
 * @param cluster  the Spark cluster description
 * @param builder  the container builder to extend
 * @param isMaster true to read the master section of the cluster, false for the worker section
 * @return the builder with the additional settings applied
 */
private ContainerBuilder augmentContainerBuilder(SparkCluster cluster, ContainerBuilder builder, boolean isMaster) {
    // Read everything from the matching section once; a missing section behaves like an empty one.
    final String memory;
    final String cpu;
    final List<String> command;
    final List<String> commandArgs;
    if (isMaster) {
        final Master master = Optional.ofNullable(cluster.getMaster()).orElse(new Master());
        memory = master.getMemory();
        cpu = master.getCpu();
        command = master.getCommand();
        commandArgs = master.getCommandArgs();
    } else {
        final Worker worker = Optional.ofNullable(cluster.getWorker()).orElse(new Worker());
        memory = worker.getMemory();
        cpu = worker.getCpu();
        command = worker.getCommand();
        commandArgs = worker.getCommandArgs();
    }

    final Map<String, Quantity> limits = new HashMap<>(2);
    if (memory != null) {
        limits.put("memory", new Quantity(memory));
    }
    if (cpu != null) {
        limits.put("cpu", new Quantity(cpu));
    }
    if (!limits.isEmpty()) {
        // the same map is passed as limits and as requests
        builder = builder.withResources(new ResourceRequirements(limits, limits));
    }

    if (null != command) {
        builder = builder.withCommand(command);
    }
    if (null != commandArgs) {
        builder = builder.withArgs(commandArgs);
    }
    return builder;
}
/**
 * Copies the user supplied labels into the given map: cluster-wide labels first, then the
 * labels of the master or worker section, which win on key collision.
 *
 * @param labels   map to extend in place
 * @param cluster  the Spark cluster description
 * @param isMaster true to use the master section's labels, false for the worker section's
 */
private void addLabels(Map<String, String> labels, SparkCluster cluster, boolean isMaster) {
    if (cluster.getLabels() != null) {
        labels.putAll(cluster.getLabels());
    }

    final Map<String, String> roleLabels;
    if (isMaster) {
        roleLabels = cluster.getMaster() != null ? cluster.getMaster().getLabels() : null;
    } else {
        roleLabels = cluster.getWorker() != null ? cluster.getWorker().getLabels() : null;
    }
    if (roleLabels != null) {
        labels.putAll(roleLabels);
    }
}
/**
 * Tells whether a ConfigMap with the given name exists and carries at least one data entry.
 *
 * When the configured namespace is "*", ConfigMaps of all namespaces are searched by name and
 * the first match is inspected; otherwise the ConfigMap is looked up in the configured namespace.
 *
 * @param name ConfigMap name
 * @return true if the ConfigMap was found and its data is neither null nor empty
 */
private boolean cmExists(String name) {
    final ConfigMap configMap;
    if ("*".equals(namespace)) {
        // typed list: the element must be a ConfigMap, not Object
        final List<ConfigMap> items = client.configMaps().inAnyNamespace().withField("metadata.name", name).list().getItems();
        configMap = (items == null || items.isEmpty()) ? null : items.get(0);
    } else {
        configMap = client.configMaps().inNamespace(namespace).withName(name).get();
    }
    return configMap != null && configMap.getData() != null && !configMap.getData().isEmpty();
}
/**
 * Builds the label set that identifies the pods of one deployment: the default labels of the
 * cluster plus the deployment label set to the pod name.
 *
 * @param clusterName Spark cluster name
 * @param podName     value of the deployment label
 * @return a new, mutable label map
 */
private Map<String, String> getSelector(String clusterName, String podName) {
    final Map<String, String> selector = getDefaultLabels(clusterName);
    selector.put(prefix + LabelsHelper.OPERATOR_DEPLOYMENT_LABEL, podName);
    return selector;
}
/**
 * Builds the two labels put on every resource of a cluster: the kind label (value: the entity
 * name) and the cluster-name label (key: prefix + entity name, value: the given name).
 *
 * @param name Spark cluster name
 * @return a new, mutable map holding the two labels
 */
public Map<String, String> getDefaultLabels(String name) {
    final Map<String, String> labels = new HashMap<>(3);
    labels.put(prefix + OPERATOR_KIND_LABEL, entityName);
    labels.put(prefix + entityName, name);
    return labels;
}
/**
 * Placeholder for validating the cluster definition before resources are generated.
 * Currently performs no check at all: both arguments are ignored and the method never fails.
 *
 * @param app       the Spark cluster description (unused)
 * @param namespace the namespace (unused)
 */
private void checkForInjectionVulnerabilities(SparkCluster app, String namespace) {
// TODO: not implemented yet - no validation is performed
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy