/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.kubernetes;

import org.apache.flink.client.deployment.ClusterDeploymentException;
import org.apache.flink.client.deployment.ClusterDescriptor;
import org.apache.flink.client.deployment.ClusterRetrieveException;
import org.apache.flink.client.deployment.ClusterSpecification;
import org.apache.flink.client.program.ClusterClient;
import org.apache.flink.client.program.ClusterClientProvider;
import org.apache.flink.client.program.rest.RestClusterClient;
import org.apache.flink.configuration.BlobServerOptions;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.HighAvailabilityOptions;
import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.configuration.TaskManagerOptions;
import org.apache.flink.kubernetes.configuration.KubernetesConfigOptions;
import org.apache.flink.kubernetes.configuration.KubernetesConfigOptionsInternal;
import org.apache.flink.kubernetes.entrypoint.KubernetesJobClusterEntrypoint;
import org.apache.flink.kubernetes.entrypoint.KubernetesSessionClusterEntrypoint;
import org.apache.flink.kubernetes.kubeclient.Endpoint;
import org.apache.flink.kubernetes.kubeclient.FlinkKubeClient;
import org.apache.flink.kubernetes.kubeclient.KubernetesJobManagerSpecification;
import org.apache.flink.kubernetes.kubeclient.factory.KubernetesJobManagerFactory;
import org.apache.flink.kubernetes.kubeclient.parameters.KubernetesJobManagerParameters;
import org.apache.flink.kubernetes.utils.Constants;
import org.apache.flink.kubernetes.utils.KubernetesInitializerUtils;
import org.apache.flink.kubernetes.utils.KubernetesUtils;
import org.apache.flink.runtime.entrypoint.ClusterEntrypoint;
import org.apache.flink.runtime.highavailability.HighAvailabilityServicesUtils;
import org.apache.flink.runtime.highavailability.nonha.standalone.StandaloneClientHAServices;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobmanager.HighAvailabilityMode;
import org.apache.flink.util.FlinkException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.util.Optional;

import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * Kubernetes specific {@link ClusterDescriptor} implementation.
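 *
 * <p>A minimal usage sketch, assuming the {@link FlinkKubeClient} is obtained from a
 * {@code KubeClientFactory} (the factory name and its {@code fromConfiguration} method are
 * assumptions; the exact client bootstrap may differ between Flink versions):
 *
 * <pre>{@code
 * Configuration flinkConfig = new Configuration();
 * flinkConfig.setString(KubernetesConfigOptions.CLUSTER_ID, "my-session-cluster");
 *
 * // Assumed helper for creating a FlinkKubeClient from the configuration.
 * FlinkKubeClient kubeClient = KubeClientFactory.fromConfiguration(flinkConfig);
 *
 * try (KubernetesClusterDescriptor descriptor =
 *         new KubernetesClusterDescriptor(flinkConfig, kubeClient)) {
 *     ClusterClientProvider<String> provider = descriptor.retrieve("my-session-cluster");
 *     try (ClusterClient<String> client = provider.getClusterClient()) {
 *         // e.g. client.getWebInterfaceURL(), client.listJobs(), ...
 *     }
 * }
 * }</pre>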
 */
public class KubernetesClusterDescriptor implements ClusterDescriptor<String> {

	private static final Logger LOG = LoggerFactory.getLogger(KubernetesClusterDescriptor.class);

	private static final String CLUSTER_DESCRIPTION = "Kubernetes cluster";

	private final Configuration flinkConfig;

	private final FlinkKubeClient client;

	private final String clusterId;

	public KubernetesClusterDescriptor(Configuration flinkConfig, FlinkKubeClient client) {
		this.flinkConfig = flinkConfig;
		this.client = client;
		this.clusterId = checkNotNull(
			flinkConfig.getString(KubernetesConfigOptions.CLUSTER_ID),
			"ClusterId must be specified!");
	}

	@Override
	public String getClusterDescription() {
		return CLUSTER_DESCRIPTION;
	}

	private ClusterClientProvider<String> createClusterClientProvider(String clusterId) {
		return () -> {
			final Configuration configuration = new Configuration(flinkConfig);

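			// Resolve the externally reachable rest endpoint from the Kubernetes service fronting the
			// JobManager; an empty result means the cluster (or its rest service) cannot be found.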
			final Optional<Endpoint> restEndpoint = client.getRestEndpoint(clusterId).join();

			if (restEndpoint.isPresent()) {
				configuration.setString(RestOptions.ADDRESS, restEndpoint.get().getAddress());
				configuration.setInteger(RestOptions.PORT, restEndpoint.get().getPort());
			} else {
				throw new RuntimeException(
						new ClusterRetrieveException(
								"Could not get the rest endpoint of " + clusterId));
			}

			try {
				// The Flink client always reaches the JobManager through the Kubernetes service, so the
				// web monitor address is pre-configured and it is reasonable to use
				// StandaloneClientHAServices to create the RestClusterClient.
				return new RestClusterClient<>(
					configuration,
					clusterId,
					new StandaloneClientHAServices(HighAvailabilityServicesUtils.getWebMonitorAddress(
						configuration, HighAvailabilityServicesUtils.AddressResolution.TRY_ADDRESS_RESOLUTION)));
			} catch (Exception e) {
				client.handleException(e);
				throw new RuntimeException(new ClusterRetrieveException("Could not create the RestClusterClient.", e));
			}
		};
	}

	@Override
	public ClusterClientProvider<String> retrieve(String clusterId) {
		final ClusterClientProvider<String> clusterClientProvider = createClusterClientProvider(clusterId);

		try (ClusterClient<String> clusterClient = clusterClientProvider.getClusterClient()) {
			LOG.info(
				"Retrieve flink cluster {} successfully, JobManager Web Interface: {}",
				clusterId,
				clusterClient.getWebInterfaceURL());
		}
		return clusterClientProvider;
	}

	@Override
	public ClusterClientProvider<String> deploySessionCluster(ClusterSpecification clusterSpecification) throws ClusterDeploymentException {
		final ClusterClientProvider<String> clusterClientProvider = deployClusterInternal(
			KubernetesSessionClusterEntrypoint.class.getName(),
			clusterSpecification,
			null,
			false);

		try (ClusterClient<String> clusterClient = clusterClientProvider.getClusterClient()) {
			LOG.info(
				"Create flink session cluster {} successfully, JobManager Web Interface: {}",
				clusterId,
				clusterClient.getWebInterfaceURL());
		}
		return clusterClientProvider;
	}

	@Override
	public ClusterClientProvider<String> deployJobCluster(
			ClusterSpecification clusterSpecification,
			JobGraph jobGraph,
			boolean detached) throws ClusterDeploymentException {
		try {
			return deployClusterInternal(
				KubernetesJobClusterEntrypoint.class.getName(),
				clusterSpecification,
				jobGraph,
				detached);
		} catch (Exception e) {
			throw new ClusterDeploymentException("Could not deploy Kubernetes job cluster.", e);
		}
	}

	private ClusterClientProvider<String> deployClusterInternal(
			String entryPoint,
			ClusterSpecification clusterSpecification,
			@Nullable JobGraph jobGraph,
			boolean detached) throws ClusterDeploymentException {
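		// In detached mode the entrypoint shuts the cluster down once the job has finished, without
		// waiting for a client to fetch the result (only relevant for the per-job entrypoint).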
		final ClusterEntrypoint.ExecutionMode executionMode = detached ?
			ClusterEntrypoint.ExecutionMode.DETACHED
			: ClusterEntrypoint.ExecutionMode.NORMAL;
		flinkConfig.setString(ClusterEntrypoint.EXECUTION_MODE, executionMode.toString());

		flinkConfig.setString(KubernetesConfigOptionsInternal.ENTRY_POINT_CLASS, entryPoint);

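		// The init container is only needed in per-job mode, where it fetches the job dependencies
		// uploaded below before the JobManager container starts.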
		flinkConfig.setBoolean(KubernetesConfigOptionsInternal.ENABLE_INIT_CONTAINER, jobGraph != null);

		// The RPC, blob, rest and TaskManager RPC ports need to be exposed, so update them to fixed values.
		KubernetesUtils.checkAndUpdatePortConfigOption(flinkConfig, BlobServerOptions.PORT, Constants.BLOB_SERVER_PORT);
		KubernetesUtils.checkAndUpdatePortConfigOption(
			flinkConfig,
			TaskManagerOptions.RPC_PORT,
			Constants.TASK_MANAGER_RPC_PORT);

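		// With high availability enabled, leader election and retrieval are scoped by the cluster id,
		// and the JobManager port is pinned to a single value so that it can be exposed through the
		// Kubernetes service.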
		if (HighAvailabilityMode.isHighAvailabilityModeActivated(flinkConfig)) {
			flinkConfig.setString(HighAvailabilityOptions.HA_CLUSTER_ID, clusterId);
			KubernetesUtils.checkAndUpdatePortConfigOption(
				flinkConfig,
				HighAvailabilityOptions.HA_JOB_MANAGER_PORT_RANGE,
				flinkConfig.get(JobManagerOptions.PORT));
		}

		try {
			final KubernetesJobManagerParameters kubernetesJobManagerParameters =
				new KubernetesJobManagerParameters(flinkConfig, clusterSpecification);

			// Only in per-job mode: upload the job's local dependencies so the init container can fetch them.
			if (jobGraph != null) {
				KubernetesInitializerUtils.uploadLocalDependencies(
					flinkConfig,
					jobGraph,
					kubernetesJobManagerParameters,
					clusterId);
			}

			final KubernetesJobManagerSpecification kubernetesJobManagerSpec =
				KubernetesJobManagerFactory.createJobManagerComponent(kubernetesJobManagerParameters);

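			// The specification bundles the JobManager deployment with its accompanying Kubernetes
			// resources (services, config maps, etc.), which are created in one step here.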
			client.createJobManagerComponent(kubernetesJobManagerSpec);

			LOG.info("Start to set {}-ApplicationId {} into System.Properties", Thread.currentThread().getId(), clusterId);
			System.setProperty(Thread.currentThread().getId() + "-ApplicationId", clusterId);

			return createClusterClientProvider(clusterId);
		} catch (Exception e) {
			try {
				LOG.warn("Failed to create the Kubernetes cluster \"{}\", try to clean up the residual resources.", clusterId);
				client.stopAndCleanupCluster(clusterId);
			} catch (Exception e1) {
				LOG.info("Failed to stop and clean up the Kubernetes cluster \"{}\".", clusterId, e1);
			}
			throw new ClusterDeploymentException("Could not create Kubernetes cluster \"" + clusterId + "\".", e);
		}
	}

	@Override
	public void killCluster(String clusterId) throws FlinkException {
		try {
			client.stopAndCleanupCluster(clusterId);
		} catch (Exception e) {
			client.handleException(e);
			throw new FlinkException("Could not kill Kubernetes cluster " + clusterId + ".", e);
		}
	}

	@Override
	public void close() {
		try {
			client.close();
		} catch (Exception e) {
			client.handleException(e);
			LOG.error("failed to close client, exception {}", e.toString());
		}
	}
}