All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.runtime.util.ZooKeeperUtils Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.util;

import org.apache.curator.utils.ZKPaths;
import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.HighAvailabilityOptions;
import org.apache.flink.configuration.IllegalConfigurationException;
import org.apache.flink.configuration.SecurityOptions;
import org.apache.flink.runtime.checkpoint.CompletedCheckpoint;
import org.apache.flink.runtime.checkpoint.CompletedCheckpointStore;
import org.apache.flink.runtime.checkpoint.ZooKeeperCheckpointIDCounter;
import org.apache.flink.runtime.checkpoint.ZooKeeperCompletedCheckpointStore;
import org.apache.flink.runtime.jobmanager.HighAvailabilityMode;
import org.apache.flink.runtime.jobmanager.SubmittedJobGraph;
import org.apache.flink.runtime.jobmanager.ZooKeeperSubmittedJobGraphStore;
import org.apache.flink.runtime.leaderelection.ZooKeeperLeaderElectionService;
import org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService;
import org.apache.flink.runtime.zookeeper.RetrievableStateStorageHelper;
import org.apache.flink.runtime.zookeeper.filesystem.FileSystemStateStorageHelper;
import org.apache.flink.util.Preconditions;

import org.apache.commons.lang3.StringUtils;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.api.ACLProvider;
import org.apache.curator.framework.imps.DefaultACLProvider;
import org.apache.curator.retry.ExponentialBackoffRetry;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.data.ACL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;
import java.util.concurrent.Executor;

import static org.apache.flink.util.Preconditions.checkNotNull;

public class ZooKeeperUtils {

	/** Logger used by the static helpers of this class. */
	private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperUtils.class);

	/**
	 * The prefix of the submittedJobGraph file; passed as the file prefix when the state
	 * storage for submitted job graphs is created.
	 */
	public static final String HA_STORAGE_SUBMITTED_JOBGRAPH_PREFIX = "submittedJobGraph";

	/**
	 * The prefix of the completedCheckpoint file; passed as the file prefix when the state
	 * storage for completed checkpoints is created.
	 */
	public static final String HA_STORAGE_COMPLETED_CHECKPOINT = "completedCheckpoint";

	/**
	 * Starts a {@link CuratorFramework} instance and connects it to the given ZooKeeper
	 * quorum.
	 *
	 * <p>The client is scoped to the namespace built from
	 * {@link HighAvailabilityOptions#HA_ZOOKEEPER_ROOT} and
	 * {@link HighAvailabilityOptions#HA_CLUSTER_ID}. If the SASL client is disabled and an
	 * ACL scheme is configured, the client authenticates with that scheme and the default
	 * ACL of the chosen provider is applied to the namespace root.
	 *
	 * @param configuration {@link Configuration} object containing the configuration values
	 * @return started {@link CuratorFramework} instance
	 * @throws RuntimeException if no ZooKeeper quorum is configured or if setting the ACL on
	 *         the namespace path fails
	 * @throws IllegalConfigurationException if the {@code CREATOR} ACL mode is requested
	 *         without any authentication, or if an ACL scheme is configured without
	 *         authentication data
	 */
	public static CuratorFramework startCuratorFramework(Configuration configuration) {
		Preconditions.checkNotNull(configuration, "configuration");
		String zkQuorum = configuration.getValue(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM);

		// StringUtils.isBlank is null-safe, so no separate null check is required
		if (StringUtils.isBlank(zkQuorum)) {
			throw new RuntimeException("No valid ZooKeeper quorum has been specified. " +
					"You can specify the quorum via the configuration key '" +
					HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM.key() + "'.");
		}

		int sessionTimeout = configuration.getInteger(HighAvailabilityOptions.ZOOKEEPER_SESSION_TIMEOUT);

		int connectionTimeout = configuration.getInteger(HighAvailabilityOptions.ZOOKEEPER_CONNECTION_TIMEOUT);

		int retryWait = configuration.getInteger(HighAvailabilityOptions.ZOOKEEPER_RETRY_WAIT);

		int maxRetryAttempts = configuration.getInteger(HighAvailabilityOptions.ZOOKEEPER_MAX_RETRY_ATTEMPTS);

		String root = configuration.getValue(HighAvailabilityOptions.HA_ZOOKEEPER_ROOT);

		String namespace = configuration.getValue(HighAvailabilityOptions.HA_CLUSTER_ID);

		boolean disableSaslClient = configuration.getBoolean(SecurityOptions.ZOOKEEPER_SASL_DISABLE);

		ACLProvider aclProvider;

		ZkClientACLMode aclMode = ZkClientACLMode.fromConfig(configuration);

		boolean enableAclClient = false;
		String aclScheme = null;
		String aclAuth = null;
		if (disableSaslClient) {
			aclScheme = configuration.getValue(HighAvailabilityOptions.ZOOKEEPER_ACL_SCHEME);
			aclAuth = configuration.getValue(HighAvailabilityOptions.ZOOKEEPER_ACL_AUTH);

			if (aclScheme != null && !aclScheme.isEmpty()) {
				// Fail with a descriptive configuration error instead of a bare
				// NullPointerException when the auth data is converted to bytes below.
				if (aclAuth == null) {
					throw new IllegalConfigurationException("ZooKeeper ACL scheme '" + aclScheme +
							"' is configured, but no authentication data has been specified. " +
							"You can specify it via the configuration key '" +
							HighAvailabilityOptions.ZOOKEEPER_ACL_AUTH.key() + "'.");
				}

				enableAclClient = true;
				LOG.info("Access Zookeeper using {} ACL authentication.", aclScheme);
			}
		} else {
			LOG.info("Access Zookeeper using SASL authentication.");
		}

		boolean isAuthEnabled = !disableSaslClient || enableAclClient;
		if (!isAuthEnabled && aclMode == ZkClientACLMode.CREATOR) {
			String errorMessage = "Cannot set ACL role to " + aclMode + " since authentication is not enabled.";
			LOG.warn(errorMessage);
			throw new IllegalConfigurationException(errorMessage);
		}

		if (aclMode == ZkClientACLMode.CREATOR) {
			LOG.info("Enforcing creator for ZK connections");
			aclProvider = new SecureAclProvider();
		} else {
			LOG.info("Enforcing default ACL for ZK connections");
			aclProvider = new DefaultACLProvider();
		}

		String rootWithNamespace = generateZookeeperPath(root, namespace);

		LOG.info("Using '{}' as Zookeeper namespace.", rootWithNamespace);

		CuratorFrameworkFactory.Builder builder = CuratorFrameworkFactory.builder()
				.connectString(zkQuorum)
				.sessionTimeoutMs(sessionTimeout)
				.connectionTimeoutMs(connectionTimeout)
				.retryPolicy(new ExponentialBackoffRetry(retryWait, maxRetryAttempts))
				// Curator prepends a '/' manually and throws an Exception if the
				// namespace starts with a '/'.
				.namespace(rootWithNamespace.startsWith("/") ? rootWithNamespace.substring(1) : rootWithNamespace)
				.aclProvider(aclProvider);

		if (enableAclClient) {
			// Encode with a fixed charset so that every process derives the same
			// authentication bytes regardless of its platform default charset.
			builder = builder.authorization(
					aclScheme, aclAuth.getBytes(java.nio.charset.StandardCharsets.UTF_8));
		}

		CuratorFramework cf = builder.build();

		cf.start();

		// set acl for the namespace path
		if (enableAclClient) {
			try {
				cf.setACL().withACL(aclProvider.getDefaultAcl()).forPath("/");
			} catch (Exception e) {
				throw new RuntimeException("Set ACL for the namespace path '" + cf.getNamespace() + "' failed.", e);
			}
		}

		return cf;
	}

	/**
	 * Checks whether the high-availability mode resolved from the given configuration is
	 * {@link HighAvailabilityMode#ZOOKEEPER}.
	 *
	 * @param flinkConf configuration from which the high-availability mode is resolved
	 * @return {@code true} if the resolved mode equals {@link HighAvailabilityMode#ZOOKEEPER}
	 */
	public static boolean isZooKeeperRecoveryMode(Configuration flinkConf) {
		final HighAvailabilityMode configuredMode = HighAvailabilityMode.fromConfig(flinkConf);
		return configuredMode.equals(HighAvailabilityMode.ZOOKEEPER);
	}

	/**
	 * Reads the ZooKeeper quorum from the configuration and strips every whitespace
	 * character from it, because ZooKeeper does not tolerate whitespace.
	 *
	 * @param flinkConf configuration holding {@link HighAvailabilityOptions#HA_ZOOKEEPER_QUORUM}
	 * @return the configured quorum string without any whitespace
	 * @throws IllegalConfigurationException if the quorum is missing or blank
	 */
	public static String getZooKeeperEnsemble(Configuration flinkConf)
			throws IllegalConfigurationException {

		final String configuredQuorum = flinkConf.getValue(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM);

		// isBlank also covers the null case
		if (StringUtils.isBlank(configuredQuorum)) {
			throw new IllegalConfigurationException("No ZooKeeper quorum specified in config.");
		}

		return configuredQuorum.replaceAll("\\s+", "");
	}

	/**
	 * Creates a {@link ZooKeeperLeaderRetrievalService} instance for the configured leader
	 * path, without appending any path suffix.
	 *
	 * @param client        The {@link CuratorFramework} ZooKeeper client to use
	 * @param configuration {@link Configuration} object containing the configuration values
	 * @return {@link ZooKeeperLeaderRetrievalService} instance.
	 * @throws Exception declared as part of this method's signature
	 */
	public static ZooKeeperLeaderRetrievalService createLeaderRetrievalService(
			final CuratorFramework client,
			final Configuration configuration) throws Exception {

		final String noSuffix = "";
		return createLeaderRetrievalService(client, configuration, noSuffix);
	}

	/**
	 * Creates a {@link ZooKeeperLeaderRetrievalService} instance whose leader path is the
	 * configured {@link HighAvailabilityOptions#HA_ZOOKEEPER_LEADER_PATH} followed by the
	 * given suffix.
	 *
	 * @param client        The {@link CuratorFramework} ZooKeeper client to use
	 * @param configuration {@link Configuration} object containing the configuration values
	 * @param pathSuffix    The path suffix which we want to append
	 * @return {@link ZooKeeperLeaderRetrievalService} instance.
	 */
	public static ZooKeeperLeaderRetrievalService createLeaderRetrievalService(
			final CuratorFramework client,
			final Configuration configuration,
			final String pathSuffix) {

		final String configuredLeaderPath =
			configuration.getString(HighAvailabilityOptions.HA_ZOOKEEPER_LEADER_PATH);

		return new ZooKeeperLeaderRetrievalService(client, configuredLeaderPath + pathSuffix);
	}

	/**
	 * Creates a {@link ZooKeeperLeaderElectionService} instance for the configured latch and
	 * leader paths, without appending any path suffix.
	 *
	 * @param client        The {@link CuratorFramework} ZooKeeper client to use
	 * @param configuration {@link Configuration} object containing the configuration values
	 * @return {@link ZooKeeperLeaderElectionService} instance.
	 * @throws Exception declared as part of this method's signature
	 */
	public static ZooKeeperLeaderElectionService createLeaderElectionService(
			CuratorFramework client,
			Configuration configuration) throws Exception {

		final String noSuffix = "";
		return createLeaderElectionService(client, configuration, noSuffix);
	}

	/**
	 * Creates a {@link ZooKeeperLeaderElectionService} instance. The given suffix is
	 * appended to both the configured latch path and the configured leader path.
	 *
	 * @param client        The {@link CuratorFramework} ZooKeeper client to use
	 * @param configuration {@link Configuration} object containing the configuration values
	 * @param pathSuffix    The path suffix which we want to append
	 * @return {@link ZooKeeperLeaderElectionService} instance.
	 */
	public static ZooKeeperLeaderElectionService createLeaderElectionService(
			final CuratorFramework client,
			final Configuration configuration,
			final String pathSuffix) {

		final String configuredLatchPath =
			configuration.getString(HighAvailabilityOptions.HA_ZOOKEEPER_LATCH_PATH);
		final String configuredLeaderPath =
			configuration.getString(HighAvailabilityOptions.HA_ZOOKEEPER_LEADER_PATH);

		return new ZooKeeperLeaderElectionService(
			client,
			configuredLatchPath + pathSuffix,
			configuredLeaderPath + pathSuffix);
	}

	/**
	 * Creates a {@link ZooKeeperSubmittedJobGraphStore} instance.
	 *
	 * <p>The store is rooted at the configured
	 * {@link HighAvailabilityOptions#HA_ZOOKEEPER_JOBGRAPHS_PATH} and is handed a file system
	 * state storage that uses {@link #HA_STORAGE_SUBMITTED_JOBGRAPH_PREFIX} as file prefix.
	 *
	 * @param client        The {@link CuratorFramework} ZooKeeper client to use
	 * @param configuration {@link Configuration} object
	 * @param executor to run ZooKeeper callbacks
	 * @return {@link ZooKeeperSubmittedJobGraphStore} instance
	 * @throws Exception if the submitted job graph store cannot be created
	 */
	public static ZooKeeperSubmittedJobGraphStore createSubmittedJobGraphs(
			CuratorFramework client,
			Configuration configuration,
			Executor executor) throws Exception {

		checkNotNull(configuration, "Configuration");

		// Parameterized instead of raw, so the element type of the storage is checked
		// by the compiler.
		RetrievableStateStorageHelper<SubmittedJobGraph> stateStorage = createFileSystemStateStorage(
			configuration, HA_STORAGE_SUBMITTED_JOBGRAPH_PREFIX);

		// ZooKeeper submitted jobs root dir
		String zooKeeperSubmittedJobsPath = configuration.getString(HighAvailabilityOptions.HA_ZOOKEEPER_JOBGRAPHS_PATH);

		return new ZooKeeperSubmittedJobGraphStore(
				client, zooKeeperSubmittedJobsPath, stateStorage, executor);
	}

	/**
	 * Cleans up leftover ZooKeeper paths for the given curator framework: first the znode
	 * at the root of the client's namespace is deleted, then an attempt is made to delete
	 * its empty parent znodes.
	 *
	 * @param client The {@link CuratorFramework} ZooKeeper client whose paths are cleaned up
	 * @throws Exception if one of the two clean-up steps fails
	 */
	public static void cleanupZooKeeperPaths(final CuratorFramework client) throws Exception {
		// step 1: remove the znode owned by this client
		deleteOwnedZNode(client);

		// step 2: remove parents that became empty
		tryDeleteEmptyParentZNodes(client);
	}

	/**
	 * Deletes the znode at the root of the client's namespace together with its children.
	 *
	 * <p>A {@link KeeperException.NoNodeException} during the recursive delete is treated as
	 * the result of a concurrent delete operation and the delete is retried. If the znode
	 * itself no longer exists, the goal of this method is already reached and it returns
	 * instead of retrying forever.
	 *
	 * @param client The {@link CuratorFramework} ZooKeeper client whose namespace root is deleted
	 * @throws Exception if the deletion fails for another reason than a missing node
	 */
	private static void deleteOwnedZNode(CuratorFramework client) throws Exception {
		// delete the HA_CLUSTER_ID znode which is owned by this cluster

		// Since we are using Curator version 2.12 there is a bug in deleting the children
		// if there is a concurrent delete operation. Therefore we need to add this retry
		// logic. See https://issues.apache.org/jira/browse/CURATOR-430 for more information.
		// The retry logic can be removed once we upgrade to Curator version >= 4.0.1.
		while (true) {
			try {
				client.delete().deletingChildrenIfNeeded().forPath("/");
				return;
			} catch (KeeperException.NoNodeException ignored) {
				// If the owned znode is gone entirely, every further attempt would fail
				// with the same exception; there is nothing left to delete.
				if (client.checkExists().forPath("/") == null) {
					LOG.debug("Owned znode has already been deleted by another concurrent delete operation.");
					return;
				}

				// concurrent delete operation. Try again.
				LOG.debug("Retrying to delete owned znode because of other concurrent delete operation.");
			}
		}
	}

	/**
	 * Tries to delete empty parent znodes.
	 *
	 * 

IMPORTANT: This method can be removed once all supported ZooKeeper versions * support the container {@link org.apache.zookeeper.CreateMode}. * * @throws Exception if the deletion fails for other reason than {@link KeeperException.NotEmptyException} */ private static void tryDeleteEmptyParentZNodes(CuratorFramework client) throws Exception { // try to delete the parent znodes if they are empty String remainingPath = getParentPath(getNormalizedPath(client.getNamespace())); final CuratorFramework nonNamespaceClient = client.usingNamespace(null); while (!isRootPath(remainingPath)) { try { nonNamespaceClient.delete().forPath(remainingPath); } catch (KeeperException.NotEmptyException ignored) { // We can only delete empty znodes break; } remainingPath = getParentPath(remainingPath); } } private static boolean isRootPath(String remainingPath) { return ZKPaths.PATH_SEPARATOR.equals(remainingPath); } @Nonnull private static String getNormalizedPath(String path) { return ZKPaths.makePath(path, ""); } @Nonnull private static String getParentPath(String path) { return ZKPaths.getPathAndNode(path).getPath(); } /** * Creates a {@link ZooKeeperCompletedCheckpointStore} instance. 
* * @param client The {@link CuratorFramework} ZooKeeper client to use * @param configuration {@link Configuration} object * @param jobId ID of job to create the instance for * @param maxNumberOfCheckpointsToRetain The maximum number of checkpoints to retain * @param executor to run ZooKeeper callbacks * @return {@link ZooKeeperCompletedCheckpointStore} instance * @throws Exception if the completed checkpoint store cannot be created */ public static CompletedCheckpointStore createCompletedCheckpoints( CuratorFramework client, Configuration configuration, JobID jobId, int maxNumberOfCheckpointsToRetain, Executor executor) throws Exception { checkNotNull(configuration, "Configuration"); String checkpointsPath = configuration.getString( HighAvailabilityOptions.HA_ZOOKEEPER_CHECKPOINTS_PATH); RetrievableStateStorageHelper stateStorage = createFileSystemStateStorage( configuration, HA_STORAGE_COMPLETED_CHECKPOINT); checkpointsPath += ZooKeeperSubmittedJobGraphStore.getPathForJob(jobId); return new ZooKeeperCompletedCheckpointStore( maxNumberOfCheckpointsToRetain, client, checkpointsPath, stateStorage, executor); } /** * Creates a {@link ZooKeeperCheckpointIDCounter} instance. * * @param client The {@link CuratorFramework} ZooKeeper client to use * @param configuration {@link Configuration} object * @param jobId ID of job to create the instance for * @return {@link ZooKeeperCheckpointIDCounter} instance */ public static ZooKeeperCheckpointIDCounter createCheckpointIDCounter( CuratorFramework client, Configuration configuration, JobID jobId) { String checkpointIdCounterPath = configuration.getString( HighAvailabilityOptions.HA_ZOOKEEPER_CHECKPOINT_COUNTER_PATH); checkpointIdCounterPath += ZooKeeperSubmittedJobGraphStore.getPathForJob(jobId); return new ZooKeeperCheckpointIDCounter(client, checkpointIdCounterPath); } /** * Creates a {@link FileSystemStateStorageHelper} instance. 
* * @param configuration {@link Configuration} object * @param prefix Prefix for the created files * @param Type of the state objects * @return {@link FileSystemStateStorageHelper} instance * @throws IOException if file system state storage cannot be created */ public static FileSystemStateStorageHelper createFileSystemStateStorage( Configuration configuration, String prefix) throws IOException { return new FileSystemStateStorageHelper(getClusterHighAvailabilityStoragePath(configuration), prefix); } /** * Get high availability storage path of current flink cluster. * @param configuration {@link Configuration} object * @return high availability storage path of current flink cluster */ public static String getClusterHighAvailabilityStoragePath(Configuration configuration) { String rootPath = configuration.getValue(HighAvailabilityOptions.HA_STORAGE_PATH); if (rootPath == null || StringUtils.isBlank(rootPath)) { throw new IllegalConfigurationException("Missing high-availability storage path for metadata." + " Specify via configuration key '" + HighAvailabilityOptions.HA_STORAGE_PATH + "'."); } else { final String clusterId = configuration.getValue(HighAvailabilityOptions.HA_CLUSTER_ID); rootPath += "/" + clusterId; return rootPath; } } public static String generateZookeeperPath(String root, String namespace) { if (!namespace.startsWith("/")) { namespace = '/' + namespace; } if (namespace.endsWith("/")) { namespace = namespace.substring(0, namespace.length() - 1); } if (root.endsWith("/")) { root = root.substring(0, root.length() - 1); } return root + namespace; } public static class SecureAclProvider implements ACLProvider { @Override public List getDefaultAcl() { return ZooDefs.Ids.CREATOR_ALL_ACL; } @Override public List getAclForPath(String path) { return ZooDefs.Ids.CREATOR_ALL_ACL; } } public enum ZkClientACLMode { CREATOR, OPEN; /** * Return the configured {@link ZkClientACLMode}. 
* * @param config The config to parse * @return Configured ACL mode or the default defined by {@link HighAvailabilityOptions#ZOOKEEPER_CLIENT_ACL} if not * configured. */ public static ZkClientACLMode fromConfig(Configuration config) { String aclMode = config.getString(HighAvailabilityOptions.ZOOKEEPER_CLIENT_ACL); if (aclMode == null || aclMode.equalsIgnoreCase(ZkClientACLMode.OPEN.name())) { return ZkClientACLMode.OPEN; } else if (aclMode.equalsIgnoreCase(ZkClientACLMode.CREATOR.name())) { return ZkClientACLMode.CREATOR; } else { String message = "Unsupported ACL option: [" + aclMode + "] provided"; LOG.error(message); throw new IllegalConfigurationException(message); } } } /** * Private constructor to prevent instantiation. */ private ZooKeeperUtils() { throw new RuntimeException(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy