org.apache.flink.runtime.highavailability.zookeeper.ZooKeeperHaServices

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.highavailability.zookeeper;

import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.HighAvailabilityOptions;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.blob.BlobStore;
import org.apache.flink.runtime.blob.BlobStoreService;
import org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory;
import org.apache.flink.runtime.checkpoint.ZooKeeperCheckpointRecoveryFactory;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.highavailability.RunningJobsRegistry;
import org.apache.flink.runtime.jobmanager.SubmittedJobGraphStore;
import org.apache.flink.runtime.leaderelection.LeaderElectionService;
import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
import org.apache.flink.runtime.util.ZooKeeperUtils;
import org.apache.flink.util.ExceptionUtils;

import org.apache.curator.framework.CuratorFramework;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.concurrent.Executor;

import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * An implementation of the {@link HighAvailabilityServices} using Apache ZooKeeper.
 * The services store data in ZooKeeper's nodes as illustrated by the following tree structure:
 *
 * <pre>
 * /flink
 *      +/cluster_id_1/resource_manager_lock
 *      |            |
 *      |            +/job-id-1/job_manager_lock
 *      |            |         /checkpoints/latest
 *      |            |                     /latest-1
 *      |            |                     /latest-2
 *      |            |
 *      |            +/job-id-2/job_manager_lock
 *      |      
 *      +/cluster_id_2/resource_manager_lock
 *                   |
 *                   +/job-id-1/job_manager_lock
 *                            |/checkpoints/latest
 *                            |            /latest-1
 *                            |/persisted_job_graph
 * </pre>
 *
 * <p>The root path "/flink" is configurable via the option {@link HighAvailabilityOptions#HA_ZOOKEEPER_ROOT}.
 * This makes sure Flink stores its data under specific subtrees in ZooKeeper, for example to
 * accommodate specific permissions.
 *
 * <p>The "cluster_id" part identifies the data stored for a specific Flink "cluster".
 * This "cluster" can be either a standalone or containerized Flink cluster, or it can be a job
 * on a framework like YARN or Mesos (in a "per-job-cluster" mode).
 *
 * <p>In case of a "per-job-cluster" on YARN or Mesos, the cluster-id is generated and configured
 * automatically by the client or dispatcher that submits the job to YARN or Mesos.
 *
 * <p>In the case of a standalone cluster, that cluster-id needs to be configured via
 * {@link HighAvailabilityOptions#HA_CLUSTER_ID}. All nodes with the same cluster id will join the same
 * cluster and participate in the execution of the same set of jobs.
 */
public class ZooKeeperHaServices implements HighAvailabilityServices {

    private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperHaServices.class);

    private static final String RESOURCE_MANAGER_LEADER_PATH = "/resource_manager_lock";

    private static final String DISPATCHER_LEADER_PATH = "/dispatcher_lock";

    private static final String JOB_MANAGER_LEADER_PATH = "/job_manager_lock";

    private static final String REST_SERVER_LEADER_PATH = "/rest_server_lock";

    // ------------------------------------------------------------------------

    /** The ZooKeeper client to use */
    private final CuratorFramework client;

    /** The executor to run ZooKeeper callbacks on */
    private final Executor executor;

    /** The runtime configuration */
    private final Configuration configuration;

    /** The zookeeper based running jobs registry */
    private final RunningJobsRegistry runningJobsRegistry;

    /** Store for arbitrary blobs */
    private final BlobStoreService blobStoreService;

    public ZooKeeperHaServices(
            CuratorFramework client,
            Executor executor,
            Configuration configuration,
            BlobStoreService blobStoreService) {
        this.client = checkNotNull(client);
        this.executor = checkNotNull(executor);
        this.configuration = checkNotNull(configuration);
        this.runningJobsRegistry = new ZooKeeperRunningJobsRegistry(client, configuration);

        this.blobStoreService = checkNotNull(blobStoreService);
    }

    // ------------------------------------------------------------------------
    //  Services
    // ------------------------------------------------------------------------

    @Override
    public LeaderRetrievalService getResourceManagerLeaderRetriever() {
        return ZooKeeperUtils.createLeaderRetrievalService(client, configuration, RESOURCE_MANAGER_LEADER_PATH);
    }

    @Override
    public LeaderRetrievalService getDispatcherLeaderRetriever() {
        return ZooKeeperUtils.createLeaderRetrievalService(client, configuration, DISPATCHER_LEADER_PATH);
    }

    @Override
    public LeaderRetrievalService getJobManagerLeaderRetriever(JobID jobID) {
        return ZooKeeperUtils.createLeaderRetrievalService(client, configuration, getPathForJobManager(jobID));
    }

    @Override
    public LeaderRetrievalService getJobManagerLeaderRetriever(JobID jobID, String defaultJobManagerAddress) {
        return getJobManagerLeaderRetriever(jobID);
    }

    @Override
    public LeaderRetrievalService getWebMonitorLeaderRetriever() {
        return ZooKeeperUtils.createLeaderRetrievalService(client, configuration, REST_SERVER_LEADER_PATH);
    }

    @Override
    public LeaderElectionService getResourceManagerLeaderElectionService() {
        return ZooKeeperUtils.createLeaderElectionService(client, configuration, RESOURCE_MANAGER_LEADER_PATH);
    }

    @Override
    public LeaderElectionService getDispatcherLeaderElectionService() {
        return ZooKeeperUtils.createLeaderElectionService(client, configuration, DISPATCHER_LEADER_PATH);
    }

    @Override
    public LeaderElectionService getJobManagerLeaderElectionService(JobID jobID) {
        return ZooKeeperUtils.createLeaderElectionService(client, configuration, getPathForJobManager(jobID));
    }

    @Override
    public LeaderElectionService getWebMonitorLeaderElectionService() {
        return ZooKeeperUtils.createLeaderElectionService(client, configuration, REST_SERVER_LEADER_PATH);
    }

    @Override
    public CheckpointRecoveryFactory getCheckpointRecoveryFactory() {
        return new ZooKeeperCheckpointRecoveryFactory(client, configuration, executor);
    }

    @Override
    public SubmittedJobGraphStore getSubmittedJobGraphStore() throws Exception {
        return ZooKeeperUtils.createSubmittedJobGraphs(client, configuration, executor);
    }

    @Override
    public RunningJobsRegistry getRunningJobsRegistry() {
        return runningJobsRegistry;
    }

    @Override
    public BlobStore createBlobStore() throws IOException {
        return blobStoreService;
    }

    // ------------------------------------------------------------------------
    //  Shutdown
    // ------------------------------------------------------------------------

    @Override
    public void close() throws Exception {
        Throwable exception = null;

        try {
            blobStoreService.close();
        } catch (Throwable t) {
            exception = t;
        }

        internalClose();

        if (exception != null) {
            ExceptionUtils.rethrowException(exception, "Could not properly close the ZooKeeperHaServices.");
        }
    }

    @Override
    public void closeAndCleanupAllData() throws Exception {
        LOG.info("Close and clean up all data for ZooKeeperHaServices.");

        Throwable exception = null;

        try {
            blobStoreService.closeAndCleanupAllData();
        } catch (Throwable t) {
            exception = t;
        }

        Path haStorageDir = new Path(ZooKeeperUtils.getClusterHighAvailabilityStoragePath(configuration));
        FileSystem fs = FileSystem.get(haStorageDir.toUri());
        if (!fs.delete(haStorageDir, true)) {
            LOG.warn("Failed to cleanup high-availability storage path, " + haStorageDir.toString());
        }

        internalClose();

        if (exception != null) {
            ExceptionUtils.rethrowException(exception, "Could not properly close and clean up all data of ZooKeeperHaServices.");
        }
    }

    /**
     * Closes components which don't distinguish between close and closeAndCleanupAllData
     */
    private void internalClose() {
        client.close();
    }

    // ------------------------------------------------------------------------
    //  Utilities
    // ------------------------------------------------------------------------

    private static String getPathForJobManager(final JobID jobID) {
        return "/" + jobID + JOB_MANAGER_LEADER_PATH;
    }
}
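
To illustrate how the root path and cluster-id described in the class Javadoc shape the ZNode layout, the following sketch sets the relevant high-availability options on a Flink Configuration. It assumes the option constants defined in HighAvailabilityOptions of the same Flink version; the quorum address, cluster-id, and storage path values are placeholders, not values taken from this source file.

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.HighAvailabilityOptions;

public class HaConfigurationSketch {

    public static Configuration zooKeeperHaConfiguration() {
        Configuration config = new Configuration();

        // Enable ZooKeeper-based high availability.
        config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");

        // Placeholder quorum address; replace with the real ZooKeeper ensemble.
        config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, "zk-1:2181,zk-2:2181,zk-3:2181");

        // The root under which all Flink data lives in ZooKeeper ("/flink" in the tree above).
        config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_ROOT, "/flink");

        // For a standalone cluster the cluster-id must be set explicitly
        // ("cluster_id_1" / "cluster_id_2" in the tree above).
        config.setString(HighAvailabilityOptions.HA_CLUSTER_ID, "cluster_id_1");

        // File system path for HA metadata (job graphs, blobs); placeholder value.
        config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, "hdfs:///flink/ha");

        return config;
    }
}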
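The constructor above shows the four collaborators the service needs: a Curator client, an executor for ZooKeeper callbacks, the configuration, and a blob store. A minimal wiring sketch is shown below, assuming the ZooKeeperUtils.startCuratorFramework and BlobUtils.createBlobStoreFromConfig helpers available in this Flink version; in a real deployment these services are normally created by Flink's own factory code rather than constructed by hand.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import org.apache.curator.framework.CuratorFramework;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.blob.BlobStoreService;
import org.apache.flink.runtime.blob.BlobUtils;
import org.apache.flink.runtime.highavailability.zookeeper.ZooKeeperHaServices;
import org.apache.flink.runtime.util.ZooKeeperUtils;

public class ZooKeeperHaServicesWiringSketch {

    public static ZooKeeperHaServices createServices(Configuration configuration) throws Exception {
        // Curator client built from the high-availability options (quorum, root, retries, ...).
        CuratorFramework client = ZooKeeperUtils.startCuratorFramework(configuration);

        // Executor used to run ZooKeeper callbacks; a single thread suffices for a sketch.
        ExecutorService executor = Executors.newSingleThreadExecutor();

        // Blob store backed by the configured high-availability storage path.
        BlobStoreService blobStore = BlobUtils.createBlobStoreFromConfig(configuration);

        return new ZooKeeperHaServices(client, executor, configuration, blobStore);
    }
}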




