All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.checkpoint.ZooKeeperCheckpointIDCounter Maven / Gradle / Ivy

There is a newer version: 1.13.6
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.checkpoint;

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.recipes.shared.SharedCount;
import org.apache.curator.framework.recipes.shared.VersionedValue;
import org.apache.curator.framework.state.ConnectionState;
import org.apache.curator.framework.state.ConnectionStateListener;
import org.apache.flink.runtime.jobgraph.JobStatus;
import org.apache.flink.runtime.jobmanager.HighAvailabilityMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * {@link CheckpointIDCounter} instances for JobManagers running in {@link HighAvailabilityMode#ZOOKEEPER}.
 *
 * 

Each counter creates a ZNode: *

 * +----O /flink/checkpoint-counter/<job-id> 1 [persistent]
 * .
 * .
 * .
 * +----O /flink/checkpoint-counter/<job-id> N [persistent]
 * 
* *

The checkpoints IDs are required to be ascending (per job). In order to guarantee this in case * of job manager failures we use ZooKeeper to have a shared counter across job manager instances. */ public class ZooKeeperCheckpointIDCounter implements CheckpointIDCounter { private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperCheckpointIDCounter.class); /** Curator ZooKeeper client */ private final CuratorFramework client; /** Path of the shared count */ private final String counterPath; /** Curator recipe for shared counts */ private final SharedCount sharedCount; /** Connection state listener to monitor the client connection */ private final SharedCountConnectionStateListener connStateListener = new SharedCountConnectionStateListener(); private final Object startStopLock = new Object(); private boolean isStarted; /** * Creates a {@link ZooKeeperCheckpointIDCounter} instance. * * @param client Curator ZooKeeper client * @param counterPath ZooKeeper path for the counter. It's sufficient to have a path per-job. */ public ZooKeeperCheckpointIDCounter(CuratorFramework client, String counterPath) { this.client = checkNotNull(client, "Curator client"); this.counterPath = checkNotNull(counterPath, "Counter path"); this.sharedCount = new SharedCount(client, counterPath, 1); } @Override public void start() throws Exception { synchronized (startStopLock) { if (!isStarted) { sharedCount.start(); client.getConnectionStateListenable().addListener(connStateListener); isStarted = true; } } } @Override public void shutdown(JobStatus jobStatus) throws Exception { synchronized (startStopLock) { if (isStarted) { LOG.info("Shutting down."); sharedCount.close(); client.getConnectionStateListenable().removeListener(connStateListener); if (jobStatus.isGloballyTerminalState()) { LOG.info("Removing {} from ZooKeeper", counterPath); client.delete().deletingChildrenIfNeeded().inBackground().forPath(counterPath); } isStarted = false; } } } @Override public long getAndIncrement() throws Exception { while (true) { ConnectionState connState = connStateListener.getLastState(); if (connState != null) { throw new IllegalStateException("Connection state: " + connState); } VersionedValue current = sharedCount.getVersionedValue(); int newCount = current.getValue() + 1; if (newCount < 0) { // overflow and wrap around throw new Exception("Checkpoint counter overflow. ZooKeeper checkpoint counter only supports " + "checkpoints Ids up to " + Integer.MAX_VALUE); } if (sharedCount.trySetCount(current, newCount)) { return current.getValue(); } } } @Override public void setCount(long newId) throws Exception { ConnectionState connState = connStateListener.getLastState(); if (connState != null) { throw new IllegalStateException("Connection state: " + connState); } if (newId > Integer.MAX_VALUE) { throw new IllegalArgumentException("ZooKeeper checkpoint counter only supports " + "checkpoints Ids up to " + Integer.MAX_VALUE + ", but given value is" + newId); } sharedCount.setCount((int) newId); } /** * Connection state listener. In case of {@link ConnectionState#SUSPENDED} or {@link * ConnectionState#LOST} we are not guaranteed to read a current count from ZooKeeper. */ private static class SharedCountConnectionStateListener implements ConnectionStateListener { private volatile ConnectionState lastState; @Override public void stateChanged(CuratorFramework client, ConnectionState newState) { if (newState == ConnectionState.SUSPENDED || newState == ConnectionState.LOST) { lastState = newState; } } private ConnectionState getLastState() { return lastState; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy