All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.checkpoint.ZooKeeperCheckpointIDCounter Maven / Gradle / Ivy

There is a newer version: 1.13.6
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.checkpoint;

import org.apache.flink.api.common.JobStatus;
import org.apache.flink.runtime.jobmanager.HighAvailabilityMode;

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.recipes.shared.SharedCount;
import org.apache.curator.framework.recipes.shared.VersionedValue;
import org.apache.curator.framework.state.ConnectionState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.concurrent.GuardedBy;

import java.util.Optional;

import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * {@link CheckpointIDCounter} instances for JobManagers running in {@link HighAvailabilityMode#ZOOKEEPER}.
 *
 * 

Each counter creates a ZNode: *

 * +----O /flink/checkpoint-counter/<job-id> 1 [persistent]
 * .
 * .
 * .
 * +----O /flink/checkpoint-counter/<job-id> N [persistent]
 * 
* *

The checkpoints IDs are required to be ascending (per job). In order to guarantee this in case * of job manager failures we use ZooKeeper to have a shared counter across job manager instances. */ public class ZooKeeperCheckpointIDCounter implements CheckpointIDCounter { private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperCheckpointIDCounter.class); /** Curator ZooKeeper client. */ private final CuratorFramework client; /** Path of the shared count. */ private final String counterPath; /** Curator recipe for shared counts. */ private final SharedCount sharedCount; private final LastStateConnectionStateListener connectionStateListener; private final Object startStopLock = new Object(); @GuardedBy("startStopLock") private boolean isStarted; /** * Creates a {@link ZooKeeperCheckpointIDCounter} instance. * * @param client Curator ZooKeeper client * @param counterPath ZooKeeper path for the counter. It's sufficient to have a path per-job. */ public ZooKeeperCheckpointIDCounter(CuratorFramework client, String counterPath, LastStateConnectionStateListener connectionStateListener) { this.client = checkNotNull(client, "Curator client"); this.counterPath = checkNotNull(counterPath, "Counter path"); this.sharedCount = new SharedCount(client, counterPath, 1); this.connectionStateListener = connectionStateListener; } @Override public void start() throws Exception { synchronized (startStopLock) { if (!isStarted) { sharedCount.start(); client.getConnectionStateListenable().addListener(connectionStateListener); isStarted = true; } } } @Override public void shutdown(JobStatus jobStatus) throws Exception { synchronized (startStopLock) { if (isStarted) { LOG.info("Shutting down."); sharedCount.close(); client.getConnectionStateListenable().removeListener(connectionStateListener); if (jobStatus.isGloballyTerminalState()) { LOG.info("Removing {} from ZooKeeper", counterPath); client.delete().deletingChildrenIfNeeded().inBackground().forPath(counterPath); } isStarted = false; } } } @Override public long getAndIncrement() throws Exception { while (true) { checkConnectionState(); VersionedValue current = sharedCount.getVersionedValue(); int newCount = current.getValue() + 1; if (newCount < 0) { // overflow and wrap around throw new Exception("Checkpoint counter overflow. ZooKeeper checkpoint counter only supports " + "checkpoints Ids up to " + Integer.MAX_VALUE); } if (sharedCount.trySetCount(current, newCount)) { return current.getValue(); } } } @Override public long get() { checkConnectionState(); return sharedCount.getVersionedValue().getValue(); } @Override public void setCount(long newId) throws Exception { checkConnectionState(); if (newId > Integer.MAX_VALUE) { throw new IllegalArgumentException("ZooKeeper checkpoint counter only supports " + "checkpoints Ids up to " + Integer.MAX_VALUE + ", but given value is" + newId); } sharedCount.setCount((int) newId); } private void checkConnectionState() { final Optional optionalLastState = connectionStateListener.getLastState(); optionalLastState.ifPresent(lastState -> { if (lastState != ConnectionState.CONNECTED && lastState != ConnectionState.RECONNECTED) { throw new IllegalStateException("Connection state: " + lastState); } }); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy