All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.runtime.heartbeat.HeartbeatMonitorImpl Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.heartbeat;

import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.concurrent.ScheduledExecutor;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;

/**
 * The default implementation of {@link HeartbeatMonitor}.
 *
 * @param  Type of the payload being sent to the associated heartbeat target
 */
public class HeartbeatMonitorImpl implements HeartbeatMonitor, Runnable {

    private static final Logger LOG = LoggerFactory.getLogger(HeartbeatMonitorImpl.class);

    /** Resource ID of the monitored heartbeat target. */
    private final ResourceID resourceID;

    /** Associated heartbeat target. */
    private final HeartbeatTarget heartbeatTarget;

    private final ScheduledExecutor scheduledExecutor;

    /** Listener which is notified about heartbeat timeouts. */
    private final HeartbeatListener heartbeatListener;

    /** Maximum heartbeat timeout interval. */
    private final long heartbeatTimeoutIntervalMs;

    private final int failedRpcRequestsUntilUnreachable;

    private volatile ScheduledFuture futureTimeout;

    private final AtomicReference state = new AtomicReference<>(State.RUNNING);

    private final AtomicInteger numberFailedRpcRequestsSinceLastSuccess = new AtomicInteger(0);

    private volatile long lastHeartbeat;

    HeartbeatMonitorImpl(
            ResourceID resourceID,
            HeartbeatTarget heartbeatTarget,
            ScheduledExecutor scheduledExecutor,
            HeartbeatListener heartbeatListener,
            long heartbeatTimeoutIntervalMs,
            int failedRpcRequestsUntilUnreachable) {

        this.resourceID = Preconditions.checkNotNull(resourceID);
        this.heartbeatTarget = Preconditions.checkNotNull(heartbeatTarget);
        this.scheduledExecutor = Preconditions.checkNotNull(scheduledExecutor);
        this.heartbeatListener = Preconditions.checkNotNull(heartbeatListener);

        Preconditions.checkArgument(
                heartbeatTimeoutIntervalMs > 0L,
                "The heartbeat timeout interval has to be larger than 0.");
        this.heartbeatTimeoutIntervalMs = heartbeatTimeoutIntervalMs;

        Preconditions.checkArgument(
                failedRpcRequestsUntilUnreachable > 0 || failedRpcRequestsUntilUnreachable == -1,
                "The number of failed heartbeat RPC requests has to be larger than 0 or -1 (deactivated).");
        this.failedRpcRequestsUntilUnreachable = failedRpcRequestsUntilUnreachable;

        lastHeartbeat = 0L;

        resetHeartbeatTimeout(heartbeatTimeoutIntervalMs);
    }

    @Override
    public HeartbeatTarget getHeartbeatTarget() {
        return heartbeatTarget;
    }

    @Override
    public ResourceID getHeartbeatTargetId() {
        return resourceID;
    }

    @Override
    public long getLastHeartbeat() {
        return lastHeartbeat;
    }

    @Override
    public void reportHeartbeatRpcFailure() {
        final int failedRpcRequestsSinceLastSuccess =
                numberFailedRpcRequestsSinceLastSuccess.incrementAndGet();

        if (isHeartbeatRpcFailureDetectionEnabled()
                && failedRpcRequestsSinceLastSuccess >= failedRpcRequestsUntilUnreachable) {
            if (state.compareAndSet(State.RUNNING, State.UNREACHABLE)) {
                LOG.debug(
                        "Mark heartbeat target {} as unreachable because {} consecutive heartbeat RPCs have failed.",
                        resourceID,
                        failedRpcRequestsSinceLastSuccess);

                cancelTimeout();
                heartbeatListener.notifyTargetUnreachable(resourceID);
            }
        }
    }

    private boolean isHeartbeatRpcFailureDetectionEnabled() {
        return failedRpcRequestsUntilUnreachable > 0;
    }

    @Override
    public void reportHeartbeatRpcSuccess() {
        numberFailedRpcRequestsSinceLastSuccess.set(0);
    }

    @Override
    public void reportHeartbeat() {
        lastHeartbeat = System.currentTimeMillis();
        resetHeartbeatTimeout(heartbeatTimeoutIntervalMs);
    }

    @Override
    public void cancel() {
        // we can only cancel if we are in state running
        if (state.compareAndSet(State.RUNNING, State.CANCELED)) {
            cancelTimeout();
        }
    }

    @Override
    public void run() {
        // The heartbeat has timed out if we're in state running
        if (state.compareAndSet(State.RUNNING, State.TIMEOUT)) {
            heartbeatListener.notifyHeartbeatTimeout(resourceID);
        }
    }

    public boolean isCanceled() {
        return state.get() == State.CANCELED;
    }

    void resetHeartbeatTimeout(long heartbeatTimeout) {
        if (state.get() == State.RUNNING) {
            cancelTimeout();

            futureTimeout =
                    scheduledExecutor.schedule(this, heartbeatTimeout, TimeUnit.MILLISECONDS);

            // Double check for concurrent accesses (e.g. a firing of the scheduled future)
            if (state.get() != State.RUNNING) {
                cancelTimeout();
            }
        }
    }

    private void cancelTimeout() {
        if (futureTimeout != null) {
            futureTimeout.cancel(true);
        }
    }

    private enum State {
        RUNNING,
        TIMEOUT,
        UNREACHABLE,
        CANCELED
    }

    /**
     * The factory that instantiates {@link HeartbeatMonitorImpl}.
     *
     * @param  Type of the outgoing heartbeat payload
     */
    static class Factory implements HeartbeatMonitor.Factory {

        @Override
        public HeartbeatMonitor createHeartbeatMonitor(
                ResourceID resourceID,
                HeartbeatTarget heartbeatTarget,
                ScheduledExecutor mainThreadExecutor,
                HeartbeatListener heartbeatListener,
                long heartbeatTimeoutIntervalMs,
                int failedRpcRequestsUntilUnreachable) {

            return new HeartbeatMonitorImpl<>(
                    resourceID,
                    heartbeatTarget,
                    mainThreadExecutor,
                    heartbeatListener,
                    heartbeatTimeoutIntervalMs,
                    failedRpcRequestsUntilUnreachable);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy