All downloads are free. The search and download functionality uses the official Maven repository.

io.hekate.cluster.health.FailureDetector Maven / Gradle / Ivy

/*
 * Copyright 2022 The Hekate Project
 *
 * The Hekate Project licenses this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */

package io.hekate.cluster.health;

import io.hekate.cluster.ClusterAddress;
import io.hekate.cluster.ClusterService;
import io.hekate.cluster.ClusterServiceFactory;
import io.hekate.core.HekateException;
import java.util.Collection;
import java.util.Set;

/**
 * Cluster nodes failure detector.
 *
 * <p>
 * Implementations of this interface are responsible for providing failure detection logic to the {@link ClusterService}.
 * Typically this logic is based on heartbeat messages exchange between cluster nodes, however there is no hard restriction and any other
 * algorithms can be used.
 * </p>
 *
 * <p>
 * Below are the key points of how this interface is used by the cluster service:
 * </p>
 *
 * <ul>
 * <li>When cluster service starts or if cluster service detects that there are changes in the cluster topology it calls
 * {@link #update(Set)} method so that failure detector could update its internal state of monitored nodes.</li>
 *
 * <li>Once per {@link #heartbeatInterval() heartbeat interval} cluster service calls {@link #isAlive(ClusterAddress)} method to check
 * if particular remote node is alive. If {@code false} is returned by this method then such node will be marked as suspected to be failed
 * and this information will be shared with other cluster members. If node failure is suspected by the {@link #failureQuorum()}
 * amount of nodes then such node will be marked as failed and will be removed from the cluster.</li>
 *
 * <li>Once per {@link #heartbeatInterval() heartbeat interval} cluster service calls {@link #heartbeatTick()} method. If this method
 * returns a non-empty collection of cluster node addresses then heartbeat request message will be sent to each of those nodes.</li>
 *
 * <li>When cluster service receives a heartbeat request message from a remote node then it calls
 * {@link #onHeartbeatRequest(ClusterAddress)} method. If {@code true} is returned by this method then heartbeat reply will be sent back to
 * the originator node. Once cluster service of the originator node receives such a reply it calls
 * {@link #onHeartbeatReply(ClusterAddress)} method.</li>
 * </ul>
 *
 * <p>
 * Implementations of this interface can be registered via {@link ClusterServiceFactory#setFailureDetector(FailureDetector)} method.
 * </p>
 *
 * <p>
 * For the default implementation of this interface please see {@link DefaultFailureDetector}.
 * </p>
 *
 * @see DefaultFailureDetector
 * @see ClusterServiceFactory#setFailureDetector(FailureDetector)
 */
public interface FailureDetector {
    /**
     * Initializes this failure detector with the runtime context.
     *
     * @param context Context.
     *
     * @throws HekateException If this failure detector couldn't be initialized.
     */
    void initialize(FailureDetectorContext context) throws HekateException;

    /**
     * Returns the time interval in milliseconds between heartbeat sending rounds (see {@link #heartbeatTick()}).
     *
     * <p>
     * If the returned value is less than or equal to zero then health monitoring will be completely disabled and
     * {@link #heartbeatTick()}/{@link #isAlive(ClusterAddress)} methods will never be called.
     * </p>
     *
     * @return Time interval in milliseconds between heartbeat sending rounds.
     */
    long heartbeatInterval();

    /**
     * Returns the amount of nodes that should agree on some particular node failure before removing such node from the cluster.
     *
     * <p>
     * The value of this parameter is expected to be greater than or equal to 1. If the value is less than 1 then it will be
     * automatically adjusted to 1.
     * </p>
     *
     * @return Amount of nodes that should agree on some particular node failure before removing such node from the cluster.
     */
    int failureQuorum();

    /**
     * Terminates this failure detector.
     */
    void terminate();

    /**
     * Returns {@code true} if cluster node at the specified address is known to be alive. Returns {@code false} if node is considered to
     * be failed.
     *
     * @param node Node address.
     *
     * @return {@code true} if node is alive or {@code false} if node is considered to be failed.
     */
    boolean isAlive(ClusterAddress node);

    /**
     * Updates this failure detector with the latest information about all known cluster nodes addresses (including local node address).
     *
     * <p>
     * Note that the specified addresses set can include nodes that just started joining and are not within cluster service's
     * {@link ClusterService#topology() topology}.
     * </p>
     *
     * @param nodes Cluster node addresses.
     */
    void update(Set<ClusterAddress> nodes);

    /**
     * Runs a heartbeat tick and returns a collection of cluster node addresses that should receive a heartbeat request message.
     *
     * <p>
     * The time interval between heartbeat ticks is controlled by {@link #heartbeatInterval()} method.
     * </p>
     *
     * @return Collection of cluster node addresses for heartbeat request message sending.
     *
     * @see #onHeartbeatRequest(ClusterAddress)
     */
    Collection<ClusterAddress> heartbeatTick();

    /**
     * Notifies this failure detector on heartbeat request message from a remote node. Returns a boolean flag indicating
     * whether a heartbeat reply should be sent ({@code true}) or heartbeat replies are not supported ({@code false}).
     *
     * @param from Address of the heartbeat request sender node.
     *
     * @return {@code true} if heartbeat reply should be sent back to the requester.
     *
     * @see #heartbeatTick()
     * @see #onHeartbeatReply(ClusterAddress)
     */
    boolean onHeartbeatRequest(ClusterAddress from);

    /**
     * Notifies this failure detector on heartbeat reply message from a remote node.
     *
     * @param node Address of heartbeat reply sender node.
     */
    void onHeartbeatReply(ClusterAddress node);

    /**
     * Notifies this failure detector upon failure while trying to connect to a remote node.
     *
     * @param node Address of a failed node.
     */
    void onConnectFailure(ClusterAddress node);
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy