com.twitter.finagle.memcached.ZookeeperStateMonitor.scala

package com.twitter.finagle.memcached

import com.twitter.concurrent.Broker
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.common.zookeeper.ZooKeeperClient
import com.twitter.finagle.service.Backoff
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.util.{Duration, FuturePool, JavaTimer, Return, Throw}
import org.apache.zookeeper.Watcher.Event.KeeperState
import org.apache.zookeeper.{WatchedEvent, Watcher}
import scala.collection.JavaConverters._

object ZookeeperStateMonitor {
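  // Retry schedule for failed zk work: exponential back-off from 1s doubling up to
  // 32s (6 steps), then a constant 60s interval thereafter.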
  val DefaultZkConnectionRetryBackoff =
    (Backoff.exponential(1.second, 2) take 6) ++ Backoff.const(60.seconds)
  val DefaultFuturePool = FuturePool.unboundedPool
  val DefaultTimer = new JavaTimer(isDaemon = true)
  val DefaultZKWaitTimeout = 10.seconds
}

/**
 * A zk monitor trait that assists with monitoring a given zk path for any node data change,
 * invoking the provided zk data handling implementation whenever a change occurs.
 *
 * This monitor maintains a queue so that every work item triggered by a zk event is
 * processed in order with a back-off policy. It also sets up a zookeeper connection watcher
 * by default to re-register the data change watcher even across zk re-connects.
 *
 * The monitor sets up all watchers properly and kicks off the loop to process future events;
 * you can also invoke loadZKData() in your class at any time to force reading zk data and applying it.
 *
 * Example use cases are:
 * - zookeeper based CachePoolCluster uses this to monitor cache pool member changes
 * - zookeeper based MigrationClient uses this to monitor migration state transitioning
 */
trait ZookeeperStateMonitor {
  protected val zkPath: String
  protected val zkClient: ZooKeeperClient
  protected val statsReceiver: StatsReceiver

  import ZookeeperStateMonitor._

  private[this] val zkWorkFailedCounter = statsReceiver.counter("zkWork.failed")
  private[this] val zkWorkSucceededCounter = statsReceiver.counter("zkWork.succeeded")
  private[this] val loadZKDataCounter = statsReceiver.counter("loadZKData")
  private[this] val loadZKChildrenCounter = statsReceiver.counter("loadZKChildren")
  private[this] val reconnectZKCounter = statsReceiver.counter("reconnectZK")

  private[this] val zookeeperWorkQueue = new Broker[() => Unit]
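  // Note: Broker is an unbuffered channel; each `!` on this queue enqueues a pending
  // offer, which loopZookeeperWork() consumes one at a time via recv.sync().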

  // zk watcher for connection, data, children event
  private[this] val zkWatcher: Watcher = new Watcher() {
    // NOTE: Ensure that the processing of events is not a blocking operation.
    override def process(event: WatchedEvent) = {
      event.getState match {
        // Actively try to re-establish the zk connection (and hence re-register the zk path
        // data watcher) whenever the zookeeper connection expires or disconnects. We could
        // also do this only when SyncConnected happens, but that would assume other components
        // sharing the zk client (e.g. ZookeeperServerSetCluster) always actively attempt
        // to re-establish the zk connection. For now, we actively re-establish the
        // connection here ourselves.
        case (KeeperState.Disconnected | KeeperState.Expired)
            if event.getType == Watcher.Event.EventType.None =>
          statsReceiver.counter("zkConnectionEvent." + event.getState).incr()
          zookeeperWorkQueue ! reconnectZK
        case KeeperState.SyncConnected =>
          statsReceiver.counter("zkNodeChangeEvent." + event.getType).incr()
          event.getType match {
            case Watcher.Event.EventType.NodeDataChanged =>
              zookeeperWorkQueue ! loadZKData
            case Watcher.Event.EventType.NodeChildrenChanged =>
              zookeeperWorkQueue ! loadZKChildren
            case _ =>
          }
        case _ =>
      }
    }
  }

  /**
   * Read work items off the broker and schedule the work on the future pool. If the scheduled
   * work fails, it repeatedly retries itself with back-off (via a timer) until it succeeds,
   * and then recursively calls this method to process the next work item.
   *
   * This function guarantees that at any given time there will be only one thread (future pool
   * thread) blocking on zookeeper IO work. Multiple ZK connection events or cache pool change
   * events only queue up the work, and each work item is picked up only after the previous one
   * has finished successfully.
   */
  private[this] def loopZookeeperWork(): Unit = {
    def scheduleReadCachePoolConfig(
      op: () => Unit,
      backoff: Stream[Duration] = DefaultZkConnectionRetryBackoff
    ): Unit = {
      DefaultFuturePool {
        op()
      }.respond {
        case Return(_) =>
          zkWorkSucceededCounter.incr()
          loopZookeeperWork()
        case Throw(ex) =>
          zkWorkFailedCounter.incr()
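          // the default backoff stream ends in an infinite constant tail, so this
          // match never runs out of durations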
          backoff match {
            case wait #:: rest =>
              DefaultTimer.doLater(wait) {
                scheduleReadCachePoolConfig(op, rest)
              }
          }
      }
    }

    // get one work item off the broker and schedule it into the future pool
    zookeeperWorkQueue.recv.sync().respond {
      case Return(op: (() => Unit)) =>
        scheduleReadCachePoolConfig(op)
      case _ =>
    }
  }

  /**
   * Load the zookeeper node data, leaving a data watch in place, then invoke the
   * applyZKData implementation to process the data string.
   */
  def applyZKData(data: Array[Byte]): Unit
  def loadZKData =
    () =>
      synchronized {
        loadZKDataCounter.incr()

        // read cache pool config data and leave a node data watch
        val data = zkClient
          .get(DefaultZKWaitTimeout)
          .getData(zkPath, true, null)

        applyZKData(data)
      }

  /**
   * Load the zookeeper node children, leaving a children watch in place, then invoke the
   * applyZKChildren implementation to process the children list.
   */
  def applyZKChildren(children: List[String]): Unit = {} // no-op by default
  def loadZKChildren =
    () =>
      synchronized {
        loadZKChildrenCounter.incr()

        // get children list and leave a node children watch
        val children = zkClient
          .get(DefaultZKWaitTimeout)
          .getChildren(zkPath, true, null)

        applyZKChildren(children.asScala.toList)
      }

  /**
   * Reconnect to zookeeper. This may be invoked when the zookeeper connection expires and
   * the node data watcher previously registered gets dropped, hence we re-attach the data
   * watcher here.
   */
  def reconnectZK =
    () =>
      synchronized {
        reconnectZKCounter.incr()

        // re-set watches for node data and children; the reads below exist solely for
        // their watch-registering side effect, so the returned values are discarded
        val data = zkClient
          .get(DefaultZKWaitTimeout)
          .getData(zkPath, true, null)
        val children = zkClient
          .get(DefaultZKWaitTimeout)
          .getChildren(zkPath, true, null)
      }

  // Register the top-level connection watcher to monitor zk changes.
  // This watcher will live across different zk connections.
  zkClient.register(zkWatcher)

  // attach data/children change event watchers to the zk client for the first time during construction
  zookeeperWorkQueue ! loadZKData
  zookeeperWorkQueue ! loadZKChildren

  // Kick off the loop to process zookeeper work queue
  loopZookeeperWork()
}
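
// A minimal usage sketch, not part of the original source: a hypothetical monitor
// that interprets the node data as a newline-separated host list. The name
// `HostListMonitor`, the data format, and the use of NullStatsReceiver are
// illustrative assumptions.
object ZookeeperStateMonitorExample {
  import com.twitter.finagle.stats.NullStatsReceiver

  class HostListMonitor(
    protected val zkPath: String,
    protected val zkClient: ZooKeeperClient)
      extends ZookeeperStateMonitor {
    protected val statsReceiver: StatsReceiver = NullStatsReceiver

    @volatile private[this] var hosts: Seq[String] = Seq.empty

    // Invoked serially (via the work queue) whenever the node data changes.
    def applyZKData(data: Array[Byte]): Unit =
      hosts = new String(data, "UTF-8").split('\n').toSeq

    // Snapshot of the most recently applied host list.
    def currentHosts: Seq[String] = hosts
  }
}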



