
// com.twitter.finagle.serverset2.ZkSession.scala (Maven / Gradle / Ivy)
package com.twitter.finagle.serverset2
import com.twitter.concurrent.AsyncSemaphore
import com.twitter.conversions.time._
import com.twitter.finagle.serverset2.client._
import com.twitter.finagle.stats._
import com.twitter.io.Buf
import com.twitter.logging.Logger
import com.twitter.util._
import scala.collection.concurrent
/**
* A representation of a ZooKeeper session based on asynchronous primitives such
* as [[com.twitter.util.Future]], [[com.twitter.util.Var]], and
* [[com.twitter.util.Activity]].
*
* Session operations are as in Apache ZooKeeper, but pending results are
* represented with [[com.twitter.util.Future Futures]]; watches and session states are
* represented with a [[com.twitter.util.Var]].
*/
private[serverset2] class ZkSession(
retryStream: RetryStream,
watchedZk: Watched[ZooKeeperReader],
statsReceiver: StatsReceiver
)(implicit timer: Timer) {
import ZkSession.logger
/** The dynamic `WatchState` of this `ZkSession` instance. */
val state: Var[WatchState] = watchedZk.state
// Stats handler used by `immutableDataOf` to record failures it does not expect.
// The function passed in maps every input to the single label "unexpected_exceptions".
private[this] val unexpectedExceptions = new CategorizingExceptionStatsHandler(_ => Some("unexpected_exceptions"))
// The underlying reader; every ZooKeeper operation in this class is issued through it.
private val zkr: ZooKeeperReader = watchedZk.value
// If the zookeeper cluster is under duress, there can be 100's of thousands of clients
// attempting to read and write at once. Limit to a (fairly large) concurrent request cap.
// Use a semaphore (versus explicit rate limiting) to approximate the throughput of the cluster.
// N.B. this semaphore has no max-waiters limit. This could lead to an OOME if the zk operations
// never complete. This is preferable to handling and re-queuing (via future.sleep etc)
// the error if an arbitrary max-limit is set.
// The cap is 100 permits; `limit` takes one permit per operation.
private val limiter = new AsyncSemaphore(100)
// Gauge named "numWaiters" that reports `limiter.numWaiters`.
// NOTE(review): presumably kept in a val so the gauge stays registered — confirm against
// the StatsReceiver gauge-retention contract.
private val waitersGauge = statsReceiver.addGauge("numWaiters") { limiter.numWaiters }
/**
 * Run `f` under the session-level semaphore: wait for a permit from `limiter`,
 * evaluate `f`, and give the permit back once the resulting Future completes.
 *
 * `acquireAndRun` is used instead of a hand-written `acquire().flatMap { ... ensure ... }`
 * so that the permit is also released when evaluating `f` throws synchronously
 * (before any Future exists to attach the release to); otherwise such a throw
 * would leak a permit permanently.
 *
 * @param f the operation to run; evaluated only after a permit has been acquired
 * @return the result of `f`
 */
private def limit[T](f: => Future[T]): Future[T] =
  limiter.acquireAndRun(f)
/**
 * Sleep for the next delay handed out by `retryStream`, then evaluate `f`.
 * `f` is by-name, so it is not started until the sleep has completed.
 */
private def retryWithDelay[T](f: => Future[T]): Future[T] = {
  val delay = retryStream.next()
  Future.sleep(delay).before { f }
}
// Track a timestamp for the last time we received a good update for
// a particular zookeeper child watch. All servers should be updated within the same approximate
// time. If a server has a different serverset size than its peers, this gauge will show
// us it is because it is not receiving updates.
// Gauges created by `noteGoodChildWatch`: one is prepended the first time a path is noted.
@volatile var watchUpdateGauges = List.empty[Gauge]
// Path -> time of the last good child-watch update, in seconds (`Time.now.inLongSeconds`).
private val lastGoodUpdate = new concurrent.TrieMap[String, Long]
/**
 * Record "now" as the last good child-watch update for `path`.
 *
 * The first time a path is seen, a "last_watch_update" gauge is registered for it;
 * the gauge reports the number of seconds since that path's last recorded update.
 */
private def noteGoodChildWatch(path: String): Unit = {
  val previous = lastGoodUpdate.put(path, Time.now.inLongSeconds)
  // Only the first update for a path needs to create the gauge.
  if (previous.isEmpty) {
    synchronized {
      val gauge = statsReceiver.addGauge("last_watch_update", path) {
        Time.now.inLongSeconds - lastGoodUpdate.getOrElse(path, 0L)
      }
      watchUpdateGauges = gauge :: watchUpdateGauges
    }
  }
}
/**
 * Run a `Future[T]`-producing operation under the session-level semaphore and
 * re-run it whenever it fails with
 * [[com.twitter.finagle.serverset2.client.KeeperException.ConnectionLoss]].
 * Each re-run is preceded by the next delay from [[retryStream]]. Any other
 * failure is passed through to the caller untouched.
 */
private def safeRetry[T](go: => Future[T]): Future[T] = {
  def attempt(): Future[T] = {
    val limited = limit { go }
    limited.rescue {
      case _: KeeperException.ConnectionLoss =>
        logger.warning(s"ConnectionLoss to Zookeeper host. Session $sessionIdAsHex. Retrying")
        retryWithDelay { attempt() }
    }
  }

  attempt()
}
/**
 * A persistent operation: reissue a watched operation every
 * time the watch fires, applying safe retries when possible.
 *
 * The returned Activity is asynchronous: watches aren't reissued
 * when the Activity is no longer observed.
 *
 * @param go the watched operation; it is re-evaluated each time the watch is (re)issued
 * @return an Activity that is `Ok` with the latest value, or `Failed` with the most
 *         recent error or unusable session state
 */
private[serverset2] def watchedOperation[T](go: => Future[Watched[T]]): Activity[T] =
  Activity(Var.async[Activity.State[T]](Activity.Pending) { u =>
    // Set by the Closable returned below; checked before every (re)issue.
    @volatile var closed = false

    def loop(): Future[Unit] = {
      if (!closed) safeRetry(go) respond {
        case Throw(e @ KeeperException.SessionExpired(_)) =>
          // don't retry. The session has expired while trying to set the watch.
          // In case our activity is still active, notify the listener
          u() = Activity.Failed(e)

        case Throw(exc) =>
          logger.error(s"Operation failed with $exc. Session $sessionIdAsHex")
          u() = Activity.Failed(exc)
          retryWithDelay { loop() }

        case Return(Watched(value, state)) =>
          val ok = Activity.Ok(value)
          // A successful operation resets the backoff schedule.
          retryStream.reset()
          u() = ok

          state.changes.respond {
            case WatchState.Pending =>
              // Ignore updates while WatchState is Pending.

            case WatchState.Determined(_) =>
              // Note: since the watch transitioned to determined, we know
              // that this observation will produce no more values, so there's
              // no need to apply concurrency control to the subsequent
              // branches.
              loop()

            case WatchState.SessionState(sessionState)
                if sessionState == SessionState.ConnectedReadOnly ||
                  sessionState == SessionState.SaslAuthenticated ||
                  sessionState == SessionState.SyncConnected =>
              u() = ok
              logger.info(s"Reacquiring watch on $sessionState. Session: $sessionIdAsHex")
              // We may have lost or never set our watch correctly. Retry to ensure we stay connected
              retryWithDelay { loop() }

            case WatchState.SessionState(SessionState.Expired) =>
              u() = Activity.Failed(new Exception("session expired"))
              // Do NOT retry here as the session has expired. We expect the watcher of this
              // ZkSession to retry at this point (See [[ZkSession.retrying]]).

            // Disconnected, NoSyncConnected
            case WatchState.SessionState(sessionState)
                if sessionState == SessionState.Disconnected ||
                  sessionState == SessionState.NoSyncConnected =>
              logger.warning(s"Intermediate Failure session state: $sessionState. " +
                s"Session: $sessionIdAsHex. Data is now unavailable.")
              u() = Activity.Failed(new Exception("" + sessionState))
              // Do NOT keep retrying, wait to be reconnected automatically by the underlying session

            case WatchState.SessionState(sessionState) =>
              logger.error(s"Unexpected session state $sessionState. Session: $sessionIdAsHex")
              u() = Activity.Failed(new Exception("" + sessionState))
              // We don't know what happened. Retry.
              retryWithDelay { loop() }
          }
      }

      // The loop is driven by callbacks; the returned Future carries no result.
      Future.Done
    }

    loop()

    Closable.make { deadline =>
      closed = true
      Future.Done
    }
  })
// Memoized per-path watched `exists` operation: the same path yields the same Activity.
private val existsWatchOp = Memoize { (path: String) =>
  watchedOperation(zkr.existsWatch(path))
}

// Memoized per-path watched `getChildren` operation: the same path yields the same Activity.
private val getChildrenWatchOp = Memoize { (path: String) =>
  watchedOperation(zkr.getChildrenWatch(path))
}

/**
 * The persistent form of exists: an Activity carrying the current
 * (best-effort) Stat for `path`, served from the memoized watched operation.
 */
def existsOf(path: String): Activity[Option[Data.Stat]] = {
  val watched = existsWatchOp(path)
  watched
}
/**
 * The persistent form of glob: an Activity carrying the current (best-effort)
 * set of children of the pattern's path whose names start with the pattern's
 * prefix. Note that paths returned are absolute.
 *
 * A pattern without any '/' yields a failed Activity carrying an
 * `IllegalArgumentException`. A path that does not exist, or whose children
 * lookup fails with `NoNode`, yields the empty set.
 */
def globOf(pattern: String): Activity[Set[String]] =
  if (pattern.lastIndexOf('/') < 0) {
    Activity.exception(new IllegalArgumentException("Invalid pattern"))
  } else {
    val (path, prefix) = ZooKeeperReader.patToPathAndPrefix(pattern)
    existsOf(path).flatMap {
      case Some(_) =>
        getChildrenWatchOp(path).transform {
          case Activity.Pending =>
            Activity.pending
          case Activity.Ok(Node.Children(children, _)) =>
            noteGoodChildWatch(path)
            val matching = children.filter(child => child.startsWith(prefix))
            Activity.value(matching.toSet)
          case Activity.Failed(KeeperException.NoNode(_)) =>
            // The parent vanished: still counts as a good update, with no children.
            noteGoodChildWatch(path)
            Activity.value(Set.empty)
          case Activity.Failed(exc) =>
            logger.error(s"GetChildrenWatch to ($path, $prefix) failed with exception $exc")
            Activity.exception(exc)
        }
      case None =>
        Activity.value(Set.empty)
    }
  }
/**
 * The one-shot form of getData: a Future carrying the current (best-effort)
 * contents of `path`. No watch is left on the node, so this is only suitable
 * for immutable nodes.
 *
 * A node without data, or a `NoNode` failure, yields `None`. Any other failure
 * bumps the "read_fail" counter, is recorded as an unexpected exception, is
 * logged, and is then propagated.
 */
def immutableDataOf(path: String): Future[Option[Buf]] = {
  val read = safeRetry(zkr.getData(path))
  read.transform {
    case Return(Node.Data(Some(bytes), _)) =>
      logger.debug(s"Zk.GetData($path) retrieved ${bytes.length} bytes")
      Future.value(Some(bytes))
    case Return(_) | Throw(_: KeeperException.NoNode) =>
      Future.value(None)
    case Throw(failure) =>
      statsReceiver.counter("read_fail").incr()
      unexpectedExceptions.record(statsReceiver, failure)
      logger.warning(s"Unexpected failure for session $sessionIdAsHex. retrieving node $path. ($failure)")
      Future.exception(failure)
  }
}
/**
 * Read the immutable data of every path in `paths` and gather the results,
 * pairing each path with what `immutableDataOf` returned for it.
 */
def collectImmutableDataOf(paths: Seq[String]): Future[Seq[(String, Option[Buf])]] = {
  val lookups = paths.map { path =>
    immutableDataOf(path).map(data => (path, data))
  }
  Future.collect(lookups)
}
// Direct pass-throughs to the underlying ZooKeeperReader (`zkr`). Unlike existsOf,
// globOf and immutableDataOf, these calls do not go through `limit` or `safeRetry`.
def addAuthInfo(scheme: String, auth: Buf): Future[Unit] = zkr.addAuthInfo(scheme, auth)
def existsWatch(path: String): Future[Watched[Option[Data.Stat]]] = zkr.existsWatch(path)
def getChildrenWatch(path: String): Future[Watched[Node.Children]] = zkr.getChildrenWatch(path)
def getData(path: String): Future[Node.Data] = zkr.getData(path)
def sessionId: Long = zkr.sessionId
/** The session id rendered in hexadecimal; this is the form used in this class's log messages. */
def sessionIdAsHex: String = zkr.sessionId.toHexString
def sessionPasswd: Buf = zkr.sessionPasswd
def sessionTimeout: Duration = zkr.sessionTimeout
// NOTE(review): result type is whatever `zkr.close()` returns — presumably Future[Unit]; confirm
// against ZooKeeperReader before annotating.
def close() = zkr.close()
}
private[serverset2] object ZkSession {
/**
 * A noop ZkSession: backed by `NullZooKeeperReader`, a watch state fixed at
 * `WatchState.Pending`, and `NullStatsReceiver`.
 */
val nil: ZkSession = {
  // Implicit definitions should carry an explicit type; this one only feeds the
  // constructor's implicit Timer parameter.
  implicit val timer: Timer = Timer.Nil
  new ZkSession(
    RetryStream(),
    Watched(NullZooKeeperReader, Var(WatchState.Pending)),
    NullStatsReceiver
  )
}
// Session timeout used by `apply` when the caller does not pass one.
val DefaultSessionTimeout = 10.seconds
// First identity reported by `Identities.get()`, or "/null" when there is none.
private val authUser = Identities.get().headOption.getOrElse("/null")
// Digest credentials sent by `retrying`, in the form "<user>:<user>".
private val authInfo: String = s"$authUser:$authUser"
private val logger = Logger("ZkSession")
/**
 * Build a new `ZkSession` on top of a freshly built ZooKeeper reader.
 *
 * @param retryStream The backoff schedule handed to the new session.
 * @param hosts A comma-separated "host:port" string for a ZooKeeper server.
 * @param sessionTimeout The ZooKeeper session timeout to use.
 * @param statsReceiver The receiver for session stats; it is scoped by
 *                      `Zk2Resolver.statsOf(hosts)` before being handed to the session.
 */
private[serverset2] def apply(
  retryStream: RetryStream,
  hosts: String,
  sessionTimeout: Duration = DefaultSessionTimeout,
  statsReceiver: StatsReceiver
)(implicit timer: Timer): ZkSession = {
  // The client's own stats go under DefaultStatsReceiver's "zkclient" scope.
  val watchedReader = ClientBuilder()
    .hosts(hosts)
    .sessionTimeout(sessionTimeout)
    .statsReceiver(DefaultStatsReceiver.scope("zkclient").scope(Zk2Resolver.statsOf(hosts)))
    .readOnlyOK()
    .reader()
  val sessionStats = statsReceiver.scope(Zk2Resolver.statsOf(hosts))
  new ZkSession(retryStream, watchedReader, sessionStats)
}
/**
 * Produce a `Var[ZkSession]` representing a ZooKeeper session that automatically
 * reconnects upon session expiry. Reconnect attempts cease when any
 * observation of the returned `Var[ZkSession]` is closed.
 *
 * NOTE(review): the `Closable` built at the end of this method is never returned
 * or registered anywhere, so as written nothing sets `closing` and the
 * "reconnect attempts cease" behaviour described above does not happen. Wiring it
 * up (e.g. via `Var.async`) would change when sessions are created, so it is
 * flagged here rather than changed.
 *
 * @param backoff delay schedule between an expiry and the next reconnect; reset
 *                once a new session has connected and sent its auth info
 * @param newZkSession factory invoked for every (re)connect
 * @return a Var that starts as `ZkSession.nil` and is updated to each newly connected session
 */
def retrying(
  backoff: RetryStream,
  newZkSession: () => ZkSession
)(implicit timer: Timer): Var[ZkSession] = {
  val v = Var(ZkSession.nil)
  @volatile var closing = false
  @volatile var zkSession: ZkSession = ZkSession.nil

  def reconnect(): Unit = {
    if (!closing) {
      logger.info(s"Closing zk session ${zkSession.sessionIdAsHex}")
      zkSession.close()
      zkSession = newZkSession()
      // Logged in hex, like every other session id in this file.
      logger.info(s"Starting new zk session ${zkSession.sessionIdAsHex}")

      // Upon initial connection, send auth info, then update `v`.
      zkSession.state.changes.filter {
        _ == WatchState.SessionState(SessionState.SyncConnected)
      }.toFuture().unit.before {
        zkSession.addAuthInfo("digest", Buf.Utf8(authInfo))
      }.onSuccess { _ =>
        logger.info(s"New ZKSession is connected. Session ID: ${zkSession.sessionIdAsHex}")
        v() = zkSession
        backoff.reset()
      }

      // Kick off a delayed reconnection on session expiration.
      zkSession.state.changes.filter {
        _ == WatchState.SessionState(SessionState.Expired)
      }.toFuture().unit.before {
        val jitter = backoff.next()
        logger.error(s"Zookeeper session ${zkSession.sessionIdAsHex} has expired. Reconnecting in $jitter")
        Future.sleep(jitter)
      }.ensure { reconnect() }
    }
  }

  reconnect()

  // NOTE(review): this value is discarded (see the method-level note).
  Closable.make { deadline =>
    closing = true
    zkSession.close()
  }

  v
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy