All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.streaming.State.scala Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.streaming

import scala.language.implicitConversions

import org.apache.spark.annotation.Experimental

/**
 * :: Experimental ::
 * Abstract class for getting and updating the state in mapping function used in the `mapWithState`
 * operation of a [[org.apache.spark.streaming.dstream.PairDStreamFunctions pair DStream]] (Scala)
 * or a [[org.apache.spark.streaming.api.java.JavaPairDStream JavaPairDStream]] (Java).
 *
 * Scala example of using `State`:
 * {{{
 *    // A mapping function that maintains an integer state and returns a String
 *    def mappingFunction(key: String, value: Option[Int], state: State[Int]): Option[String] = {
 *      // Check if state exists
 *      if (state.exists) {
 *        val existingState = state.get  // Get the existing state
 *        val shouldRemove = ...         // Decide whether to remove the state
 *        if (shouldRemove) {
 *          state.remove()     // Remove the state
 *        } else {
 *          val newState = ...
 *          state.update(newState)    // Set the new state
 *        }
 *      } else {
 *        val initialState = ...
 *        state.update(initialState)  // Set the initial state
 *      }
 *      ... // return something
 *    }
 *
 * }}}
 *
 * Java example of using `State`:
 * {{{
 *    // A mapping function that maintains an integer state and returns a String
 *    Function3, State, String> mappingFunction =
 *       new Function3, State, String>() {
 *
 *         @Override
 *         public String call(String key, Optional value, State state) {
 *           if (state.exists()) {
 *             int existingState = state.get(); // Get the existing state
 *             boolean shouldRemove = ...; // Decide whether to remove the state
 *             if (shouldRemove) {
 *               state.remove(); // Remove the state
 *             } else {
 *               int newState = ...;
 *               state.update(newState); // Set the new state
 *             }
 *           } else {
 *             int initialState = ...; // Set the initial state
 *             state.update(initialState);
 *           }
 *           // return something
 *         }
 *       };
 * }}}
 *
 * @tparam S Class of the state
 */
@Experimental
sealed abstract class State[S] {

  /** Whether the state already exists */
  def exists(): Boolean

  /**
   * Get the state if it exists, otherwise it will throw `java.util.NoSuchElementException`.
   * Check with `exists()` whether the state exists or not before calling `get()`.
   *
   * @throws java.util.NoSuchElementException If the state does not exist.
   */
  def get(): S

  /**
   * Update the state with a new value.
   *
   * State cannot be updated if it has been already removed (that is, `remove()` has already been
   * called) or it is going to be removed due to timeout (that is, `isTimingOut()` is `true`).
   *
   * @throws java.lang.IllegalArgumentException If the state has already been removed, or is
   *                                            going to be removed
   */
  def update(newState: S): Unit

  /**
   * Remove the state if it exists.
   *
   * State cannot be updated if it has been already removed (that is, `remove()` has already been
   * called) or it is going to be removed due to timeout (that is, `isTimingOut()` is `true`).
   */
  def remove(): Unit

  /**
   * Whether the state is timing out and going to be removed by the system after the current batch.
   * This timeout can occur if timeout duration has been specified in the
   * [[org.apache.spark.streaming.StateSpec StatSpec]] and the key has not received any new data
   * for that timeout duration.
   */
  def isTimingOut(): Boolean

  /**
   * Get the state as a [[scala.Option]]. It will be `Some(state)` if it exists, otherwise `None`.
   */
  @inline final def getOption(): Option[S] = if (exists) Some(get()) else None

  @inline final override def toString(): String = {
    getOption.map { _.toString }.getOrElse("")
  }
}

/** Internal implementation of the [[State]] interface */
private[streaming] class StateImpl[S] extends State[S] {

  private var state: S = null.asInstanceOf[S]
  private var defined: Boolean = false
  private var timingOut: Boolean = false
  private var updated: Boolean = false
  private var removed: Boolean = false

  // ========= Public API =========
  override def exists(): Boolean = {
    defined
  }

  override def get(): S = {
    if (defined) {
      state
    } else {
      throw new NoSuchElementException("State is not set")
    }
  }

  override def update(newState: S): Unit = {
    require(!removed, "Cannot update the state after it has been removed")
    require(!timingOut, "Cannot update the state that is timing out")
    state = newState
    defined = true
    updated = true
  }

  override def isTimingOut(): Boolean = {
    timingOut
  }

  override def remove(): Unit = {
    require(!timingOut, "Cannot remove the state that is timing out")
    require(!removed, "Cannot remove the state that has already been removed")
    defined = false
    updated = false
    removed = true
  }

  // ========= Internal API =========

  /** Whether the state has been marked for removing */
  def isRemoved(): Boolean = {
    removed
  }

  /** Whether the state has been been updated */
  def isUpdated(): Boolean = {
    updated
  }

  /**
   * Update the internal data and flags in `this` to the given state option.
   * This method allows `this` object to be reused across many state records.
   */
  def wrap(optionalState: Option[S]): Unit = {
    optionalState match {
      case Some(newState) =>
        this.state = newState
        defined = true

      case None =>
        this.state = null.asInstanceOf[S]
        defined = false
    }
    timingOut = false
    removed = false
    updated = false
  }

  /**
   * Update the internal data and flags in `this` to the given state that is going to be timed out.
   * This method allows `this` object to be reused across many state records.
   */
  def wrapTimingOutState(newState: S): Unit = {
    this.state = newState
    defined = true
    timingOut = true
    removed = false
    updated = false
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy