All downloads are free. Search and download functionality uses the official Maven repository.

com.google.cloud.dataflow.sdk.util.state.StateMerging Maven / Gradle / Ivy

Go to download

Google Cloud Dataflow Java SDK provides a simple, Java-based interface for processing data of virtually any size using Google cloud resources. This artifact includes the entire Dataflow Java SDK.

There is a newer version: 2.5.0
Show newest version
/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.dataflow.sdk.util.state;

import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
import com.google.common.base.Preconditions;

import org.joda.time.Instant;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;

/**
 * Helpers for merging state.
 */
public class StateMerging {
  /**
   * Clear all state in {@code address} in all windows under merge (even result windows)
   * in {@code context}.
   */
  public static  void clear(
      MergingStateAccessor context, StateTag address) {
    for (StateT state : context.accessInEachMergingWindow(address).values()) {
      state.clear();
    }
  }

  /**
   * Prefetch all bag state in {@code address} across all windows under merge in
   * {@code context}, except for the bag state in the final state address window which we can
   * blindly append to.
   */
  public static  void prefetchBags(
      MergingStateAccessor context, StateTag> address) {
    Map> map = context.accessInEachMergingWindow(address);
    if (map.isEmpty()) {
      // Nothing to prefetch.
      return;
    }
    BagState result = context.access(address);
    // Prefetch everything except what's already in result.
    for (BagState source : map.values()) {
      if (!source.equals(result)) {
        source.readLater();
      }
    }
  }

  /**
   * Merge all bag state in {@code address} across all windows under merge.
   */
  public static  void mergeBags(
      MergingStateAccessor context, StateTag> address) {
    mergeBags(context.accessInEachMergingWindow(address).values(), context.access(address));
  }

  /**
   * Merge all bag state in {@code sources} (which may include {@code result}) into {@code result}.
   */
  public static  void mergeBags(
      Collection> sources, BagState result) {
    if (sources.isEmpty()) {
      // Nothing to merge.
      return;
    }
    // Prefetch everything except what's already in result.
    List>> futures = new ArrayList<>(sources.size());
    for (BagState source : sources) {
      if (!source.equals(result)) {
        source.readLater();
        futures.add(source);
      }
    }
    if (futures.isEmpty()) {
      // Result already holds all the values.
      return;
    }
    // Transfer from sources to result.
    for (ReadableState> future : futures) {
      for (T element : future.read()) {
        result.add(element);
      }
    }
    // Clear sources except for result.
    for (BagState source : sources) {
      if (!source.equals(result)) {
        source.clear();
      }
    }
  }

  /**
   * Prefetch all combining value state for {@code address} across all merging windows in {@code
   * context}.
   */
  public static , W extends BoundedWindow> void
      prefetchCombiningValues(MergingStateAccessor context,
          StateTag address) {
    for (StateT state : context.accessInEachMergingWindow(address).values()) {
      state.readLater();
    }
  }

  /**
   * Merge all value state in {@code address} across all merging windows in {@code context}.
   */
  public static  void mergeCombiningValues(
      MergingStateAccessor context,
      StateTag> address) {
    mergeCombiningValues(
        context.accessInEachMergingWindow(address).values(), context.access(address));
  }

  /**
   * Merge all value state from {@code sources} (which may include {@code result}) into
   * {@code result}.
   */
  public static  void mergeCombiningValues(
      Collection> sources,
      AccumulatorCombiningState result) {
    if (sources.isEmpty()) {
      // Nothing to merge.
      return;
    }
    if (sources.size() == 1 && sources.contains(result)) {
      // Result already holds combined value.
      return;
    }
    // Prefetch.
    List> futures = new ArrayList<>(sources.size());
    for (AccumulatorCombiningState source : sources) {
      source.readLater();
    }
    // Read.
    List accumulators = new ArrayList<>(futures.size());
    for (AccumulatorCombiningState source : sources) {
      accumulators.add(source.getAccum());
    }
    // Merge (possibly update and return one of the existing accumulators).
    AccumT merged = result.mergeAccumulators(accumulators);
    // Clear sources.
    for (AccumulatorCombiningState source : sources) {
      source.clear();
    }
    // Update result.
    result.addAccum(merged);
  }

  /**
   * Prefetch all watermark state for {@code address} across all merging windows in
   * {@code context}.
   */
  public static  void prefetchWatermarks(
      MergingStateAccessor context,
      StateTag> address) {
    Map> map = context.accessInEachMergingWindow(address);
    WatermarkHoldState result = context.access(address);
    if (map.isEmpty()) {
      // Nothing to prefetch.
      return;
    }
    if (map.size() == 1 && map.values().contains(result)
        && result.getOutputTimeFn().dependsOnlyOnEarliestInputTimestamp()) {
      // Nothing to change.
      return;
    }
    if (result.getOutputTimeFn().dependsOnlyOnWindow()) {
      // No need to read existing holds.
      return;
    }
    // Prefetch.
    for (WatermarkHoldState source : map.values()) {
      source.readLater();
    }
  }

  /**
   * Merge all watermark state in {@code address} across all merging windows in {@code context},
   * where the final merge result window is {@code mergeResult}.
   */
  public static  void mergeWatermarks(
      MergingStateAccessor context,
      StateTag> address,
      W mergeResult) {
    mergeWatermarks(
        context.accessInEachMergingWindow(address).values(), context.access(address), mergeResult);
  }

  /**
   * Merge all watermark state in {@code sources} (which must include {@code result} if non-empty)
   * into {@code result}, where the final merge result window is {@code mergeResult}.
   */
  public static  void mergeWatermarks(
      Collection> sources, WatermarkHoldState result,
      W resultWindow) {
    if (sources.isEmpty()) {
      // Nothing to merge.
      return;
    }
    if (sources.size() == 1 && sources.contains(result)
        && result.getOutputTimeFn().dependsOnlyOnEarliestInputTimestamp()) {
      // Nothing to merge.
      return;
    }
    if (result.getOutputTimeFn().dependsOnlyOnWindow()) {
      // Clear sources.
      for (WatermarkHoldState source : sources) {
        source.clear();
      }
      // Update directly from window-derived hold.
      Instant hold = result.getOutputTimeFn().assignOutputTime(
          BoundedWindow.TIMESTAMP_MIN_VALUE, resultWindow);
      Preconditions.checkState(hold.isAfter(BoundedWindow.TIMESTAMP_MIN_VALUE));
      result.add(hold);
    } else {
      // Prefetch.
      List> futures = new ArrayList<>(sources.size());
      for (WatermarkHoldState source : sources) {
        futures.add(source);
      }
      // Read.
      List outputTimesToMerge = new ArrayList<>(sources.size());
      for (ReadableState future : futures) {
        Instant sourceOutputTime = future.read();
        if (sourceOutputTime != null) {
          outputTimesToMerge.add(sourceOutputTime);
        }
      }
      // Clear sources.
      for (WatermarkHoldState source : sources) {
        source.clear();
      }
      if (!outputTimesToMerge.isEmpty()) {
        // Merge and update.
        result.add(result.getOutputTimeFn().merge(resultWindow, outputTimesToMerge));
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy