com.google.cloud.dataflow.sdk.util.state.StateMerging Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of google-cloud-dataflow-java-sdk-all Show documentation
Show all versions of google-cloud-dataflow-java-sdk-all Show documentation
Google Cloud Dataflow Java SDK provides a simple, Java-based
interface for processing data of virtually any size using Google Cloud
resources. This artifact includes the entire Dataflow Java SDK.
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.util.state;
import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
import com.google.common.base.Preconditions;
import org.joda.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
/**
* Helpers for merging state.
*/
public class StateMerging {
/**
* Clear all state in {@code address} in all windows under merge (even result windows)
* in {@code context}.
*/
public static void clear(
MergingStateAccessor context, StateTag super K, StateT> address) {
for (StateT state : context.accessInEachMergingWindow(address).values()) {
state.clear();
}
}
/**
* Prefetch all bag state in {@code address} across all windows under merge in
* {@code context}, except for the bag state in the final state address window which we can
* blindly append to.
*/
public static void prefetchBags(
MergingStateAccessor context, StateTag super K, BagState> address) {
Map> map = context.accessInEachMergingWindow(address);
if (map.isEmpty()) {
// Nothing to prefetch.
return;
}
BagState result = context.access(address);
// Prefetch everything except what's already in result.
for (BagState source : map.values()) {
if (!source.equals(result)) {
source.readLater();
}
}
}
/**
* Merge all bag state in {@code address} across all windows under merge.
*/
public static void mergeBags(
MergingStateAccessor context, StateTag super K, BagState> address) {
mergeBags(context.accessInEachMergingWindow(address).values(), context.access(address));
}
/**
* Merge all bag state in {@code sources} (which may include {@code result}) into {@code result}.
*/
public static void mergeBags(
Collection> sources, BagState result) {
if (sources.isEmpty()) {
// Nothing to merge.
return;
}
// Prefetch everything except what's already in result.
List>> futures = new ArrayList<>(sources.size());
for (BagState source : sources) {
if (!source.equals(result)) {
source.readLater();
futures.add(source);
}
}
if (futures.isEmpty()) {
// Result already holds all the values.
return;
}
// Transfer from sources to result.
for (ReadableState> future : futures) {
for (T element : future.read()) {
result.add(element);
}
}
// Clear sources except for result.
for (BagState source : sources) {
if (!source.equals(result)) {
source.clear();
}
}
}
/**
* Prefetch all combining value state for {@code address} across all merging windows in {@code
* context}.
*/
public static , W extends BoundedWindow> void
prefetchCombiningValues(MergingStateAccessor context,
StateTag super K, StateT> address) {
for (StateT state : context.accessInEachMergingWindow(address).values()) {
state.readLater();
}
}
/**
* Merge all value state in {@code address} across all merging windows in {@code context}.
*/
public static void mergeCombiningValues(
MergingStateAccessor context,
StateTag super K, AccumulatorCombiningState> address) {
mergeCombiningValues(
context.accessInEachMergingWindow(address).values(), context.access(address));
}
/**
* Merge all value state from {@code sources} (which may include {@code result}) into
* {@code result}.
*/
public static void mergeCombiningValues(
Collection> sources,
AccumulatorCombiningState result) {
if (sources.isEmpty()) {
// Nothing to merge.
return;
}
if (sources.size() == 1 && sources.contains(result)) {
// Result already holds combined value.
return;
}
// Prefetch.
List> futures = new ArrayList<>(sources.size());
for (AccumulatorCombiningState source : sources) {
source.readLater();
}
// Read.
List accumulators = new ArrayList<>(futures.size());
for (AccumulatorCombiningState source : sources) {
accumulators.add(source.getAccum());
}
// Merge (possibly update and return one of the existing accumulators).
AccumT merged = result.mergeAccumulators(accumulators);
// Clear sources.
for (AccumulatorCombiningState source : sources) {
source.clear();
}
// Update result.
result.addAccum(merged);
}
/**
* Prefetch all watermark state for {@code address} across all merging windows in
* {@code context}.
*/
public static void prefetchWatermarks(
MergingStateAccessor context,
StateTag super K, WatermarkHoldState> address) {
Map> map = context.accessInEachMergingWindow(address);
WatermarkHoldState result = context.access(address);
if (map.isEmpty()) {
// Nothing to prefetch.
return;
}
if (map.size() == 1 && map.values().contains(result)
&& result.getOutputTimeFn().dependsOnlyOnEarliestInputTimestamp()) {
// Nothing to change.
return;
}
if (result.getOutputTimeFn().dependsOnlyOnWindow()) {
// No need to read existing holds.
return;
}
// Prefetch.
for (WatermarkHoldState source : map.values()) {
source.readLater();
}
}
/**
* Merge all watermark state in {@code address} across all merging windows in {@code context},
* where the final merge result window is {@code mergeResult}.
*/
public static void mergeWatermarks(
MergingStateAccessor context,
StateTag super K, WatermarkHoldState> address,
W mergeResult) {
mergeWatermarks(
context.accessInEachMergingWindow(address).values(), context.access(address), mergeResult);
}
/**
* Merge all watermark state in {@code sources} (which must include {@code result} if non-empty)
* into {@code result}, where the final merge result window is {@code mergeResult}.
*/
public static void mergeWatermarks(
Collection> sources, WatermarkHoldState result,
W resultWindow) {
if (sources.isEmpty()) {
// Nothing to merge.
return;
}
if (sources.size() == 1 && sources.contains(result)
&& result.getOutputTimeFn().dependsOnlyOnEarliestInputTimestamp()) {
// Nothing to merge.
return;
}
if (result.getOutputTimeFn().dependsOnlyOnWindow()) {
// Clear sources.
for (WatermarkHoldState source : sources) {
source.clear();
}
// Update directly from window-derived hold.
Instant hold = result.getOutputTimeFn().assignOutputTime(
BoundedWindow.TIMESTAMP_MIN_VALUE, resultWindow);
Preconditions.checkState(hold.isAfter(BoundedWindow.TIMESTAMP_MIN_VALUE));
result.add(hold);
} else {
// Prefetch.
List> futures = new ArrayList<>(sources.size());
for (WatermarkHoldState source : sources) {
futures.add(source);
}
// Read.
List outputTimesToMerge = new ArrayList<>(sources.size());
for (ReadableState future : futures) {
Instant sourceOutputTime = future.read();
if (sourceOutputTime != null) {
outputTimesToMerge.add(sourceOutputTime);
}
}
// Clear sources.
for (WatermarkHoldState source : sources) {
source.clear();
}
if (!outputTimesToMerge.isEmpty()) {
// Merge and update.
result.add(result.getOutputTimeFn().merge(resultWindow, outputTimesToMerge));
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy