// Source listing of com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark
// (from the google-cloud-dataflow-java-sdk-all artifact).
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.transforms.windowing;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.cloud.dataflow.sdk.annotations.Experimental;
import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger;
import com.google.cloud.dataflow.sdk.util.ExecutableTrigger;
import com.google.cloud.dataflow.sdk.util.TimeDomain;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import org.joda.time.Instant;
import java.util.List;
import java.util.Objects;
/**
* {@code AfterWatermark} triggers fire based on progress of the system watermark. This time is a
* lower-bound, sometimes heuristically established, on event times that have been fully processed
* by the pipeline.
*
* <p>For sources that provide non-heuristic watermarks (e.g.
* {@link com.google.cloud.dataflow.sdk.io.PubsubIO} when using arrival times as event times), the
* watermark is a strict guarantee that no data with an event time earlier than
* that watermark will ever be observed in the pipeline. In this case, it's safe to assume that any
* pane triggered by an {@code AfterWatermark} trigger with a reference point at or beyond the end
* of the window will be the last pane ever for that window.
*
* <p>For sources that provide heuristic watermarks (e.g.
* {@link com.google.cloud.dataflow.sdk.io.PubsubIO} when using user-supplied event times), the
* watermark itself becomes an estimate that no data with an event time earlier than that
* watermark (i.e. "late data") will ever be observed in the pipeline. These heuristics can
* often be quite accurate, but the chance of seeing late data for any given window is non-zero.
* Thus, if absolute correctness over time is important to your use case, you may want to consider
* using a trigger that accounts for late data. The default trigger,
* {@code Repeatedly.forever(AfterWatermark.pastEndOfWindow())}, which fires
* once when the watermark passes the end of the window and then immediately thereafter when any
* late data arrives, is one such example.
*
* <p>The watermark is the clock that defines {@link TimeDomain#EVENT_TIME}.
*
* <p>Additional firings before or after the watermark can be requested by calling
* {@code AfterWatermark.pastEndOfWindow().withEarlyFirings(OnceTrigger)} or
* {@code AfterWatermark.pastEndOfWindow().withLateFirings(OnceTrigger)}.
*
* @param <W> {@link BoundedWindow} subclass used to represent the windows used.
*/
@Experimental(Experimental.Kind.TRIGGER)
public class AfterWatermark {
// Static factory class.
private AfterWatermark() {}
/**
* Creates a trigger that fires when the watermark passes the end of the window.
*/
public static FromEndOfWindow pastEndOfWindow() {
return new FromEndOfWindow();
}
/**
* Interface for building an AfterWatermarkTrigger with early firings already filled in.
*/
public interface AfterWatermarkEarly extends TriggerBuilder {
/**
* Creates a new {@code Trigger} like the this, except that it fires repeatedly whenever
* the given {@code Trigger} fires before the watermark has passed the end of the window.
*/
TriggerBuilder withLateFirings(OnceTrigger lateTrigger);
}
/**
* Interface for building an AfterWatermarkTrigger with late firings already filled in.
*/
public interface AfterWatermarkLate extends TriggerBuilder {
/**
* Creates a new {@code Trigger} like the this, except that it fires repeatedly whenever
* the given {@code Trigger} fires after the watermark has passed the end of the window.
*/
TriggerBuilder withEarlyFirings(OnceTrigger earlyTrigger);
}
/**
* A trigger which never fires. Used for the "early" trigger when only a late trigger was
* specified.
*/
private static class NeverTrigger extends OnceTrigger {
protected NeverTrigger() {
super(null);
}
@Override
public void onElement(OnElementContext c) throws Exception { }
@Override
public void onMerge(OnMergeContext c) throws Exception { }
@Override
protected Trigger getContinuationTrigger(List> continuationTriggers) {
return this;
}
@Override
public Instant getWatermarkThatGuaranteesFiring(W window) {
return BoundedWindow.TIMESTAMP_MAX_VALUE;
}
@Override
public boolean shouldFire(Trigger.TriggerContext context) throws Exception {
return false;
}
@Override
protected void onOnlyFiring(Trigger.TriggerContext context) throws Exception {
throw new UnsupportedOperationException(
String.format("%s should never fire", getClass().getSimpleName()));
}
}
private static class AfterWatermarkEarlyAndLate
extends Trigger
implements TriggerBuilder, AfterWatermarkEarly, AfterWatermarkLate {
private static final int EARLY_INDEX = 0;
private static final int LATE_INDEX = 1;
private final OnceTrigger earlyTrigger;
private final OnceTrigger lateTrigger;
@SuppressWarnings("unchecked")
private AfterWatermarkEarlyAndLate(OnceTrigger earlyTrigger, OnceTrigger lateTrigger) {
super(lateTrigger == null
? ImmutableList.>of(earlyTrigger)
: ImmutableList.>of(earlyTrigger, lateTrigger));
this.earlyTrigger = checkNotNull(earlyTrigger, "earlyTrigger should not be null");
this.lateTrigger = lateTrigger;
}
@Override
public TriggerBuilder withEarlyFirings(OnceTrigger earlyTrigger) {
return new AfterWatermarkEarlyAndLate(earlyTrigger, lateTrigger);
}
@Override
public TriggerBuilder withLateFirings(OnceTrigger lateTrigger) {
return new AfterWatermarkEarlyAndLate(earlyTrigger, lateTrigger);
}
@Override
public void onElement(OnElementContext c) throws Exception {
if (!c.trigger().isMerging()) {
// If merges can never happen, we just run the unfinished subtrigger
c.trigger().firstUnfinishedSubTrigger().invokeOnElement(c);
} else {
// If merges can happen, we run for all subtriggers because they might be
// de-activated or re-activated
for (ExecutableTrigger subTrigger : c.trigger().subTriggers()) {
subTrigger.invokeOnElement(c);
}
}
}
@Override
public void onMerge(OnMergeContext c) throws Exception {
// NOTE that the ReduceFnRunner will delete all end-of-window timers for the
// merged-away windows.
ExecutableTrigger earlySubtrigger = c.trigger().subTrigger(EARLY_INDEX);
// We check the early trigger to determine if we are still processing it or
// if the end of window has transitioned us to the late trigger
OnMergeContext earlyContext = c.forTrigger(earlySubtrigger);
// If the early trigger is still active in any merging window then it is still active in
// the new merged window, because even if the merged window is "done" some pending elements
// haven't had a chance to fire.
if (!earlyContext.trigger().finishedInAllMergingWindows() || !endOfWindowReached(c)) {
earlyContext.trigger().setFinished(false);
if (lateTrigger != null) {
ExecutableTrigger lateSubtrigger = c.trigger().subTrigger(LATE_INDEX);
OnMergeContext lateContext = c.forTrigger(lateSubtrigger);
lateContext.trigger().setFinished(false);
lateSubtrigger.invokeClear(lateContext);
}
} else {
// Otherwise the early trigger and end-of-window bit is done for good.
earlyContext.trigger().setFinished(true);
if (lateTrigger != null) {
c.trigger().subTrigger(LATE_INDEX).invokeOnMerge(c);
}
}
}
@Override
public Trigger getContinuationTrigger() {
return new AfterWatermarkEarlyAndLate(
earlyTrigger.getContinuationTrigger(),
lateTrigger == null ? null : lateTrigger.getContinuationTrigger());
}
@Override
protected Trigger getContinuationTrigger(List> continuationTriggers) {
throw new UnsupportedOperationException(
"Should not call getContinuationTrigger(List>)");
}
@Override
public Instant getWatermarkThatGuaranteesFiring(W window) {
// Even without an early or late trigger, we'll still produce a firing at the watermark.
return window.maxTimestamp();
}
private boolean endOfWindowReached(Trigger.TriggerContext context) {
return context.currentEventTime() != null
&& context.currentEventTime().isAfter(context.window().maxTimestamp());
}
@Override
public boolean shouldFire(Trigger.TriggerContext context) throws Exception {
if (!context.trigger().isFinished(EARLY_INDEX)) {
// We have not yet transitioned to late firings.
// We should fire if either the trigger is ready or we reach the end of the window.
return context.trigger().subTrigger(EARLY_INDEX).invokeShouldFire(context)
|| endOfWindowReached(context);
} else if (lateTrigger == null) {
return false;
} else {
// We are running the late trigger
return context.trigger().subTrigger(LATE_INDEX).invokeShouldFire(context);
}
}
@Override
public void onFire(Trigger.TriggerContext context) throws Exception {
if (!context.forTrigger(context.trigger().subTrigger(EARLY_INDEX)).trigger().isFinished()) {
onNonLateFiring(context);
} else if (lateTrigger != null) {
onLateFiring(context);
} else {
// all done
context.trigger().setFinished(true);
}
}
private void onNonLateFiring(Trigger.TriggerContext context) throws Exception {
// We have not yet transitioned to late firings.
ExecutableTrigger earlySubtrigger = context.trigger().subTrigger(EARLY_INDEX);
Trigger.TriggerContext earlyContext = context.forTrigger(earlySubtrigger);
if (!endOfWindowReached(context)) {
// This is an early firing, since we have not arrived at the end of the window
// Implicitly repeats
earlySubtrigger.invokeOnFire(context);
earlySubtrigger.invokeClear(context);
earlyContext.trigger().setFinished(false);
} else {
// We have arrived at the end of the window; terminate the early trigger
// and clear out the late trigger's state
if (earlySubtrigger.invokeShouldFire(context)) {
earlySubtrigger.invokeOnFire(context);
}
earlyContext.trigger().setFinished(true);
earlySubtrigger.invokeClear(context);
if (lateTrigger == null) {
// Done if there is no late trigger.
context.trigger().setFinished(true);
} else {
// If there is a late trigger, we transition to it, and need to clear its state
// because it was run in parallel.
context.trigger().subTrigger(LATE_INDEX).invokeClear(context);
}
}
}
private void onLateFiring(Trigger.TriggerContext context) throws Exception {
// We are firing the late trigger, with implicit repeat
ExecutableTrigger lateSubtrigger = context.trigger().subTrigger(LATE_INDEX);
lateSubtrigger.invokeOnFire(context);
// It is a OnceTrigger, so it must have finished; unfinished it and clear it
lateSubtrigger.invokeClear(context);
context.forTrigger(lateSubtrigger).trigger().setFinished(false);
}
}
/**
* A watermark trigger targeted relative to the end of the window.
*/
public static class FromEndOfWindow extends OnceTrigger {
private FromEndOfWindow() {
super(null);
}
/**
* Creates a new {@code Trigger} like the this, except that it fires repeatedly whenever
* the given {@code Trigger} fires before the watermark has passed the end of the window.
*/
public AfterWatermarkEarly withEarlyFirings(OnceTrigger earlyFirings) {
Preconditions.checkNotNull(earlyFirings,
"Must specify the trigger to use for early firings");
return new AfterWatermarkEarlyAndLate(earlyFirings, null);
}
/**
* Creates a new {@code Trigger} like the this, except that it fires repeatedly whenever
* the given {@code Trigger} fires after the watermark has passed the end of the window.
*/
public AfterWatermarkLate withLateFirings(OnceTrigger lateFirings) {
Preconditions.checkNotNull(lateFirings,
"Must specify the trigger to use for late firings");
return new AfterWatermarkEarlyAndLate(new NeverTrigger(), lateFirings);
}
@Override
public void onElement(OnElementContext c) throws Exception {
// We're interested in knowing when the input watermark passes the end of the window.
// (It is possible this has already happened, in which case the timer will be fired
// almost immediately).
c.setTimer(c.window().maxTimestamp(), TimeDomain.EVENT_TIME);
}
@Override
public void onMerge(OnMergeContext c) throws Exception {
// NOTE that the ReduceFnRunner will delete all end-of-window timers for the
// merged-away windows.
if (!c.trigger().finishedInAllMergingWindows()) {
// If the trigger is still active in any merging window then it is still active in the new
// merged window, because even if the merged window is "done" some pending elements haven't
// had a chance to fire
c.trigger().setFinished(false);
} else if (!endOfWindowReached(c)) {
// If the end of the new window has not been reached, then the trigger is active again.
c.trigger().setFinished(false);
} else {
// Otherwise it is done for good
c.trigger().setFinished(true);
}
}
@Override
public Instant getWatermarkThatGuaranteesFiring(W window) {
return window.maxTimestamp();
}
@Override
public FromEndOfWindow getContinuationTrigger(List> continuationTriggers) {
return this;
}
@Override
public String toString() {
return "AfterWatermark.pastEndOfWindow()";
}
@Override
public boolean equals(Object obj) {
return obj instanceof FromEndOfWindow;
}
@Override
public int hashCode() {
return Objects.hash(getClass());
}
@Override
public boolean shouldFire(Trigger.TriggerContext context) throws Exception {
return endOfWindowReached(context);
}
private boolean endOfWindowReached(Trigger.TriggerContext context) {
return context.currentEventTime() != null
&& context.currentEventTime().isAfter(context.window().maxTimestamp());
}
@Override
protected void onOnlyFiring(Trigger.TriggerContext context) throws Exception { }
}
}