All downloads are free. Search and download functionality uses the official Maven repository.

com.google.cloud.dataflow.sdk.util.ReduceFnRunner Maven / Gradle / Ivy

/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.dataflow.sdk.util;

import com.google.cloud.dataflow.sdk.transforms.Aggregator;
import com.google.cloud.dataflow.sdk.transforms.GroupByKey.GroupByKeyOnly;
import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo.Timing;
import com.google.cloud.dataflow.sdk.transforms.windowing.Window.ClosingBehavior;
import com.google.cloud.dataflow.sdk.util.ActiveWindowSet.MergeCallback;
import com.google.cloud.dataflow.sdk.util.ReduceFnContextFactory.OnTriggerCallbacks;
import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
import com.google.cloud.dataflow.sdk.util.TriggerRunner.Result;
import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode;
import com.google.cloud.dataflow.sdk.util.state.StateContents;
import com.google.cloud.dataflow.sdk.util.state.StateNamespaces.WindowNamespace;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Throwables;

import org.joda.time.Instant;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collection;
import java.util.Collections;
import java.util.List;

import javax.annotation.Nullable;

/**
 * Manages the execution of a {@link ReduceFn} after a {@link GroupByKeyOnly} has partitioned the
 * {@link PCollection} by key.
 *
 * 

The {@link #onTrigger} relies on a {@link TriggerRunner} to manage the execution of * the triggering logic. The {@code ReduceFnRunner}s responsibilities are: * *

    *
  • Tracking the windows that are active (have buffered data) as elements arrive and * triggers are fired. *
  • Holding the watermark based on the timestamps of elements in a pane and releasing it * when the trigger fires. *
  • Dropping data that exceeds the maximum allowed lateness. *
  • Calling the appropriate callbacks on {@link ReduceFn} based on trigger execution, timer * firings, etc. *
  • Scheduling garbage collection of state associated with a specific window, and making that * happen when the appropriate timer fires. *
* * @param The type of key being processed. * @param The type of values associated with the key. * @param The output type that will be produced for each key. * @param The type of windows this operates on. */ public class ReduceFnRunner implements MergeCallback { private static final Logger LOG = LoggerFactory.getLogger(ReduceFnRunner.class); public static final String DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER = "DroppedDueToClosedWindow"; public static final String DROPPED_DUE_TO_LATENESS_COUNTER = "DroppedDueToLateness"; private final WindowingStrategy windowingStrategy; private final TimerInternals timerInternals; private final WindowingInternals> windowingInternals; private final Aggregator droppedDueToClosedWindow; private final Aggregator droppedDueToLateness; private final TriggerRunner triggerRunner; private final K key; private final ActiveWindowSet activeWindows; private final WatermarkHold watermarkHold; private final ReduceFnContextFactory contextFactory; private final ReduceFn reduceFn; private final PaneInfoTracker paneInfo; private final NonEmptyPanes nonEmptyPanes; public ReduceFnRunner( K key, WindowingStrategy windowingStrategy, TimerInternals timerInternals, WindowingInternals> windowingInternals, Aggregator droppedDueToClosedWindow, Aggregator droppedDueToLateness, ReduceFn reduceFn) { this.key = key; this.timerInternals = timerInternals; this.paneInfo = new PaneInfoTracker(timerInternals); this.windowingInternals = windowingInternals; this.droppedDueToClosedWindow = droppedDueToClosedWindow; this.droppedDueToLateness = droppedDueToLateness; this.reduceFn = reduceFn; @SuppressWarnings("unchecked") WindowingStrategy objectWindowingStrategy = (WindowingStrategy) windowingStrategy; this.windowingStrategy = objectWindowingStrategy; this.nonEmptyPanes = NonEmptyPanes.create(this.windowingStrategy, this.reduceFn); this.activeWindows = createActiveWindowSet(); this.contextFactory = new ReduceFnContextFactory( key, reduceFn, this.windowingStrategy, 
this.windowingInternals.stateInternals(), this.activeWindows, timerInternals); this.watermarkHold = new WatermarkHold<>(windowingStrategy); this.triggerRunner = new TriggerRunner<>( windowingStrategy.getTrigger(), new TriggerContextFactory<>(windowingStrategy, this.windowingInternals.stateInternals(), activeWindows)); } private ActiveWindowSet createActiveWindowSet() { return windowingStrategy.getWindowFn().isNonMerging() ? new NonMergingActiveWindowSet() : new MergingActiveWindowSet( windowingStrategy.getWindowFn(), windowingInternals.stateInternals()); } @VisibleForTesting boolean isFinished(W window) { return triggerRunner.isClosed(contextFactory.base(window).state()); } public void processElement(WindowedValue value) { Lateness lateness = getLateness(value); if (lateness.isPastAllowedLateness) { // Drop the element in all assigned windows if it is past the allowed lateness limit. droppedDueToLateness.addValue((long) value.getWindows().size()); return; } @SuppressWarnings("unchecked") Collection windows = (Collection) value.getWindows(); // Prefetch in each of the windows for (W window : windows) { ReduceFn.ProcessValueContext context = contextFactory.forValue(window, value.getValue(), value.getTimestamp()); triggerRunner.prefetchForValue(context.state()); } // And process each of the windows for (W window : windows) { ReduceFn.ProcessValueContext context = contextFactory.forValue(window, value.getValue(), value.getTimestamp()); // Check to see if the triggerRunner thinks the window is closed. If so, drop that window. if (triggerRunner.isClosed(context.state())) { droppedDueToClosedWindow.addValue(1L); continue; } // If this is a new window if (activeWindows.add(window)) { // And schedule cleanup scheduleCleanup(context); } nonEmptyPanes.recordContent(context); // Update the watermark hold since the value will be part of the next pane. 
watermarkHold.addHold(context, lateness.isLate); // Execute the reduceFn, which will buffer the value as appropriate try { reduceFn.processValue(context); } catch (Exception e) { throw wrapMaybeUserException(e); } // Run the trigger and handle the result as appropriate try { handleTriggerResult(context, false, triggerRunner.processValue(context)); } catch (Exception e) { Throwables.propagateIfPossible(e); throw new RuntimeException("Failed to run trigger", e); } } } private void holdForEmptyPanes(ReduceFn.Context context) { if (timerInternals.currentWatermarkTime().isAfter(context.window().maxTimestamp())) { watermarkHold.holdForFinal(context); } else { watermarkHold.holdForOnTime(context); } } /** * Attempt to merge all of the windows. */ @VisibleForTesting void merge() throws Exception { activeWindows.mergeIfAppropriate(null, this); } /** * Make sure that all the state built up in this runner has been persisted. */ public void persist() { activeWindows.persist(); } /** * Called when windows merge. */ @Override public void onMerge(Collection mergedWindows, W resultWindow, boolean isResultWindowNew) { ReduceFn.OnMergeContext resultContext = contextFactory.forMerge(mergedWindows, resultWindow); // Schedule state reads for trigger execution. triggerRunner.prefetchForMerge(resultContext.state()); // Run the reduceFn to perform any needed merging. try { reduceFn.onMerge(resultContext); } catch (Exception e) { throw wrapMaybeUserException(e); } // Have the trigger merge state as needed, and handle the result. Result triggerResult; try { triggerResult = triggerRunner.onMerge(resultContext); } catch (Exception e) { Throwables.propagateIfPossible(e); throw new RuntimeException("Failed to merge the triggers", e); } // Cleanup the trigger state in the old windows. 
for (W mergedWindow : mergedWindows) { if (!mergedWindow.equals(resultWindow)) { try { ReduceFn.Context mergedContext = contextFactory.base(mergedWindow); cancelCleanup(mergedContext); triggerRunner.clearEverything(mergedContext); paneInfo.clear(mergedContext.state()); } catch (Exception e) { Throwables.propagateIfPossible(e); throw new RuntimeException("Exception while clearing trigger state", e); } } } // Schedule cleanup if the window is new. Do this after cleaning up the old state in case one // of them had a timer at the same point. if (isResultWindowNew) { scheduleCleanup(resultContext); } // Handle the trigger result as appropriate. handleTriggerResult(resultContext, false, triggerResult); } /** * Called when a timer fires. */ public void onTimer(TimerData timer) { if (!(timer.getNamespace() instanceof WindowNamespace)) { throw new IllegalArgumentException( "Expected WindowNamespace, but was " + timer.getNamespace()); } @SuppressWarnings("unchecked") WindowNamespace windowNamespace = (WindowNamespace) timer.getNamespace(); W window = windowNamespace.getWindow(); ReduceFn.Context context = contextFactory.base(window); // If this timer firing is at the watermark, then it may cause a trigger firing of an // AfterWatermark trigger. boolean isAtWatermark = TimeDomain.EVENT_TIME == timer.getDomain() && !timer.getTimestamp().isBefore(window.maxTimestamp()); if (TimeDomain.EVENT_TIME == timer.getDomain() && (isCleanupTime(window, timer.getTimestamp()) || isCleanupTime(window, timerInternals.currentWatermarkTime()))) { // If it looks like this was a watermark firing, see if the trigger tree was waiting for it. // If it fires, then we know there was a pending AfterWatermark trigger. 
if (isAtWatermark) { TriggerRunner.Result timerResult = runTriggersForTimer(context, timer); isAtWatermark = (timerResult != null && timerResult.isFire()); } // Do the actual cleanup try { doCleanup(context, isAtWatermark); } catch (Exception e) { LOG.error("Exception while garbage collecting window {}", windowNamespace.getWindow(), e); } } else { // Skip timers for expired windows. if (isCleanupTime(window, timerInternals.currentWatermarkTime())) { return; } TriggerRunner.Result timerResult = runTriggersForTimer(context, timer); if (timerResult != null) { handleTriggerResult(context, isAtWatermark, timerResult); } // Since we processed an on-time firing, we should schedule the GC timer. if (TimeDomain.EVENT_TIME == timer.getDomain() && timer.getTimestamp().isEqual(window.maxTimestamp())) { scheduleCleanup(context); } } } @Nullable private TriggerRunner.Result runTriggersForTimer( ReduceFn.Context context, TimerData timer) { triggerRunner.prefetchForTimer(context.state()); // Skip timers for windows that were closed by triggers, but haven't expired yet. if (triggerRunner.isClosed(context.state())) { return null; } try { return triggerRunner.onTimer(context, timer); } catch (Exception e) { Throwables.propagateIfPossible(e); throw new RuntimeException("Exception in onTimer for trigger", e); } } /** Called when the cleanup timer has fired for the given window. */ private void doCleanup( ReduceFn.Context context, boolean maybeAtWatermark) throws Exception { // If the window is active, fire a pane. if (!triggerRunner.isClosed(context.state())) { // Before we fire, make sure this window doesn't get merged away. If it does, the merging // should have cleaned up the window anyways. try { if (!activeWindows.mergeIfAppropriate(context.window(), this)) { // The window was merged away. Either the onMerge fired the resulting window or the // trigger wasn't ready to fire in the resulting window. Either way, we aren't ready to // fire. 
// Since we haven't committed the finished bits for this window, we won't skip it when // merging. Instead, we'll examine the triggers that are pending on all merge windows, // and select the right behavior. return; } } catch (Exception e) { Throwables.propagateIfPossible(e); throw new RuntimeException("Exception while merging windows", e); } // Run onTrigger to produce the actual final pane contents. onTrigger(context, maybeAtWatermark, true /* isFinal */); } // Cleanup the associated state. nonEmptyPanes.clearPane(context); reduceFn.clearState(context); triggerRunner.clearEverything(context); paneInfo.clear(context.state()); watermarkHold.releaseOnTime(context); } private void handleTriggerResult( ReduceFn.Context context, boolean maybeAtWatermark, TriggerRunner.Result result) { // Unless the trigger is firing, there is nothing to do besides persisting the results. if (!result.isFire()) { result.persistFinishedSet(context.state()); return; } // Before we fire, make sure this window doesn't get merged away. try { if (!activeWindows.mergeIfAppropriate(context.window(), this)) { // The window was merged away. Either the onMerge fired the resulting window or the trigger // wasn't ready to fire in the resulting window. Either way, we aren't ready to fire. // Since we haven't committed the finished bits for this window, we won't skip it when // merging. Instead, we'll examine the triggers that are pending on all merge windows, // and select the right behavior. return; } } catch (Exception e) { Throwables.propagateIfPossible(e); throw new RuntimeException("Exception while merging windows", e); } // Run onTrigger to produce the actual pane contents. onTrigger(context, maybeAtWatermark, result.isFinish()); // Now that we've triggered, the pane is empty. 
nonEmptyPanes.clearPane(context); // Cleanup buffered data if appropriate if (shouldDiscardAfterFiring(result)) { // Clear the reduceFn state try { reduceFn.clearState(context); } catch (Exception e) { throw wrapMaybeUserException(e); } // Remove the window from active set -- nothing is buffered. activeWindows.remove(context.window()); } if (result.isFinish()) { // If we're finishing, clear up the trigger tree as well. try { triggerRunner.clearState(context); paneInfo.clear(context.state()); watermarkHold.releaseFinal(context); } catch (Exception e) { Throwables.propagateIfPossible(e); throw new RuntimeException("Exception while clearing trigger state", e); } } // Make sure we've persisted the finished bits. result.persistFinishedSet(context.state()); } public static StateContents stateContentsOf(final T value) { return new StateContents() { @Override public T read() { return value; } }; } /** * Run the {@link ReduceFn#onTrigger} method and produce any necessary output. * * @param context the context for the pane to fire * @param isAtWatermark true if this triggering is for an AfterWatermark trigger * @param isFinal true if this will be the last triggering processed */ private void onTrigger(final ReduceFn.Context context, boolean isAtWatermark, boolean isFinal) { StateContents outputTimestampFuture = watermarkHold.extractAndRelease(context); StateContents paneFuture = paneInfo.getNextPaneInfo(context, isAtWatermark, isFinal); StateContents isEmptyFuture = nonEmptyPanes.isEmpty(context); reduceFn.prefetchOnTrigger(context.state()); final PaneInfo pane = paneFuture.read(); final Instant outputTimestamp = outputTimestampFuture.read(); boolean shouldOutput = // If the pane is not empty !isEmptyFuture.read() // or this is the final pane, and the user has asked for it even if its empty || (isFinal && windowingStrategy.getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS) // or this is the on-time firing, and the user explicitly requested it. 
|| (isAtWatermark && pane.getTiming() == Timing.ON_TIME); // We've consumed the empty pane hold by reading it, so reinstate that, if necessary. holdForEmptyPanes(context); // If there is nothing to output, we're done. if (!shouldOutput) { return; } // Run reduceFn.onTrigger method. final List windows = Collections.singletonList(context.window()); ReduceFn.OnTriggerContext triggerContext = contextFactory.forTrigger( context.window(), paneFuture, new OnTriggerCallbacks() { @Override public void output(OutputT toOutput) { // We're going to output panes, so commit the (now used) PaneInfo. paneInfo.storeCurrentPaneInfo(context, pane); // Output the actual value. windowingInternals.outputWindowedValue( KV.of(key, toOutput), outputTimestamp, windows, pane); } }); try { reduceFn.onTrigger(triggerContext); } catch (Exception e) { throw wrapMaybeUserException(e); } } private Instant cleanupTime(W window) { return window.maxTimestamp().plus(windowingStrategy.getAllowedLateness()); } /** Return true if {@code timestamp} is past the cleanup time for {@code window}. 
*/ private boolean isCleanupTime(W window, Instant timestamp) { return !timestamp.isBefore(cleanupTime(window)); } private void scheduleCleanup(ReduceFn.Context context) { if (timerInternals.currentWatermarkTime().isAfter(context.window().maxTimestamp())) { context.timers().setTimer(cleanupTime(context.window()), TimeDomain.EVENT_TIME); } else { context.timers().setTimer(context.window().maxTimestamp(), TimeDomain.EVENT_TIME); } } private void cancelCleanup(ReduceFn.Context context) { context.timers().deleteTimer(cleanupTime(context.window()), TimeDomain.EVENT_TIME); } private boolean shouldDiscardAfterFiring(TriggerRunner.Result result) { return result.isFinish() || (result.isFire() && AccumulationMode.DISCARDING_FIRED_PANES == windowingStrategy.getMode()); } ////////////////////////////////////////////////////////////////////////////////////////////////// private enum Lateness { NOT_LATE(false, false), LATE(true, false), PAST_ALLOWED_LATENESS(true, true); private final boolean isLate; private final boolean isPastAllowedLateness; private Lateness(boolean isLate, boolean isPastAllowedLateness) { this.isLate = isLate; this.isPastAllowedLateness = isPastAllowedLateness; } } private Lateness getLateness(WindowedValue value) { Instant latestAllowed = timerInternals.currentWatermarkTime().minus(windowingStrategy.getAllowedLateness()); if (value.getTimestamp().isBefore(latestAllowed)) { return Lateness.PAST_ALLOWED_LATENESS; } else if (value.getTimestamp().isBefore(timerInternals.currentWatermarkTime())) { return Lateness.LATE; } else { return Lateness.NOT_LATE; } } private RuntimeException wrapMaybeUserException(Throwable t) { if (reduceFn instanceof SystemReduceFn) { throw Throwables.propagate(t); } else { // Any exceptions that happen inside a non-system ReduceFn are considered user code. throw new UserCodeException(t); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy