All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.cloud.dataflow.sdk.util.ReduceFn Maven / Gradle / Ivy

/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.dataflow.sdk.util;

import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger;
import com.google.cloud.dataflow.sdk.util.state.MergeableState;
import com.google.cloud.dataflow.sdk.util.state.State;
import com.google.cloud.dataflow.sdk.util.state.StateContents;
import com.google.cloud.dataflow.sdk.util.state.StateTag;

import org.joda.time.Instant;

import java.io.Serializable;
import java.util.Collection;
import java.util.Map;

/**
 * Specification for processing to happen after elements have been grouped by key.
 *
 * @param  The type of key being processed.
 * @param  The type of input values associated with the key.
 * @param  The output type that will be produced for each key.
 * @param  The type of windows this operates on.
 */
public abstract class ReduceFn
    implements Serializable {

  private static final long serialVersionUID = 0L;

  /** Interface for interacting with persistent state. */
  public interface StateContext {
    /** Access the storage for the given {@code address} in the current window. */
     StateT access(StateTag address);

    /**
     * Access the storage for the given {@code address} in all of the windows that were
     * merged into the current window including the current window.
     *
     * 

If no windows were merged, this reads from just the current window. */ > StateT accessAcrossMergedWindows( StateTag address); } /** Interface for interacting with persistent state within {@link #onMerge}. */ public interface MergingStateContext extends StateContext { /** * Access a merged view of the storage for the given {@code address} * in all of the windows being merged. */ public abstract > StateT accessAcrossMergingWindows( StateTag address); /** Access a map from windows being merged to the associated {@code StateT}. */ public abstract Map accessInEachMergingWindow( StateTag address); } /** * Interface for interacting with time. */ public interface Timers { /** * Sets a timer to fire when the watermark or processing time is beyond the given timestamp. * Timers are not guaranteed to fire immediately, but will be delivered at some time afterwards. * *

As with {@link StateContext}, timers are implicitly scoped to the current window. All * timer firings for a window will be received, but the implementation should choose to ignore * those that are not applicable. * * @param timestamp the time at which the trigger’s {@link Trigger#onTimer} callback should * execute * @param timeDomain the domain that the {@code timestamp} applies to */ public abstract void setTimer(Instant timestamp, TimeDomain timeDomain); /** * Removes the timer set in this trigger context for the given {@code window}, {@code timestmap} * and {@code timeDomain}. */ public abstract void deleteTimer(Instant timestamp, TimeDomain timeDomain); /** Returns the current processing time. */ public abstract Instant currentProcessingTime(); } /** Information accessible to all the processing methods in this {@code ReduceFn}. */ public abstract class Context { /** Return the key that is being processed. */ public abstract K key(); /** The window that is being processed. */ public abstract W window(); /** Access the current {@link WindowingStrategy}. */ public abstract WindowingStrategy windowingStrategy(); /** Return the interface for accessing state. */ public abstract StateContext state(); /** Return the interface for accessing timers. */ public abstract Timers timers(); } /** Information accessible within {@link #processValue}. */ public abstract class ProcessValueContext extends Context { /** Return the actual value being processed. */ public abstract InputT value(); /** Return the timestamp associated with the value. */ public abstract Instant timestamp(); } /** Information accessible within {@link #onMerge}. */ public abstract class OnMergeContext extends Context { /** * Return the collection of windows that were merged. * *

Note that this may include the result window. */ public abstract Collection mergingWindows(); /** Return the interface for accessing state. */ @Override public abstract MergingStateContext state(); } /** Information accessible within {@link #onTrigger}. */ public abstract class OnTriggerContext extends Context { /** Returns the {@link PaneInfo} for the trigger firing being processed. */ public abstract PaneInfo paneInfo(); /** Output the given value in the current window. */ public abstract void output(OutputT value); } ////////////////////////////////////////////////////////////////////////////////////////////////// /** * Called for each value of type {@code InputT} associated with the current key. */ public abstract void processValue(ProcessValueContext c) throws Exception; /** * Called when windows are merged. * *

There are generally two strategies for implementing this and handling merging of state: *

    *
  • Lazily merge the state when outputting. This is especially easy if all the state is stored * in {@link MergeableState}, since an automatically merged view can be retrieved. *
  • Eagerly merge the state inside the {@link #onMerge} implementation. Load all the state * from the merging windows and write it back to the result window. In this case the state in the * result window should be cleared into between the read and write in case it was in the source * windows. *
*/ public abstract void onMerge(OnMergeContext c) throws Exception; /** * Called when triggers fire. * *

Implementations of {@link ReduceFn} should call {@link OnTriggerContext#output} to emit * any results that should be included in the pane produced by this trigger firing. */ public abstract void onTrigger(OnTriggerContext c) throws Exception; /** * Called before {@link onTrigger} is invoked to provide an opportunity to prefetch any needed * state. * * @param c Context to use prefetch from. */ public void prefetchOnTrigger(StateContext c) { } /** * Called to clear any persisted state that the {@link ReduceFn} may be holding. This will be * called when the windowing is closing and will receive no future interactions. */ public abstract void clearState(Context c) throws Exception; /** * Returns true if the there is no buffered state. */ public abstract StateContents isEmpty(StateContext c); }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy