All downloads are free. Search and download functionality uses the official Maven repository.

com.google.cloud.dataflow.sdk.util.PushbackSideInputDoFnRunner Maven / Gradle / Ivy

Go to download

Google Cloud Dataflow Java SDK provides a simple, Java-based interface for processing data of virtually any size using Google Cloud resources. This artifact includes the entire Dataflow Java SDK.

There is a newer version: 2.5.0
Show newest version
/*
 * Copyright (C) 2016 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.dataflow.sdk.util;

import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
import com.google.cloud.dataflow.sdk.values.PCollectionView;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;

import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/**
 * A {@link DoFnRunner} that can refuse to process elements that are not ready, instead returning
 * them via the {@link #processElementInReadyWindows(WindowedValue)}.
 */
public class PushbackSideInputDoFnRunner implements DoFnRunner {
  private final DoFnRunner underlying;
  private final Collection> views;
  private final ReadyCheckingSideInputReader sideInputReader;

  private Set notReadyWindows;

  public static  PushbackSideInputDoFnRunner create(
      DoFnRunner underlying,
      Collection> views,
      ReadyCheckingSideInputReader sideInputReader) {
    return new PushbackSideInputDoFnRunner<>(underlying, views, sideInputReader);
  }

  private PushbackSideInputDoFnRunner(
      DoFnRunner underlying,
      Collection> views,
      ReadyCheckingSideInputReader sideInputReader) {
    this.underlying = underlying;
    this.views = views;
    this.sideInputReader = sideInputReader;
  }

  @Override
  public void startBundle() {
    notReadyWindows = new HashSet<>();
    underlying.startBundle();
  }

  /**
   * Call the underlying {@link DoFnRunner#processElement(WindowedValue)} for the provided element
   * for each window the element is in that is ready.
   *
   * @param elem the element to process in all ready windows
   * @return each element that could not be processed because it requires a side input window
   * that is not ready.
   */
  public Iterable> processElementInReadyWindows(WindowedValue elem) {
    if (views.isEmpty()) {
      processElement(elem);
      return Collections.emptyList();
    }
    ImmutableList.Builder> pushedBack = ImmutableList.builder();
    for (WindowedValue windowElem : elem.explodeWindows()) {
      BoundedWindow mainInputWindow = Iterables.getOnlyElement(windowElem.getWindows());
      boolean isReady = !notReadyWindows.contains(mainInputWindow);
      for (PCollectionView view : views) {
        BoundedWindow sideInputWindow =
            view.getWindowingStrategyInternal()
                .getWindowFn()
                .getSideInputWindow(mainInputWindow);
        if (!sideInputReader.isReady(view, sideInputWindow)) {
          isReady = false;
          break;
        }
      }
      if (isReady) {
        processElement(windowElem);
      } else {
        notReadyWindows.add(mainInputWindow);
        pushedBack.add(windowElem);
      }
    }
    return pushedBack.build();
  }

  @Override
  public void processElement(WindowedValue elem) {
    underlying.processElement(elem);
  }

  /**
   * Call the underlying {@link DoFnRunner#finishBundle()}.
   */
  @Override
  public void finishBundle() {
    notReadyWindows = null;
    underlying.finishBundle();
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy