
// com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsViaIteratorsDoFn Maven / Gradle / Ivy
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.util;
import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
import com.google.cloud.dataflow.sdk.transforms.windowing.DefaultTrigger;
import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode;
import com.google.cloud.dataflow.sdk.util.common.PeekingReiterator;
import com.google.cloud.dataflow.sdk.util.common.Reiterable;
import com.google.cloud.dataflow.sdk.util.common.Reiterator;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.common.base.MoreObjects;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.ListMultimap;
import org.joda.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
/**
* {@link GroupAlsoByWindowsDoFn} that uses reiterators to handle non-merging window functions with
* the default triggering strategy.
*
* @param key type
* @param value element type
* @param window type
*/
@SystemDoFnInternal
@SuppressWarnings("serial")
class GroupAlsoByWindowsViaIteratorsDoFn
extends GroupAlsoByWindowsDoFn, W> {
private final WindowingStrategy, W> strategy;
public static boolean isSupported(WindowingStrategy, ?> strategy) {
if (!strategy.getWindowFn().isNonMerging()) {
return false;
}
// TODO: Add support for other triggers.
if (!(strategy.getTrigger().getSpec() instanceof DefaultTrigger)) {
return false;
}
// Right now, we support ACCUMULATING_FIRED_PANES because it is the same as
// DISCARDING_FIRED_PANES. In Batch mode there is no late data so the default
// trigger (after watermark) will only fire once.
if (!strategy.getMode().equals(AccumulationMode.DISCARDING_FIRED_PANES)
&& !strategy.getMode().equals(AccumulationMode.ACCUMULATING_FIRED_PANES)) {
return false;
}
return true;
}
public GroupAlsoByWindowsViaIteratorsDoFn(WindowingStrategy, W> strategy) {
this.strategy = strategy;
}
@Override
public void processElement(ProcessContext c) throws Exception {
K key = c.element().getKey();
Iterable> value = c.element().getValue();
PeekingReiterator> iterator;
if (value instanceof Collection) {
iterator = new PeekingReiterator<>(new ListReiterator>(
new ArrayList>((Collection>) value), 0));
} else if (value instanceof Reiterable) {
iterator = new PeekingReiterator<>(((Reiterable>) value).iterator());
} else {
throw new IllegalArgumentException(
"Input to GroupAlsoByWindowsDoFn must be a Collection or Reiterable");
}
// This ListMultimap is a map of window maxTimestamps to the list of active
// windows with that maxTimestamp.
ListMultimap windows = ArrayListMultimap.create();
while (iterator.hasNext()) {
WindowedValue e = iterator.peek();
for (BoundedWindow window : e.getWindows()) {
// If this window is not already in the active set, emit a new WindowReiterable
// corresponding to this window, starting at this element in the input Reiterable.
if (!windows.containsEntry(window.maxTimestamp(), window)) {
// This window was produced by strategy.getWindowFn()
@SuppressWarnings("unchecked")
W typedWindow = (W) window;
// Iterating through the WindowReiterable may advance iterator as an optimization
// for as long as it detects that there are no new windows.
windows.put(window.maxTimestamp(), window);
c.windowingInternals().outputWindowedValue(
KV.of(key, (Iterable) new WindowReiterable(iterator, window)),
strategy.getWindowFn().getOutputTime(e.getTimestamp(), typedWindow),
Arrays.asList(window),
PaneInfo.ON_TIME_AND_ONLY_FIRING);
}
}
// Copy the iterator in case the next DoFn cached its version of the iterator instead
// of immediately iterating through it.
// And, only advance the iterator if the consuming operation hasn't done so.
iterator = iterator.copy();
if (iterator.hasNext() && iterator.peek() == e) {
iterator.next();
}
// Remove all windows with maxTimestamp behind the current timestamp.
Iterator windowIterator = windows.keys().iterator();
while (windowIterator.hasNext()
&& windowIterator.next().isBefore(e.getTimestamp())) {
windowIterator.remove();
}
}
}
/**
* {@link Reiterable} representing a view of all elements in a base
* {@link Reiterator} that are in a given window.
*/
private static class WindowReiterable implements Reiterable {
private PeekingReiterator> baseIterator;
private BoundedWindow window;
public WindowReiterable(
PeekingReiterator> baseIterator, BoundedWindow window) {
this.baseIterator = baseIterator;
this.window = window;
}
@Override
public Reiterator iterator() {
// We don't copy the baseIterator when creating the first WindowReiterator
// so that the WindowReiterator can advance the baseIterator. We have to
// make a copy afterwards so that future calls to iterator() will start
// at the right spot.
Reiterator result = new WindowReiterator(baseIterator, window);
baseIterator = baseIterator.copy();
return result;
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.addValue(Iterables.toString(this))
.toString();
}
}
/**
* The {@link Reiterator} used by
* {@link com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsViaIteratorsDoFn.WindowReiterable}.
*/
private static class WindowReiterator implements Reiterator {
private PeekingReiterator> iterator;
private BoundedWindow window;
public WindowReiterator(PeekingReiterator> iterator, BoundedWindow window) {
this.iterator = iterator;
this.window = window;
}
@Override
public Reiterator copy() {
return new WindowReiterator(iterator.copy(), window);
}
@Override
public boolean hasNext() {
skipToValidElement();
return (iterator.hasNext() && iterator.peek().getWindows().contains(window));
}
@Override
public V next() {
skipToValidElement();
WindowedValue next = iterator.next();
if (!next.getWindows().contains(window)) {
throw new NoSuchElementException("No next item in window");
}
return next.getValue();
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
/**
* Moves the underlying iterator forward until it either points to the next
* element in the correct window, or is past the end of the window.
*/
private void skipToValidElement() {
while (iterator.hasNext()) {
WindowedValue peek = iterator.peek();
if (peek.getTimestamp().isAfter(window.maxTimestamp())) {
// We are past the end of this window, so there can't be any more
// elements in this iterator.
break;
}
if (!(peek.getWindows().size() == 1 && peek.getWindows().contains(window))) {
// We have reached new windows; we need to copy the iterator so we don't
// keep advancing the outer loop in processElement.
iterator = iterator.copy();
}
if (!peek.getWindows().contains(window)) {
// The next element is not in the right window: skip it.
iterator.next();
} else {
// The next element is in the right window.
break;
}
}
}
}
/**
* {@link Reiterator} that wraps a {@link List}.
*/
private static class ListReiterator implements Reiterator {
private List list;
private int index;
public ListReiterator(List list, int index) {
this.list = list;
this.index = index;
}
@Override
public T next() {
return list.get(index++);
}
@Override
public boolean hasNext() {
return index < list.size();
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@Override
public Reiterator copy() {
return new ListReiterator(list, index);
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy