All downloads are FREE. The search and download functionality uses the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows Maven / Gradle / Ivy
Go to download
The Google Cloud Dataflow Java SDK provides a simple, Java-based
interface for processing data of virtually any size using Google Cloud
resources. This artifact includes the entire Dataflow Java SDK.
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.transforms.windowing;
import com.google.cloud.dataflow.sdk.annotations.Experimental;
import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.transforms.display.DisplayData;
import org.joda.time.Duration;
import org.joda.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
/**
* A {@link WindowFn} that windows values into possibly overlapping fixed-size
* timestamp-based windows.
*
* <p>For example, in order to window data into 10 minute windows that
* update every minute:
*
* <pre>{@code
* PCollection items = ...;
* PCollection windowedItems = items.apply(
*     Window.into(SlidingWindows.of(Duration.standardMinutes(10))));
* }</pre>
*/
public class SlidingWindows extends NonMergingWindowFn {
  /**
   * Cap on the initial capacity hint for the list built in {@code assignWindows}. The list still
   * grows as needed; the cap only keeps the {@code long}-to-{@code int} narrowing of
   * {@code size / period} from overflowing into a negative or enormous capacity.
   */
  private static final int MAX_INITIAL_CAPACITY = 1 << 16;

  /**
   * Amount of time between the starts of consecutive generated windows.
   */
  private final Duration period;

  /**
   * Size of the generated windows.
   */
  private final Duration size;

  /**
   * Offset of the generated windows.
   * Windows start at time N * period + offset, where 0 is the epoch.
   */
  private final Duration offset;

  /**
   * Assigns timestamps into half-open intervals of the form
   * [N * period, N * period + size), where 0 is the epoch.
   *
   * <p>If {@link SlidingWindows#every} is not called, the period defaults
   * to the largest time unit smaller than the given duration. For example,
   * specifying a size of 5 seconds will result in a default period of 1 second.
   *
   * @param size the length of every generated window; must be longer than zero
   * @return a {@code SlidingWindows} with the default period for {@code size} and a zero offset
   * @throws IllegalArgumentException if {@code size} is not longer than zero
   */
  public static SlidingWindows of(Duration size) {
    return new SlidingWindows(getDefaultPeriod(size), size, Duration.ZERO);
  }

  /**
   * Returns a new {@code SlidingWindows} with the original size and offset, that assigns
   * timestamps into half-open intervals of the form
   * [N * period + offset, N * period + offset + size), where 0 is the epoch.
   *
   * @param period the time between the starts of consecutive windows
   * @throws IllegalArgumentException if the current offset is not in [0, period)
   */
  public SlidingWindows every(Duration period) {
    return new SlidingWindows(period, size, offset);
  }

  /**
   * Assigns timestamps into half-open intervals of the form
   * [N * period + offset, N * period + offset + size).
   *
   * @param offset the shift applied to every window start
   * @throws IllegalArgumentException if offset is not in [0, period)
   */
  public SlidingWindows withOffset(Duration offset) {
    return new SlidingWindows(period, size, offset);
  }

  /**
   * Validates and stores the windowing parameters.
   *
   * <p>The check {@code 0 <= offset < period} also guarantees that {@code period} is strictly
   * positive, which the modulo and division in this class rely on.
   *
   * @throws IllegalArgumentException unless 0 <= offset < period and 0 < size
   */
  private SlidingWindows(Duration period, Duration size, Duration offset) {
    if (offset.isShorterThan(Duration.ZERO)
        || !offset.isShorterThan(period)
        || !size.isLongerThan(Duration.ZERO)) {
      throw new IllegalArgumentException(
          "SlidingWindows WindowingStrategies must have 0 <= offset < period and 0 < size");
    }
    this.period = period;
    this.size = size;
    this.offset = offset;
  }

  /**
   * Returns the coder obtained from {@code IntervalWindow.getCoder()}.
   */
  @Override
  public Coder windowCoder() {
    return IntervalWindow.getCoder();
  }

  /**
   * Returns every sliding window that contains the timestamp of the element in {@code c}.
   *
   * <p>Windows are produced from the latest start not after the timestamp, stepping back one
   * period at a time while the window start is still later than {@code timestamp - size}.
   */
  @Override
  public Collection assignWindows(AssignContext c) {
    long periodMillis = period.getMillis();
    // The capacity is only a hint; clamp it so the int narrowing cannot overflow.
    List windows =
        new ArrayList<>((int) Math.min(size.getMillis() / periodMillis, MAX_INITIAL_CAPACITY));
    Instant timestamp = c.timestamp();
    // A window starting at or before this instant ends at or before the timestamp.
    long exclusiveLowerBound = timestamp.minus(size).getMillis();
    for (long start = lastStartFor(timestamp);
        start > exclusiveLowerBound;
        start -= periodMillis) {
      windows.add(new IntervalWindow(new Instant(start), size));
    }
    return windows;
  }

  /**
   * Return the earliest window that contains the end of the main-input window.
   *
   * @throws IllegalArgumentException if {@code window} is a {@code GlobalWindow}
   */
  @Override
  public IntervalWindow getSideInputWindow(final BoundedWindow window) {
    if (window instanceof GlobalWindow) {
      throw new IllegalArgumentException(
          "Attempted to get side input window for GlobalWindow from non-global WindowFn");
    }
    // The last start at or before (maxTimestamp - size) belongs to a window that has already
    // ended by maxTimestamp; one period later is the first window still covering it.
    long lastStart = lastStartFor(window.maxTimestamp().minus(size));
    return new IntervalWindow(new Instant(lastStart + period.getMillis()), size);
  }

  /**
   * Two {@code SlidingWindows} are compatible exactly when they are equal.
   */
  @Override
  public boolean isCompatible(WindowFn other) {
    return equals(other);
  }

  /**
   * Adds the window size, period and offset to the display data.
   */
  @Override
  public void populateDisplayData(DisplayData.Builder builder) {
    super.populateDisplayData(builder);
    builder
        .add(DisplayData.item("size", size)
            .withLabel("Window Size"))
        .add(DisplayData.item("period", period)
            .withLabel("Window Period"))
        .add(DisplayData.item("offset", offset)
            .withLabel("Window Start Offset"));
  }

  /**
   * Return the last start of a sliding window that is not after the timestamp, i.e. the largest
   * value of the form N * period + offset that is <= timestamp.
   */
  private long lastStartFor(Instant timestamp) {
    long periodMillis = period.getMillis();
    long sinceWindowStart = timestamp.minus(offset).getMillis() % periodMillis;
    // Java's % takes the sign of the dividend, so timestamps before (epoch + offset) yield a
    // negative remainder. Normalise into [0, period) so the start never lies after the timestamp.
    if (sinceWindowStart < 0) {
      sinceWindowStart += periodMillis;
    }
    return timestamp.getMillis() - sinceWindowStart;
  }

  /**
   * Returns the default period for a window of the given size: one hour, minute or second if
   * {@code size} is strictly longer than that unit (largest unit first), otherwise one
   * millisecond.
   */
  static Duration getDefaultPeriod(Duration size) {
    if (size.isLongerThan(Duration.standardHours(1))) {
      return Duration.standardHours(1);
    }
    if (size.isLongerThan(Duration.standardMinutes(1))) {
      return Duration.standardMinutes(1);
    }
    if (size.isLongerThan(Duration.standardSeconds(1))) {
      return Duration.standardSeconds(1);
    }
    return Duration.millis(1);
  }

  /**
   * Returns the amount of time between generated windows.
   */
  public Duration getPeriod() {
    return period;
  }

  /**
   * Returns the size of the generated windows.
   */
  public Duration getSize() {
    return size;
  }

  /**
   * Returns the offset of the generated windows.
   */
  public Duration getOffset() {
    return offset;
  }

  /**
   * Ensures that later sliding windows have an output time that is past the end of earlier windows.
   *
   * <p>If this is the earliest sliding window containing {@code inputTimestamp}, that's fine.
   * Otherwise, we pick the earliest time that doesn't overlap with earlier windows.
   */
  @Experimental(Kind.OUTPUT_TIME)
  @Override
  public OutputTimeFn getOutputTimeFn() {
    return new OutputTimeFn.Defaults() {
      @Override
      public Instant assignOutputTime(Instant inputTimestamp, BoundedWindow window) {
        // One period before the window's max timestamp; later instants form the last segment.
        Instant startOfLastSegment = window.maxTimestamp().minus(period);
        return startOfLastSegment.isBefore(inputTimestamp)
            ? inputTimestamp
            : startOfLastSegment.plus(1);
      }

      @Override
      public boolean dependsOnlyOnEarliestInputTimestamp() {
        return true;
      }
    };
  }

  /**
   * Two {@code SlidingWindows} are equal when their offset, size and period are all equal.
   */
  @Override
  public boolean equals(Object object) {
    if (!(object instanceof SlidingWindows)) {
      return false;
    }
    SlidingWindows other = (SlidingWindows) object;
    return getOffset().equals(other.getOffset())
        && getSize().equals(other.getSize())
        && getPeriod().equals(other.getPeriod());
  }

  @Override
  public int hashCode() {
    return Objects.hash(size, offset, period);
  }
}