// com.hazelcast.jet.core.EventTimePolicy (Maven / Gradle / Ivy)
/*
* Copyright (c) 2008-2021, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.jet.core;
import com.hazelcast.function.SupplierEx;
import com.hazelcast.function.ToLongFunctionEx;
import com.hazelcast.jet.core.function.ObjLongBiFunction;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.Serializable;
import static com.hazelcast.internal.util.Preconditions.checkNotNegative;
import static com.hazelcast.internal.util.Preconditions.checkTrue;
import static com.hazelcast.jet.impl.util.Util.checkSerializable;
/**
 * A holder of functions and parameters Jet needs to handle event time and the
 * associated watermarks. These are the components:
 * <ul><li>
 *     {@code timestampFn}: extracts the timestamp from an event in the stream
 * </li><li>
 *     {@code newWmPolicyFn}: a factory of watermark policy objects.
 *     Refer to its {@linkplain WatermarkPolicy documentation} for explanation.
 * </li><li>
 *     frame size and frame offset for watermark throttling:
 *     they allow the processor to filter out redundant watermark items before
 *     emitting them. For example, a sliding/tumbling window processor doesn't
 *     need to observe more than one watermark item per frame.
 * </li><li>
 *     {@code idleTimeoutMillis}: a measure to mitigate the issue with
 *     temporary lulls in a distributed event stream. It pertains to each
 *     partition of a data source independently. If Jet doesn't receive any
 *     events from a given partition for this long, it will mark it as "idle"
 *     and let the watermark in downstream vertices advance as if the
 *     partition didn't exist.
 * </li><li>
 *     {@code wrapFn}: a function that transforms a given event and its
 *     timestamp into the item to emit from the processor. For example, the
 *     Pipeline API uses this to wrap items into {@code JetEvent}s as a way
 *     to propagate the event timestamps through the pipeline regardless of
 *     the transformation the user does on the event objects themselves.
 * </li></ul>
 *
 * This class should be used with {@link EventTimeMapper} when implementing a
 * source processor.
 *
 * @param <T> event type
 *
 * @since Jet 3.0
 */
public final class EventTimePolicy<T> implements Serializable {

    /**
     * The default idle timeout in milliseconds.
     */
    public static final long DEFAULT_IDLE_TIMEOUT = 60_000L;

    private static final long serialVersionUID = 1L;

    /** Shared wrapping function that emits the event itself and ignores the timestamp. */
    private static final ObjLongBiFunction<?, ?> NO_WRAPPING = (event, timestamp) -> event;

    /**
     * Factory of watermark policies that ignore every reported event and
     * always report {@code Long.MIN_VALUE} as the current watermark.
     */
    private static final SupplierEx<WatermarkPolicy> NO_WATERMARKS = () -> new WatermarkPolicy() {
        @Override
        public void reportEvent(long timestamp) {
            // intentionally empty: this policy does not track event timestamps
        }

        @Override
        public long getCurrentWatermark() {
            return Long.MIN_VALUE;
        }
    };

    private final ToLongFunctionEx<? super T> timestampFn;
    private final ObjLongBiFunction<? super T, ?> wrapFn;
    private final SupplierEx<? extends WatermarkPolicy> newWmPolicyFn;
    private final long watermarkThrottlingFrameSize;
    private final long watermarkThrottlingFrameOffset;
    private final long idleTimeoutMillis;

    /**
     * Validates the numeric parameters and stores all components. Instances
     * are created through the static factory methods of this class.
     */
    private EventTimePolicy(
            @Nullable ToLongFunctionEx<? super T> timestampFn,
            @Nonnull ObjLongBiFunction<? super T, ?> wrapFn,
            @Nonnull SupplierEx<? extends WatermarkPolicy> newWmPolicyFn,
            long watermarkThrottlingFrameSize,
            long watermarkThrottlingFrameOffset,
            long idleTimeoutMillis
    ) {
        checkNotNegative(watermarkThrottlingFrameSize, "watermarkThrottlingFrameSize must be >= 0");
        checkNotNegative(watermarkThrottlingFrameOffset, "watermarkThrottlingFrameOffset must be >= 0");
        // A frame size of 0 is exempt from the offset-vs-size comparison.
        checkTrue(watermarkThrottlingFrameOffset < watermarkThrottlingFrameSize || watermarkThrottlingFrameSize == 0,
                "offset must be smaller than frame size");
        checkNotNegative(idleTimeoutMillis, "idleTimeoutMillis must be >= 0 (0 means disabled)");

        this.timestampFn = timestampFn;
        this.newWmPolicyFn = newWmPolicyFn;
        this.wrapFn = wrapFn;
        this.idleTimeoutMillis = idleTimeoutMillis;
        this.watermarkThrottlingFrameSize = watermarkThrottlingFrameSize;
        this.watermarkThrottlingFrameOffset = watermarkThrottlingFrameOffset;
    }

    /**
     * Creates and returns a new event time policy. To get a policy that
     * results in no timestamping, call {@link #noEventTime()}.
     *
     * @param timestampFn function that extracts the timestamp from the event;
     *      if null, Jet will use the source's native timestamp
     * @param wrapFn function that transforms the received item and its
     *      timestamp into the emitted item
     * @param newWmPolicyFn factory of the watermark policy objects
     * @param watermarkThrottlingFrameSize the frame length to which we
     *      throttle watermarks, see {@link #watermarkThrottlingFrameSize()};
     *      must be {@code >= 0}
     * @param watermarkThrottlingFrameOffset the frame offset to which we
     *      throttle watermarks, see {@link #watermarkThrottlingFrameOffset()};
     *      must be {@code >= 0} and, unless the frame size is 0, smaller than
     *      the frame size
     * @param idleTimeoutMillis the timeout after which a partition will be
     *      marked as idle; must be {@code >= 0}. Use 0 to disable the feature.
     * @param <T> event type
     */
    public static <T> EventTimePolicy<T> eventTimePolicy(
            @Nullable ToLongFunctionEx<? super T> timestampFn,
            @Nonnull ObjLongBiFunction<? super T, ?> wrapFn,
            @Nonnull SupplierEx<? extends WatermarkPolicy> newWmPolicyFn,
            long watermarkThrottlingFrameSize,
            long watermarkThrottlingFrameOffset,
            long idleTimeoutMillis
    ) {
        checkSerializable(timestampFn, "timestampFn");
        // Check wrapFn itself; previously timestampFn was checked twice and
        // wrapFn was never validated.
        checkSerializable(wrapFn, "wrapFn");
        checkSerializable(newWmPolicyFn, "newWmPolicyFn");
        return new EventTimePolicy<>(timestampFn, wrapFn, newWmPolicyFn, watermarkThrottlingFrameSize,
                watermarkThrottlingFrameOffset, idleTimeoutMillis);
    }

    /**
     * Creates and returns a new event time policy that emits the events
     * unchanged (no wrapping function is applied). To get a policy that
     * results in no watermarks being emitted, call {@link #noEventTime()}.
     *
     * @param timestampFn function that extracts the timestamp from the event;
     *      if null, Jet will use the source's native timestamp
     * @param newWmPolicyFn factory of the watermark policy objects
     * @param watermarkThrottlingFrameSize the frame length to which we
     *      throttle watermarks, see {@link #watermarkThrottlingFrameSize()};
     *      must be {@code >= 0}
     * @param watermarkThrottlingFrameOffset the frame offset to which we
     *      throttle watermarks, see {@link #watermarkThrottlingFrameOffset()};
     *      must be {@code >= 0} and, unless the frame size is 0, smaller than
     *      the frame size
     * @param idleTimeoutMillis the timeout after which a partition will be
     *      marked as idle; must be {@code >= 0}. Use 0 to disable the feature.
     * @param <T> event type
     */
    public static <T> EventTimePolicy<T> eventTimePolicy(
            @Nullable ToLongFunctionEx<? super T> timestampFn,
            @Nonnull SupplierEx<? extends WatermarkPolicy> newWmPolicyFn,
            long watermarkThrottlingFrameSize,
            long watermarkThrottlingFrameOffset,
            long idleTimeoutMillis
    ) {
        return eventTimePolicy(timestampFn, noWrapping(), newWmPolicyFn, watermarkThrottlingFrameSize,
                watermarkThrottlingFrameOffset, idleTimeoutMillis);
    }

    /**
     * Returns an event time policy that results in no timestamping. Only
     * useful in jobs with streaming sources that don't do any aggregation.
     * If there is an aggregation step in the job and you use these parameters,
     * your job will keep accumulating the data without producing any output.
     *
     * @param <T> event type
     */
    public static <T> EventTimePolicy<T> noEventTime() {
        return eventTimePolicy(i -> Long.MIN_VALUE, noWrapping(), NO_WATERMARKS, 0, 0, 0);
    }

    /**
     * Returns the shared {@link #NO_WRAPPING} function typed for the caller.
     * The unchecked cast is safe because the function returns its first
     * argument and never inspects its type.
     */
    @SuppressWarnings("unchecked")
    private static <T> ObjLongBiFunction<T, Object> noWrapping() {
        return (ObjLongBiFunction<T, Object>) NO_WRAPPING;
    }

    /**
     * Returns the function that extracts the timestamp from the event, or
     * {@code null} if none was supplied.
     */
    @Nullable
    public ToLongFunctionEx<? super T> timestampFn() {
        return timestampFn;
    }

    /**
     * Returns the function that transforms the received item and its timestamp
     * into the emitted item.
     */
    @Nonnull
    public ObjLongBiFunction<? super T, ?> wrapFn() {
        return wrapFn;
    }

    /**
     * Returns the factory of the watermark policy objects.
     */
    @Nonnull
    public SupplierEx<? extends WatermarkPolicy> newWmPolicyFn() {
        return newWmPolicyFn;
    }

    /**
     * This value together with {@link #watermarkThrottlingFrameOffset()}
     * specify the frame size the watermarks are throttled to. Generally it
     * should match the window slide step used downstream. If there are
     * multiple sliding windows downstream, use the greatest common divisor
     * of them.
     * <p>
     * If this parameter is equal to 0, all watermarks will be suppressed.
     * <p>
     * Technically, a watermark should be emitted after every increase in event
     * time. Because watermarks are broadcast from each processor to all
     * downstream processors, this will bring some overhead. But the watermarks
     * are only needed for window aggregation and only when a window should
     * close, that is at the frame boundary of a sliding window. To reduce the
     * amount of watermarks on the stream, you can configure to emit only those
     * watermarks that would trigger an emission of a new window.
     */
    public long watermarkThrottlingFrameSize() {
        return watermarkThrottlingFrameSize;
    }

    /**
     * See {@link #watermarkThrottlingFrameSize()}
     */
    public long watermarkThrottlingFrameOffset() {
        return watermarkThrottlingFrameOffset;
    }

    /**
     * Returns the amount of time allowed to pass without receiving any events
     * from a partition before marking it as "idle". When the partition
     * becomes idle, the processor emits an {@link
     * com.hazelcast.jet.impl.execution.WatermarkCoalescer#IDLE_MESSAGE} to its
     * output edges. This signals Jet that the watermark can advance as
     * if the partition didn't exist.
     * <p>
     * A value of 0 means the feature is disabled and partitions will never be
     * marked as idle. The value is never negative: negative timeouts are
     * rejected when the policy is created.
     */
    public long idleTimeoutMillis() {
        return idleTimeoutMillis;
    }
}