com.hazelcast.jet.core.WatermarkPolicies Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of hazelcast-jet Show documentation
There is a newer version: 4.5.4
/*
 * Copyright (c) 2008-2018, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.core;

import com.hazelcast.jet.function.DistributedSupplier;
import com.hazelcast.jet.impl.util.WatermarkPolicyUtil;
import com.hazelcast.jet.impl.util.WatermarkPolicyUtil.WatermarkPolicyBase;

import javax.annotation.Nonnull;

import static com.hazelcast.jet.impl.util.TimestampHistory.DEFAULT_NUM_STORED_SAMPLES;
import static com.hazelcast.util.Preconditions.checkNotNegative;
import static java.util.concurrent.TimeUnit.MILLISECONDS;

/**
 * Utility class with factories of several useful watermark policies.
 * <p>
 * Each factory validates its arguments when it is called (not when the
 * returned supplier is later invoked), so an illegal argument is reported
 * at the place where the mistake was made.
 */
// NOTE(review): all factories return the raw DistributedSupplier type. It
// presumably should be parameterized with the watermark policy type; confirm
// against the declarations in WatermarkPolicyUtil before tightening it.
public final class WatermarkPolicies {

    private WatermarkPolicies() {
    }

    /**
     * Maintains a watermark that lags behind the top observed timestamp by the
     * given amount. In the case of a stream lull the watermark does not
     * advance towards the top observed timestamp and remains behind it
     * indefinitely.
     *
     * @param lag the desired difference between the top observed timestamp
     *            and the watermark; must not be negative
     * @return a supplier that creates a new policy instance on each call
     */
    @Nonnull
    public static DistributedSupplier limitingLag(long lag) {
        checkNotNegative(lag, "lag must not be negative");

        return () -> new WatermarkPolicyBase() {
            @Override
            public long reportEvent(long timestamp) {
                // "timestamp - lag" would wrap around below Long.MIN_VALUE for
                // such timestamps. "Long.MIN_VALUE + lag" itself cannot
                // overflow because lag was checked to be non-negative.
                // NOTE(review): this branch bypasses makeWmAtLeast(); confirm
                // that callers tolerate a value below an earlier result.
                if (timestamp < Long.MIN_VALUE + lag) {
                    return Long.MIN_VALUE;
                }
                return makeWmAtLeast(timestamp - lag);
            }
        };
    }

    /**
     * Maintains a watermark that lags behind the top observed timestamp by at
     * most the given amount and is additionally guaranteed to reach the
     * timestamp of any given event within {@code maxDelayMs} of system time
     * after observing it.
     * <p>
     * Keep in mind that the scope of the "top observed timestamp" is only the
     * items received by an individual processor: the watermark of a processor
     * using this policy will never advance beyond the timestamp it observed in
     * isolation from the rest of the system. In the case of severe stream skew
     * a given processor's watermark may lag significantly behind that of other
     * processors and cause delays when getting coalesced with them in
     * downstream processors. To overcome this you can also configure {@link
     * com.hazelcast.jet.config.JobConfig#setMaxWatermarkRetainMillis
     * JobConfig.setMaxWatermarkRetainMillis}.
     *
     * @param lag upper bound on the difference between the top observed
     *            timestamp and the watermark; must not be negative
     * @param maxDelayMs upper bound (in milliseconds) on how long it can take
     *                   for the watermark to reach any observed event's
     *                   timestamp; must not be negative
     * @return a supplier that builds the policy when invoked
     */
    @Nonnull
    public static DistributedSupplier limitingLagAndDelay(long lag, long maxDelayMs) {
        checkNotNegative(lag, "lag must not be negative");
        checkNotNegative(maxDelayMs, "maxDelayMs must not be negative");

        // The conversion does not depend on anything the supplier does, so it
        // is done once here instead of on every supplier invocation.
        final long maxDelayNanos = MILLISECONDS.toNanos(maxDelayMs);
        return () -> WatermarkPolicyUtil.limitingLagAndDelay(
                lag, maxDelayNanos, DEFAULT_NUM_STORED_SAMPLES, System::nanoTime);
    }

    /**
     * Maintains watermark that lags behind the top timestamp by at most
     * {@code timestampLag} and behind wall-clock time by at most {@code
     * wallClockLag}. It assumes that the event timestamp is in milliseconds
     * since Unix epoch and will use that fact to correlate it with wall-clock
     * time acquired from the underlying OS. Note that wall-clock time is
     * non-monotonic and sudden jumps that may occur in it will cause temporary
     * disruptions in the functioning of this policy.
     * <p>
     * In most cases the {@link #limitingLagAndLull(long, long)
     * limitingLagAndLull} policy should be preferred; this is a backup option
     * for cases where some substreams may never see an event.
     *
     * @param timestampLag maximum difference between the top observed timestamp
     *                     and the watermark; must not be negative
     * @param wallClockLag maximum difference between the current value of
     *                     {@code System.currentTimeMillis} and the watermark;
     *                     must not be negative
     * @return a supplier that builds the policy when invoked
     */
    @Nonnull
    public static DistributedSupplier limitingTimestampAndWallClockLag(
            long timestampLag, long wallClockLag
    ) {
        checkNotNegative(timestampLag, "timestampLag must not be negative");
        checkNotNegative(wallClockLag, "wallClockLag must not be negative");

        return () -> WatermarkPolicyUtil.limitingTimestampAndWallClockLag(timestampLag, wallClockLag,
                System::currentTimeMillis);
    }

    /**
     * Maintains watermark that lags behind the top timestamp by the amount
     * specified with {@code lag}. Assumes that the event timestamp is given
     * in milliseconds and will use that fact to correlate it with the passage
     * of system time. There is no requirement on any specific point of origin
     * for the timestamp, i.e., the zero value can denote any point in time as
     * long as it is fixed.
     * <p>
     * When the defined {@code maxLullMs} period elapses without observing more
     * events, watermark will start advancing in lockstep with system time
     * acquired from the underlying OS's monotonic clock.
     * <p>
     * If no event is ever observed, watermark will advance from the initial
     * value of {@code Long.MIN_VALUE}. Therefore this policy can be used only
     * when there is a guarantee that each substream will emit at least one
     * event that will initialize the timestamp. Otherwise the empty substream
     * will hold back the processing of all other substreams by keeping the
     * watermark below any realistic value.
     *
     * @param lag the desired difference between the top observed timestamp
     *            and the watermark; must not be negative
     * @param maxLullMs maximum duration of a lull period before starting to
     *                  advance watermark with system time; must not be
     *                  negative
     * @return a supplier that builds the policy when invoked
     */
    @Nonnull
    public static DistributedSupplier limitingLagAndLull(long lag, long maxLullMs) {
        checkNotNegative(lag, "lag must not be negative");
        checkNotNegative(maxLullMs, "maxLullMs must not be negative");

        return () -> WatermarkPolicyUtil.limitingLagAndLull(lag, maxLullMs, System::nanoTime);
    }
}