All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.jet.impl.pipeline.transform.WindowAggregateTransform Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.impl.pipeline.transform;

import com.hazelcast.function.ToLongFunctionEx;
import com.hazelcast.jet.aggregate.AggregateOperation;
import com.hazelcast.jet.core.Edge;
import com.hazelcast.jet.core.SlidingWindowPolicy;
import com.hazelcast.jet.core.TimestampKind;
import com.hazelcast.jet.core.Vertex;
import com.hazelcast.jet.core.function.KeyedWindowResultFunction;
import com.hazelcast.jet.datamodel.WindowResult;
import com.hazelcast.jet.impl.JetEvent;
import com.hazelcast.jet.impl.pipeline.PipelineImpl.Context;
import com.hazelcast.jet.impl.pipeline.Planner;
import com.hazelcast.jet.impl.pipeline.Planner.PlannerVertex;
import com.hazelcast.jet.impl.util.ConstantFunctionEx;
import com.hazelcast.jet.pipeline.SessionWindowDefinition;
import com.hazelcast.jet.pipeline.SlidingWindowDefinition;
import com.hazelcast.jet.pipeline.WindowDefinition;

import javax.annotation.Nonnull;
import java.io.Serial;
import java.util.List;

import static com.hazelcast.jet.core.Edge.between;
import static com.hazelcast.jet.core.SlidingWindowPolicy.slidingWinPolicy;
import static com.hazelcast.jet.core.Vertex.LOCAL_PARALLELISM_USE_DEFAULT;
import static com.hazelcast.jet.core.processor.Processors.accumulateByFrameP;
import static com.hazelcast.jet.core.processor.Processors.aggregateToSessionWindowP;
import static com.hazelcast.jet.core.processor.Processors.aggregateToSlidingWindowP;
import static com.hazelcast.jet.core.processor.Processors.combineToSlidingWindowP;
import static com.hazelcast.jet.impl.JetEvent.jetEvent;
import static com.hazelcast.jet.impl.pipeline.transform.AggregateTransform.FIRST_STAGE_VERTEX_NAME_SUFFIX;
import static java.util.Collections.nCopies;

public class WindowAggregateTransform extends AbstractTransform {

    @Serial
    private static final long serialVersionUID = 1L;

    private static final int MAX_WATERMARK_STRIDE = 100;
    private static final int MIN_WMS_PER_SESSION = 100;
    @SuppressWarnings("rawtypes")
    private static final KeyedWindowResultFunction JET_EVENT_WINDOW_RESULT_FN =
            (start, end, ignoredKey, result, isEarly) ->
                    jetEvent(end - 1, new WindowResult<>(start, end, result, isEarly));

    @Nonnull
    private final AggregateOperation aggrOp;
    @Nonnull
    private final WindowDefinition wDef;

    public WindowAggregateTransform(
            @Nonnull List upstream,
            @Nonnull WindowDefinition wDef,
            @Nonnull AggregateOperation aggrOp
    ) {
        super(createName(wDef), upstream);
        this.aggrOp = aggrOp;
        this.wDef = wDef;
    }

    static String createName(WindowDefinition wDef) {
        if (wDef instanceof SlidingWindowDefinition) {
            return "sliding-window";
        } else if (wDef instanceof SessionWindowDefinition) {
            return "session-window";
        } else {
            throw new IllegalArgumentException(wDef.getClass().getName());
        }
    }

    /**
     * Returns the optimal watermark stride for this window definition.
     * Watermarks that are more spaced out are better for performance, but they
     * hurt the responsiveness of a windowed pipeline stage. The Planner will
     * determine the actual stride, which may be an integer fraction of the
     * value returned here.
     */
    static long preferredWatermarkStride(WindowDefinition wDef) {
        if (wDef instanceof SlidingWindowDefinition definition) {
            return definition.slideBy();
        } else if (wDef instanceof SessionWindowDefinition definition) {
            long timeout = definition.sessionTimeout();
            return Math.min(MAX_WATERMARK_STRIDE, Math.max(1, timeout / MIN_WMS_PER_SESSION));
        } else {
            throw new IllegalArgumentException(wDef.getClass().getName());
        }
    }

    @Override
    public long preferredWatermarkStride() {
        return preferredWatermarkStride(wDef);
    }

    @Override
    public void addToDag(Planner p, Context context) {
        if (wDef instanceof SessionWindowDefinition definition) {
            addSessionWindow(p, definition);
        } else if (aggrOp.combineFn() == null || wDef.earlyResultsPeriod() > 0) {
            addSlidingWindowSingleStage(p, (SlidingWindowDefinition) wDef);
        } else {
            addSlidingWindowTwoStage(p, (SlidingWindowDefinition) wDef, context);
        }
    }

    //               ---------       ---------
    //              | source0 | ... | sourceN |
    //               ---------       ---------
    //                   |              |
    //              distributed    distributed
    //              all-to-one      all-to-one
    //                   \              /
    //                    ---\    /-----
    //                        v  v
    //             ---------------------------
    //            | aggregateToSlidingWindowP | local parallelism = 1
    //             ---------------------------
    private void addSlidingWindowSingleStage(Planner p, SlidingWindowDefinition wDef) {
        determinedLocalParallelism(1);
        PlannerVertex pv = p.addVertex(this, name(), determinedLocalParallelism(),
                aggregateToSlidingWindowP(
                        nCopies(aggrOp.arity(), new ConstantFunctionEx<>(name().hashCode())),
                        nCopies(aggrOp.arity(), (ToLongFunctionEx>) JetEvent::timestamp),
                        TimestampKind.EVENT,
                        slidingWinPolicy(wDef.windowSize(), wDef.slideBy()),
                        wDef.earlyResultsPeriod(),
                        aggrOp,
                        jetEventOfWindowResultFn()
                ));
        p.addEdges(this, pv.v, edge -> edge.distributed().allToOne(name().hashCode()));
    }

    // WHEN PRESERVE ORDER IS NOT ACTIVE
    //               --------        ---------
    //              | source0 | ... | sourceN |
    //               --------        ---------
    //                   |               |
    //                 local           local
    //                unicast         unicast
    //                   v               v
    //                  --------------------
    //                 | accumulateByFrameP | keyFn = constantKey()
    //                  --------------------
    //                           |
    //                      distributed
    //                       all-to-one
    //                           v
    //               -------------------------
    //              | combineToSlidingWindowP | local parallelism = 1
    //               -------------------------
    // WHEN PRESERVE ORDER IS ACTIVE
    //               --------        ---------
    //              | source0 | ... | sourceN |
    //               --------        ---------
    //                   |               |
    //                isolated        isolated
    //                   v               v
    //                  --------------------
    //                 | accumulateByFrameP | keyFn = constantKey()
    //                  --------------------
    //                           |
    //                      distributed
    //                       all-to-one
    //                           v
    //               -------------------------
    //              | combineToSlidingWindowP | local parallelism = 1
    //               -------------------------
    private void addSlidingWindowTwoStage(Planner p, SlidingWindowDefinition wDef, Context context) {
        determineLocalParallelism(LOCAL_PARALLELISM_USE_DEFAULT, context, p.isPreserveOrder());
        SlidingWindowPolicy winPolicy = slidingWinPolicy(wDef.windowSize(), wDef.slideBy());
        Vertex v1 = p.dag.newVertex(name() + FIRST_STAGE_VERTEX_NAME_SUFFIX, accumulateByFrameP(
                nCopies(aggrOp.arity(), new ConstantFunctionEx<>(name().hashCode())),
                nCopies(aggrOp.arity(), (ToLongFunctionEx>) JetEvent::timestamp),
                TimestampKind.EVENT,
                winPolicy,
                aggrOp
        ));

        v1.localParallelism(determinedLocalParallelism());
        if (p.isPreserveOrder()) {
            p.addEdges(this, v1, Edge::isolated);
        } else {
            // when preserveOrder is false, we use requested parallelism
            // for 1st stage: edge to it is local-unicast, each processor
            // can process part of the input which will be combined into
            // one result in 2nd stage.
            p.addEdges(this, v1);
        }
        determinedLocalParallelism(1);
        PlannerVertex pv2 = p.addVertex(this, name(), determinedLocalParallelism(),
                combineToSlidingWindowP(winPolicy, aggrOp, jetEventOfWindowResultFn()));

        p.dag.edge(between(v1, pv2.v).distributed().allToOne(name().hashCode()));
    }

    //               ---------       ---------
    //              | source0 | ... | sourceN |
    //               ---------       ---------
    //                   |              |
    //              distributed    distributed
    //              all-to-one      all-to-one
    //                   \              /
    //                    ---\    /-----
    //                        v  v
    //             ---------------------------
    //            | aggregateToSessionWindowP | local parallelism = 1
    //             ---------------------------
    private void addSessionWindow(Planner p, SessionWindowDefinition wDef) {
        determinedLocalParallelism(1);
        PlannerVertex pv = p.addVertex(this, name(), determinedLocalParallelism(),
                aggregateToSessionWindowP(
                        wDef.sessionTimeout(),
                        wDef.earlyResultsPeriod(),
                        nCopies(aggrOp.arity(), (ToLongFunctionEx>) JetEvent::timestamp),
                        nCopies(aggrOp.arity(), new ConstantFunctionEx<>(name().hashCode())),
                        aggrOp,
                        jetEventOfWindowResultFn()));
        p.addEdges(this, pv.v, edge -> edge.distributed().allToOne(name().hashCode()));
    }

    @SuppressWarnings("unchecked")
    private static  KeyedWindowResultFunction> jetEventOfWindowResultFn() {
        return JET_EVENT_WINDOW_RESULT_FN;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy