All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.kafka.streams.kstream.internals.KStreamSessionWindowAggregate Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.kstream.internals;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.metrics.Sensor;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.EmitStrategy;
import org.apache.kafka.streams.kstream.Initializer;
import org.apache.kafka.streams.kstream.Merger;
import org.apache.kafka.streams.kstream.SessionWindows;
import org.apache.kafka.streams.kstream.Windowed;
import org.apache.kafka.streams.processor.api.ContextualProcessor;
import org.apache.kafka.streams.processor.api.Processor;
import org.apache.kafka.streams.processor.api.ProcessorContext;
import org.apache.kafka.streams.processor.api.Record;
import org.apache.kafka.streams.processor.api.RecordMetadata;
import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
import org.apache.kafka.streams.state.KeyValueIterator;
import org.apache.kafka.streams.state.SessionStore;
import org.apache.kafka.streams.state.ValueAndTimestamp;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;

import static org.apache.kafka.streams.StreamsConfig.InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION;
import static org.apache.kafka.streams.processor.internals.metrics.ProcessorNodeMetrics.emitFinalLatencySensor;
import static org.apache.kafka.streams.processor.internals.metrics.ProcessorNodeMetrics.emittedRecordsSensor;
import static org.apache.kafka.streams.processor.internals.metrics.TaskMetrics.droppedRecordsSensor;

public class KStreamSessionWindowAggregate implements KStreamAggProcessorSupplier, VAgg> {

    private static final Logger LOG = LoggerFactory.getLogger(KStreamSessionWindowAggregate.class);

    private final String storeName;
    private final SessionWindows windows;
    private final Initializer initializer;
    private final Aggregator aggregator;
    private final Merger sessionMerger;
    private final EmitStrategy emitStrategy;

    private boolean sendOldValues = false;

    public KStreamSessionWindowAggregate(final SessionWindows windows,
                                         final String storeName,
                                         final EmitStrategy emitStrategy,
                                         final Initializer initializer,
                                         final Aggregator aggregator,
                                         final Merger sessionMerger) {
        this.windows = windows;
        this.storeName = storeName;
        this.emitStrategy = emitStrategy;
        this.initializer = initializer;
        this.aggregator = aggregator;
        this.sessionMerger = sessionMerger;
    }

    @Override
    public Processor, Change> get() {
        return new KStreamSessionWindowAggregateProcessor();
    }

    public SessionWindows windows() {
        return windows;
    }

    @Override
    public void enableSendingOldValues() {
        sendOldValues = true;
    }

    private class KStreamSessionWindowAggregateProcessor extends
        ContextualProcessor, Change> {

        private SessionStore store;
        private TimestampedTupleForwarder, VAgg> tupleForwarder;
        private Sensor droppedRecordsSensor;
        private Sensor emittedRecordsSensor;
        private Sensor emitFinalLatencySensor;
        private long lastEmitWindowCloseTime = ConsumerRecord.NO_TIMESTAMP;
        private long observedStreamTime = ConsumerRecord.NO_TIMESTAMP;
        private InternalProcessorContext, Change> internalProcessorContext;

        private final Time time = Time.SYSTEM;
        protected final KStreamImplJoin.TimeTracker timeTracker = new KStreamImplJoin.TimeTracker();

        @Override
        public void init(final ProcessorContext, Change> context) {
            super.init(context);
            internalProcessorContext = (InternalProcessorContext, Change>) context;
            final StreamsMetricsImpl metrics = (StreamsMetricsImpl) context.metrics();
            final String threadId = Thread.currentThread().getName();
            final String processorName = internalProcessorContext.currentNode().name();
            droppedRecordsSensor = droppedRecordsSensor(threadId, context.taskId().toString(), metrics);
            emittedRecordsSensor = emittedRecordsSensor(threadId, context.taskId().toString(), processorName, metrics);
            emitFinalLatencySensor = emitFinalLatencySensor(threadId, context.taskId().toString(), processorName, metrics);
            store = context.getStateStore(storeName);

            if (emitStrategy.type() == EmitStrategy.StrategyType.ON_WINDOW_CLOSE) {
                // Restore last emit close time for ON_WINDOW_CLOSE strategy
                final Long lastEmitWindowCloseTime = internalProcessorContext.processorMetadataForKey(storeName);
                if (lastEmitWindowCloseTime != null) {
                    this.lastEmitWindowCloseTime = lastEmitWindowCloseTime;
                }
                final long emitInterval = StreamsConfig.InternalConfig.getLong(
                        context.appConfigs(),
                        EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION,
                        1000L
                );
                timeTracker.setEmitInterval(emitInterval);

                tupleForwarder = new TimestampedTupleForwarder<>(context, sendOldValues);
            } else {
                tupleForwarder = new TimestampedTupleForwarder<>(
                    store,
                    context,
                    new SessionCacheFlushListener<>(context),
                    sendOldValues);
            }
        }

        @Override
        public void process(final Record record) {
            // if the key is null, we do not need proceed aggregating
            // the record with the table
            if (record.key() == null) {
                logSkippedRecordForNullKey();
                return;
            }

            final long timestamp = record.timestamp();
            observedStreamTime = Math.max(observedStreamTime, timestamp);
            final long windowCloseTime = observedStreamTime - windows.gracePeriodMs() - windows.inactivityGap();

            final List, VAgg>> merged = new ArrayList<>();
            final SessionWindow newSessionWindow = new SessionWindow(timestamp, timestamp);
            SessionWindow mergedWindow = newSessionWindow;
            VAgg agg = initializer.apply();

            try (
                final KeyValueIterator, VAgg> iterator = store.findSessions(
                    record.key(),
                    timestamp - windows.inactivityGap(),
                    timestamp + windows.inactivityGap()
                )
            ) {
                while (iterator.hasNext()) {
                    final KeyValue, VAgg> next = iterator.next();
                    merged.add(next);
                    agg = sessionMerger.apply(record.key(), agg, next.value);
                    mergedWindow = mergeSessionWindow(mergedWindow, (SessionWindow) next.key.window());
                }
            }

            if (mergedWindow.end() < windowCloseTime) {
                logSkippedRecordForExpiredWindow(timestamp, windowCloseTime, mergedWindow);
            } else {
                if (!mergedWindow.equals(newSessionWindow)) {
                    for (final KeyValue, VAgg> session : merged) {
                        store.remove(session.key);

                        maybeForwardUpdate(session.key, session.value, null);
                    }
                }

                agg = aggregator.apply(record.key(), record.value(), agg);
                final Windowed sessionKey = new Windowed<>(record.key(), mergedWindow);
                store.put(sessionKey, agg);

                maybeForwardUpdate(sessionKey, null, agg);
            }

            maybeForwardFinalResult(record, windowCloseTime);
        }

        private void maybeForwardUpdate(final Windowed windowedkey,
                                        final VAgg oldAgg,
                                        final VAgg newAgg) {
            if (emitStrategy.type() == EmitStrategy.StrategyType.ON_WINDOW_CLOSE) {
                return;
            }

            // Update the sent record timestamp to the window end time if possible
            final long newTimestamp = windowedkey.window().end();
            tupleForwarder.maybeForward(new Record<>(windowedkey, new Change<>(newAgg, sendOldValues ? oldAgg : null), newTimestamp));
        }

        // TODO: consolidate SessionWindow with TimeWindow to merge common functions
        private void maybeForwardFinalResult(final Record record, final long windowCloseTime) {
            if (shouldEmitFinal(windowCloseTime)) {
                final long emitRangeUpperBound = emitRangeUpperBound(windowCloseTime);

                // if the upper bound is smaller than 0, then there's no window closed ever;
                // and we can skip range fetching
                if (emitRangeUpperBound >= 0) {
                    final long emitRangeLowerBound = emitRangeLowerBound();

                    if (shouldRangeFetch(emitRangeLowerBound, emitRangeUpperBound)) {
                        fetchAndEmit(record, windowCloseTime, emitRangeLowerBound, emitRangeUpperBound);
                    }
                }
            }
        }

        private boolean shouldEmitFinal(final long windowCloseTime) {
            if (emitStrategy.type() != EmitStrategy.StrategyType.ON_WINDOW_CLOSE) {
                return false;
            }

            final long now = internalProcessorContext.currentSystemTimeMs();
            // Throttle emit frequency
            if (now < timeTracker.nextTimeToEmit) {
                return false;
            }

            // Schedule next emit time based on now to avoid the case that if system time jumps a lot,
            // this can be triggered every time
            timeTracker.nextTimeToEmit = now;
            timeTracker.advanceNextTimeToEmit();

            // Only EMIT if the window close time does progress
            return lastEmitWindowCloseTime == ConsumerRecord.NO_TIMESTAMP || lastEmitWindowCloseTime < windowCloseTime;
        }

        private long emitRangeLowerBound() {
            return Math.max(0L, lastEmitWindowCloseTime);
        }

        private long emitRangeUpperBound(final long windowCloseTime) {
            // Session window's start and end timestamps are inclusive, so
            // we should minus 1 for the inclusive closed window-end upper bound
            return windowCloseTime - 1;
        }

        private boolean shouldRangeFetch(final long emitRangeLowerBound, final long emitRangeUpperBound) {
            // since a session window could be a single point (i.e. [t, t]),
            // we need to range fetch and emit even if the upper and lower bound are the same
            return emitRangeUpperBound >= emitRangeLowerBound;
        }

        private void fetchAndEmit(final Record record,
                                  final long windowCloseTime,
                                  final long emitRangeLowerBound,
                                  final long emitRangeUpperBound) {
            final long startMs = time.milliseconds();

            int emittedCount = 0;

            // Only time ordered (indexed) session store should have implemented
            // this function, otherwise a not-supported exception would throw
            try (final KeyValueIterator, VAgg> windowToEmit = store
                    .findSessions(emitRangeLowerBound, emitRangeUpperBound)) {

                while (windowToEmit.hasNext()) {
                    emittedCount++;
                    final KeyValue, VAgg> kv = windowToEmit.next();

                    tupleForwarder.maybeForward(
                        record.withKey(kv.key)
                            .withValue(new Change<>(kv.value, null))
                            // set the timestamp as the window end timestamp
                            .withTimestamp(kv.key.window().end())
                            .withHeaders(record.headers()));
                }
            }
            emittedRecordsSensor.record(emittedCount);
            emitFinalLatencySensor.record(time.milliseconds() - startMs);

            lastEmitWindowCloseTime = windowCloseTime;
            internalProcessorContext.addProcessorMetadataKeyValue(storeName, windowCloseTime);
        }

        private void logSkippedRecordForNullKey() {
            if (context().recordMetadata().isPresent()) {
                final RecordMetadata recordMetadata = context().recordMetadata().get();
                LOG.warn(
                        "Skipping record due to null key. "
                                + "topic=[{}] partition=[{}] offset=[{}]",
                        recordMetadata.topic(), recordMetadata.partition(), recordMetadata.offset()
                );
            } else {
                LOG.warn(
                        "Skipping record due to null key. Topic, partition, and offset not known."
                );
            }
            droppedRecordsSensor.record();
        }

        private void logSkippedRecordForExpiredWindow(final long timestamp,
                                                      final long windowExpire,
                                                      final SessionWindow window) {
            final String windowString = "[" + window.start() + "," + window.end() + "]";

            if (context().recordMetadata().isPresent()) {
                final RecordMetadata recordMetadata = context().recordMetadata().get();
                LOG.warn("Skipping record for expired window. " +
                                "topic=[{}] " +
                                "partition=[{}] " +
                                "offset=[{}] " +
                                "timestamp=[{}] " +
                                "window={} " +
                                "expiration=[{}] " +
                                "streamTime=[{}]",
                        recordMetadata.topic(),
                        recordMetadata.partition(),
                        recordMetadata.offset(),
                        timestamp,
                        windowString,
                        windowExpire,
                        observedStreamTime
                );
            } else {
                LOG.warn("Skipping record for expired window. Topic, partition, and offset not known. " +
                                "timestamp=[{}] " +
                                "window={} " +
                                "expiration=[{}] " +
                                "streamTime=[{}]",
                        timestamp,
                        windowString,
                        windowExpire,
                        observedStreamTime
                );
            }
            droppedRecordsSensor.record();
        }
    }

    private SessionWindow mergeSessionWindow(final SessionWindow one, final SessionWindow two) {
        final long start = Math.min(one.start(), two.start());
        final long end = Math.max(one.end(), two.end());
        return new SessionWindow(start, end);
    }

    @Override
    public KTableValueGetterSupplier, VAgg> view() {
        return new KTableValueGetterSupplier, VAgg>() {
            @Override
            public KTableValueGetter, VAgg> get() {
                return new KTableSessionWindowValueGetter();
            }

            @Override
            public String[] storeNames() {
                return new String[]{storeName};
            }
        };
    }

    private class KTableSessionWindowValueGetter implements KTableValueGetter, VAgg> {

        private SessionStore store;

        @Override
        public void init(final ProcessorContext context) {
            store = context.getStateStore(storeName);
        }

        @Override
        public ValueAndTimestamp get(final Windowed key) {
            return ValueAndTimestamp.make(
                store.fetchSession(key.key(), key.window().start(), key.window().end()),
                key.window().end());
        }

        @Override
        public boolean isVersioned() {
            return false;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy