// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// org.apache.kafka.streams.kstream.KStream Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kafka.streams.kstream;

import org.apache.kafka.common.annotation.InterfaceStability;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.StreamPartitioner;

/**
 * {@link KStream} is an abstraction of a record stream of key-value pairs.
 * 

* A {@link KStream} is either defined from one or multiple Kafka topics that are consumed message by message or * the result of a {@link KStream} transformation. A {@link KTable} can also be converted into a {@link KStream}. *

* A {@link KStream} can be transformed record by record, joined with another {@link KStream} or {@link KTable}, or * can be aggregated into a {@link KTable}. * * @param Type of keys * @param Type of values * * @see KTable */ @InterfaceStability.Unstable public interface KStream { /** * Create a new instance of {@link KStream} that consists of all elements of this stream which satisfy a predicate. * * @param predicate the instance of {@link Predicate} * * @return a {@link KStream} that contains only those records that satisfy the given predicate */ KStream filter(Predicate predicate); /** * Create a new instance of {@link KStream} that consists all elements of this stream which do not satisfy a predicate. * * @param predicate the instance of {@link Predicate} * * @return a {@link KStream} that contains only those records that do not satisfy the given predicate */ KStream filterNot(Predicate predicate); /** * Create a new key from the current key and value. * * @param mapper the instance of {@link KeyValueMapper} * @param the new key type on the stream * * @return a {@link KStream} that contains records with different key type and same value type */ KStream selectKey(KeyValueMapper mapper); /** * Create a new instance of {@link KStream} by transforming each element in this stream into a different element in the new stream. * * @param mapper the instance of {@link KeyValueMapper} * @param the key type of the new stream * @param the value type of the new stream * * @return a {@link KStream} that contains records with new key and value type */ KStream map(KeyValueMapper> mapper); /** * Create a new instance of {@link KStream} by transforming the value of each element in this stream into a new value in the new stream. 
* * @param mapper the instance of {@link ValueMapper} * @param the value type of the new stream * * @return a {@link KStream} that contains records with unmodified keys and new values of different type */ KStream mapValues(ValueMapper mapper); /** * Print the elements of this stream to System.out * * Implementors will need to override toString for keys and values that are not of * type String, Integer etc to get meaningful information. */ void print(); /** * Print the elements of this stream to System.out * * @param keySerde key serde used to send key-value pairs, * if not specified the default serde defined in the configs will be used * @param valSerde value serde used to send key-value pairs, * if not specified the default serde defined in the configs will be used * * Implementors will need to override toString for keys and values that are not of * type String, Integer etc to get meaningful information. */ void print(Serde keySerde, Serde valSerde); /** * Write the elements of this stream to a file at the given path. * * @param filePath name of file to write to * * Implementors will need to override toString for keys and values that are not of * type String, Integer etc to get meaningful information. */ void writeAsText(String filePath); /** * @param filePath name of file to write to * @param keySerde key serde used to send key-value pairs, * if not specified the default serde defined in the configs will be used * @param valSerde value serde used to send key-value pairs, * if not specified the default serde defined in the configs will be used * * Implementors will need to override toString for keys and values that are not of * type String, Integer etc to get meaningful information. */ void writeAsText(String filePath, Serde keySerde, Serde valSerde); /** * Create a new instance of {@link KStream} by transforming each element in this stream into zero or more elements in the new stream. 
* * @param mapper the instance of {@link KeyValueMapper} * @param the key type of the new stream * @param the value type of the new stream * * @return a {@link KStream} that contains more or less records with new key and value type */ KStream flatMap(KeyValueMapper>> mapper); /** * Create a new instance of {@link KStream} by transforming the value of each element in this stream into zero or more values with the same key in the new stream. * * @param processor the instance of {@link ValueMapper} * @param the value type of the new stream * * @return a {@link KStream} that contains more or less records with unmodified keys and new values of different type */ KStream flatMapValues(ValueMapper> processor); /** * Creates an array of {@link KStream} from this stream by branching the elements in the original stream based on the supplied predicates. * Each element is evaluated against the supplied predicates, and predicates are evaluated in order. Each stream in the result array * corresponds position-wise (index) to the predicate in the supplied predicates. The branching happens on first-match: An element * in the original stream is assigned to the corresponding result stream for the first predicate that evaluates to true, and * assigned to this stream only. An element will be dropped if none of the predicates evaluate to true. * * @param predicates the ordered list of {@link Predicate} instances * * @return multiple distinct substreams of this {@link KStream} */ KStream[] branch(Predicate... predicates); /** * Materialize this stream to a topic, also creates a new instance of {@link KStream} from the topic * using default serializers and deserializers and producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner}. * This is equivalent to calling {@link #to(String)} and {@link org.apache.kafka.streams.kstream.KStreamBuilder#stream(String...)}. 
* * @param topic the topic name * * @return a {@link KStream} that contains the exact same records as this {@link KStream} */ KStream through(String topic); /** * Perform an action on each element of {@link KStream}. * Note that this is a terminal operation that returns void. * * @param action an action to perform on each element */ void foreach(ForeachAction action); /** * Materialize this stream to a topic, also creates a new instance of {@link KStream} from the topic * using default serializers and deserializers and a customizable {@link StreamPartitioner} to determine the distribution of records to partitions. * This is equivalent to calling {@link #to(StreamPartitioner, String)} and {@link org.apache.kafka.streams.kstream.KStreamBuilder#stream(String...)}. * * @param partitioner the function used to determine how records are distributed among partitions of the topic, * if not specified producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used * @param topic the topic name * * @return a {@link KStream} that contains the exact same records as this {@link KStream} */ KStream through(StreamPartitioner partitioner, String topic); /** * Materialize this stream to a topic, also creates a new instance of {@link KStream} from the topic. * If {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} * for the key {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} is used * — otherwise producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} is used. * This is equivalent to calling {@link #to(Serde, Serde, String)} and * {@link org.apache.kafka.streams.kstream.KStreamBuilder#stream(Serde, Serde, String...)}. 
* * @param keySerde key serde used to send key-value pairs, * if not specified the default key serde defined in the configuration will be used * @param valSerde value serde used to send key-value pairs, * if not specified the default value serde defined in the configuration will be used * @param topic the topic name * * @return a {@link KStream} that contains the exact same records as this {@link KStream} */ KStream through(Serde keySerde, Serde valSerde, String topic); /** * Materialize this stream to a topic, also creates a new instance of {@link KStream} from the topic * using a customizable {@link StreamPartitioner} to determine the distribution of records to partitions. * This is equivalent to calling {@link #to(Serde, Serde, StreamPartitioner, String)} and * {@link org.apache.kafka.streams.kstream.KStreamBuilder#stream(Serde, Serde, String...)}. * * @param keySerde key serde used to send key-value pairs, * if not specified the default key serde defined in the configuration will be used * @param valSerde value serde used to send key-value pairs, * if not specified the default value serde defined in the configuration will be used * @param partitioner the function used to determine how records are distributed among partitions of the topic, * if not specified and {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} will be used * — otherwise {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used * @param topic the topic name * * @return a {@link KStream} that contains the exact same records as this {@link KStream} */ KStream through(Serde keySerde, Serde valSerde, StreamPartitioner partitioner, String topic); /** * Materialize this stream to a topic using default serializers specified in the config * and producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner}. 
* * @param topic the topic name */ void to(String topic); /** * Materialize this stream to a topic using default serializers specified in the config and a customizable * {@link StreamPartitioner} to determine the distribution of records to partitions. * * @param partitioner the function used to determine how records are distributed among partitions of the topic, * if not specified producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used * @param topic the topic name */ void to(StreamPartitioner partitioner, String topic); /** * Materialize this stream to a topic. If {@code keySerde} provides a * {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} is used * — otherwise producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} is used. * * @param keySerde key serde used to send key-value pairs, * if not specified the default serde defined in the configs will be used * @param valSerde value serde used to send key-value pairs, * if not specified the default serde defined in the configs will be used * @param topic the topic name */ void to(Serde keySerde, Serde valSerde, String topic); /** * Materialize this stream to a topic using a customizable {@link StreamPartitioner} to determine the distribution of records to partitions. 
* * @param keySerde key serde used to send key-value pairs, * if not specified the default serde defined in the configs will be used * @param valSerde value serde used to send key-value pairs, * if not specified the default serde defined in the configs will be used * @param partitioner the function used to determine how records are distributed among partitions of the topic, * if not specified and {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} will be used * — otherwise {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used * @param topic the topic name */ void to(Serde keySerde, Serde valSerde, StreamPartitioner partitioner, String topic); /** * Create a new {@link KStream} instance by applying a {@link org.apache.kafka.streams.kstream.Transformer} to all elements in this stream, one element at a time. * * @param transformerSupplier the instance of {@link TransformerSupplier} that generates {@link org.apache.kafka.streams.kstream.Transformer} * @param stateStoreNames the names of the state store used by the processor * * @return a new {@link KStream} with transformed key and value types */ KStream transform(TransformerSupplier> transformerSupplier, String... stateStoreNames); /** * Create a new {@link KStream} instance by applying a {@link org.apache.kafka.streams.kstream.ValueTransformer} to all values in this stream, one element at a time. * * @param valueTransformerSupplier the instance of {@link ValueTransformerSupplier} that generates {@link org.apache.kafka.streams.kstream.ValueTransformer} * @param stateStoreNames the names of the state store used by the processor * * @return a {@link KStream} that contains records with unmodified keys and transformed values with type {@code R} */ KStream transformValues(ValueTransformerSupplier valueTransformerSupplier, String... 
stateStoreNames); /** * Process all elements in this stream, one element at a time, by applying a {@link org.apache.kafka.streams.processor.Processor}. * * @param processorSupplier the supplier of {@link ProcessorSupplier} that generates {@link org.apache.kafka.streams.processor.Processor} * @param stateStoreNames the names of the state store used by the processor */ void process(ProcessorSupplier processorSupplier, String... stateStoreNames); /** * Combine element values of this stream with another {@link KStream}'s elements of the same key using windowed Inner Join. * * @param otherStream the instance of {@link KStream} joined with this stream * @param joiner the instance of {@link ValueJoiner} * @param windows the specification of the {@link JoinWindows} * @param keySerde key serdes for materializing both streams, * if not specified the default serdes defined in the configs will be used * @param thisValueSerde value serdes for materializing this stream, * if not specified the default serdes defined in the configs will be used * @param otherValueSerde value serdes for materializing the other stream, * if not specified the default serdes defined in the configs will be used * @param the value type of the other stream * @param the value type of the new stream * * @return a {@link KStream} that contains join-records for each key and values computed by the given {@link ValueJoiner}, * one for each matched record-pair with the same key and within the joining window intervals */ KStream join( KStream otherStream, ValueJoiner joiner, JoinWindows windows, Serde keySerde, Serde thisValueSerde, Serde otherValueSerde); /** * Combine element values of this stream with another {@link KStream}'s elements of the same key using windowed Inner Join * with default serializers and deserializers. 
* * @param otherStream the instance of {@link KStream} joined with this stream * @param joiner the instance of {@link ValueJoiner} * @param windows the specification of the {@link JoinWindows} * @param the value type of the other stream * @param the value type of the new stream * * @return a {@link KStream} that contains join-records for each key and values computed by the given {@link ValueJoiner}, * one for each matched record-pair with the same key and within the joining window intervals */ KStream join( KStream otherStream, ValueJoiner joiner, JoinWindows windows); /** * Combine values of this stream with another {@link KStream}'s elements of the same key using windowed Outer Join. * * @param otherStream the instance of {@link KStream} joined with this stream * @param joiner the instance of {@link ValueJoiner} * @param windows the specification of the {@link JoinWindows} * @param keySerde key serdes for materializing both streams, * if not specified the default serdes defined in the configs will be used * @param thisValueSerde value serdes for materializing this stream, * if not specified the default serdes defined in the configs will be used * @param otherValueSerde value serdes for materializing the other stream, * if not specified the default serdes defined in the configs will be used * @param the value type of the other stream * @param the value type of the new stream * * @return a {@link KStream} that contains join-records for each key and values computed by the given {@link ValueJoiner}, * one for each matched record-pair with the same key and within the joining window intervals */ KStream outerJoin( KStream otherStream, ValueJoiner joiner, JoinWindows windows, Serde keySerde, Serde thisValueSerde, Serde otherValueSerde); /** * Combine values of this stream with another {@link KStream}'s elements of the same key using windowed Outer Join * with default serializers and deserializers. 
* * @param otherStream the instance of {@link KStream} joined with this stream * @param joiner the instance of {@link ValueJoiner} * @param windows the specification of the {@link JoinWindows} * @param the value type of the other stream * @param the value type of the new stream * * @return a {@link KStream} that contains join-records for each key and values computed by the given {@link ValueJoiner}, * one for each matched record-pair with the same key and within the joining window intervals */ KStream outerJoin( KStream otherStream, ValueJoiner joiner, JoinWindows windows); /** * Combine values of this stream with another {@link KStream}'s elements of the same key using windowed Left Join. * * @param otherStream the instance of {@link KStream} joined with this stream * @param joiner the instance of {@link ValueJoiner} * @param windows the specification of the {@link JoinWindows} * @param keySerde key serdes for materializing the other stream, * if not specified the default serdes defined in the configs will be used * @param otherValueSerde value serdes for materializing the other stream, * if not specified the default serdes defined in the configs will be used * @param the value type of the other stream * @param the value type of the new stream * * @return a {@link KStream} that contains join-records for each key and values computed by the given {@link ValueJoiner}, * one for each matched record-pair with the same key and within the joining window intervals */ KStream leftJoin( KStream otherStream, ValueJoiner joiner, JoinWindows windows, Serde keySerde, Serde otherValueSerde); /** * Combine values of this stream with another {@link KStream}'s elements of the same key using windowed Left Join * with default serializers and deserializers. 
* * @param otherStream the instance of {@link KStream} joined with this stream * @param joiner the instance of {@link ValueJoiner} * @param windows the specification of the {@link JoinWindows} * @param the value type of the other stream * @param the value type of the new stream * * @return a {@link KStream} that contains join-records for each key and values computed by the given {@link ValueJoiner}, * one for each matched record-pair with the same key and within the joining window intervals */ KStream leftJoin( KStream otherStream, ValueJoiner joiner, JoinWindows windows); /** * Combine values of this stream with {@link KTable}'s elements of the same key using non-windowed Left Join. * * @param table the instance of {@link KTable} joined with this stream * @param joiner the instance of {@link ValueJoiner} * @param the value type of the table * @param the value type of the new stream * * @return a {@link KStream} that contains join-records for each key and values computed by the given {@link ValueJoiner}, * one for each matched record-pair with the same key and within the joining window intervals */ KStream leftJoin(KTable table, ValueJoiner joiner); /** * Combine values of this stream by key on a window basis into a new instance of windowed {@link KTable}. 
* * @param reducer the instance of {@link Reducer} * @param windows the specification of the aggregation {@link Windows} * @param keySerde key serdes for materializing the aggregated table, * if not specified the default serdes defined in the configs will be used * @param valueSerde value serdes for materializing the aggregated table, * if not specified the default serdes defined in the configs will be used * * @return a windowed {@link KTable} which can be treated as a list of {@code KTable}s * where each table contains records with unmodified keys and values * that represent the latest (rolling) aggregate for each key within that window */ KTable, V> reduceByKey(Reducer reducer, Windows windows, Serde keySerde, Serde valueSerde); /** * Combine values of this stream by key on a window basis into a new instance of windowed {@link KTable} * with default serializers and deserializers. * * @param reducer the instance of {@link Reducer} * @param windows the specification of the aggregation {@link Windows} * * @return a windowed {@link KTable} which can be treated as a list of {@code KTable}s * where each table contains records with unmodified keys and values * that represent the latest (rolling) aggregate for each key within that window */ KTable, V> reduceByKey(Reducer reducer, Windows windows); /** * Combine values of this stream by key into a new instance of ever-updating {@link KTable}. 
* * @param reducer the instance of {@link Reducer} * @param keySerde key serdes for materializing the aggregated table, * if not specified the default serdes defined in the configs will be used * @param valueSerde value serdes for materializing the aggregated table, * if not specified the default serdes defined in the configs will be used * @param name the name of the resulted {@link KTable} * * @return a {@link KTable} that contains records with unmodified keys and values that represent the latest (rolling) aggregate for each key */ KTable reduceByKey(Reducer reducer, Serde keySerde, Serde valueSerde, String name); /** * Combine values of this stream by key into a new instance of ever-updating {@link KTable} with default serializers and deserializers. * * @param reducer the instance of {@link Reducer} * @param name the name of the resulted {@link KTable} * * @return a {@link KTable} that contains records with unmodified keys and values that represent the latest (rolling) aggregate for each key */ KTable reduceByKey(Reducer reducer, String name); /** * Aggregate values of this stream by key on a window basis into a new instance of windowed {@link KTable}. 
* * @param initializer the instance of {@link Initializer} * @param aggregator the instance of {@link Aggregator} * @param windows the specification of the aggregation {@link Windows} * @param keySerde key serdes for materializing the aggregated table, * if not specified the default serdes defined in the configs will be used * @param aggValueSerde aggregate value serdes for materializing the aggregated table, * if not specified the default serdes defined in the configs will be used * @param the value type of the resulted {@link KTable} * * @return a windowed {@link KTable} which can be treated as a list of {@code KTable}s * where each table contains records with unmodified keys and values with type {@code T} * that represent the latest (rolling) aggregate for each key within that window */ KTable, T> aggregateByKey(Initializer initializer, Aggregator aggregator, Windows windows, Serde keySerde, Serde aggValueSerde); /** * Aggregate values of this stream by key on a window basis into a new instance of windowed {@link KTable} * with default serializers and deserializers. * * @param initializer the instance of {@link Initializer} * @param aggregator the instance of {@link Aggregator} * @param windows the specification of the aggregation {@link Windows} * @param the value type of the resulted {@link KTable} * * @return a windowed {@link KTable} which can be treated as a list of {@code KTable}s * where each table contains records with unmodified keys and values with type {@code T} * that represent the latest (rolling) aggregate for each key within that window */ KTable, T> aggregateByKey(Initializer initializer, Aggregator aggregator, Windows windows); /** * Aggregate values of this stream by key into a new instance of ever-updating {@link KTable}. 
* * @param initializer the class of {@link Initializer} * @param aggregator the class of {@link Aggregator} * @param keySerde key serdes for materializing the aggregated table, * if not specified the default serdes defined in the configs will be used * @param aggValueSerde aggregate value serdes for materializing the aggregated table, * if not specified the default serdes defined in the configs will be used * @param name the name of the resulted {@link KTable} * @param the value type of the resulted {@link KTable} * * @return a {@link KTable} that contains records with unmodified keys and values (of different type) that represent the latest (rolling) aggregate for each key */ KTable aggregateByKey(Initializer initializer, Aggregator aggregator, Serde keySerde, Serde aggValueSerde, String name); /** * Aggregate values of this stream by key into a new instance of ever-updating {@link KTable} * with default serializers and deserializers. * * @param initializer the class of {@link Initializer} * @param aggregator the class of {@link Aggregator} * @param name the name of the resulted {@link KTable} * @param the value type of the resulted {@link KTable} * * @return a {@link KTable} that contains records with unmodified keys and values (of different type) that represent the latest (rolling) aggregate for each key */ KTable aggregateByKey(Initializer initializer, Aggregator aggregator, String name); /** * Count number of records of this stream by key on a window basis into a new instance of windowed {@link KTable}. 
* * @param windows the specification of the aggregation {@link Windows} * @param keySerde key serdes for materializing the counting table, * if not specified the default serdes defined in the configs will be used * * @return a windowed {@link KTable} which can be treated as a list of {@code KTable}s * where each table contains records with unmodified keys and values * that represent the latest (rolling) count (i.e., number of records) for each key within that window */ KTable, Long> countByKey(Windows windows, Serde keySerde); /** * Count number of records of this stream by key on a window basis into a new instance of windowed {@link KTable} * with default serializers and deserializers. * * @param windows the specification of the aggregation {@link Windows} * * @return a windowed {@link KTable} which can be treated as a list of {@code KTable}s * where each table contains records with unmodified keys and values * that represent the latest (rolling) count (i.e., number of records) for each key within that window */ KTable, Long> countByKey(Windows windows); /** * Count number of records of this stream by key into a new instance of ever-updating {@link KTable}. * * @param keySerde key serdes for materializing the counting table, * if not specified the default serdes defined in the configs will be used * @param name the name of the resulted {@link KTable} * * @return a {@link KTable} that contains records with unmodified keys and values that represent the latest (rolling) count (i.e., number of records) for each key */ KTable countByKey(Serde keySerde, String name); /** * Count number of records of this stream by key into a new instance of ever-updating {@link KTable} * with default serializers and deserializers. * * @param name the name of the resulted {@link KTable} * * @return a {@link KTable} that contains records with unmodified keys and values that represent the latest (rolling) count (i.e., number of records) for each key */ KTable countByKey(String name); }





// © 2015 - 2025 Weber Informatics LLC | Privacy Policy